spirv_msl.cpp (673 KB, ~14,308 lines)

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810
/*
 * Copyright 2016-2021 The Brenwill Workshop Ltd.
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_msl.hpp"
#include "GLSL.std.450.h"

#include <algorithm>
#include <assert.h>
#include <numeric>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

static const uint32_t k_unknown_location = ~0u;
static const uint32_t k_unknown_component = ~0u;
static const char *force_inline = "static inline __attribute__((always_inline))";

CompilerMSL::CompilerMSL(std::vector<uint32_t> spirv_)
    : CompilerGLSL(std::move(spirv_))
{
}

CompilerMSL::CompilerMSL(const uint32_t *ir_, size_t word_count)
    : CompilerGLSL(ir_, word_count)
{
}

CompilerMSL::CompilerMSL(const ParsedIR &ir_)
    : CompilerGLSL(ir_)
{
}

CompilerMSL::CompilerMSL(ParsedIR &&ir_)
    : CompilerGLSL(std::move(ir_))
{
}

void CompilerMSL::add_msl_shader_input(const MSLShaderInterfaceVariable &si)
{
    inputs_by_location[{si.location, si.component}] = si;
    if (si.builtin != BuiltInMax && !inputs_by_builtin.count(si.builtin))
        inputs_by_builtin[si.builtin] = si;
}

void CompilerMSL::add_msl_shader_output(const MSLShaderInterfaceVariable &so)
{
    outputs_by_location[{so.location, so.component}] = so;
    if (so.builtin != BuiltInMax && !outputs_by_builtin.count(so.builtin))
        outputs_by_builtin[so.builtin] = so;
}
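
// Illustrative usage sketch (not part of the upstream file): how a client might call
// the two registration functions above. The field values are example assumptions; the
// fields themselves (location, component, builtin) are exactly the ones the maps above
// consume.
#if 0
static void example_register_interface_vars(CompilerMSL &msl)
{
    MSLShaderInterfaceVariable attr = {};
    attr.location = 0;         // keyed by {location, component} above
    attr.component = 0;
    attr.builtin = BuiltInMax; // BuiltInMax means "not a builtin", so only the location map is updated
    msl.add_msl_shader_input(attr);
}
#endif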

void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding)
{
    StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding };
    resource_bindings[tuple] = { binding, false };

    // If we might need to pad argument buffer members to positionally align
    // arg buffer indexes, also maintain a lookup by argument buffer index.
    if (msl_options.pad_argument_buffer_resources)
    {
        StageSetBinding arg_idx_tuple = { binding.stage, binding.desc_set, k_unknown_component };

#define ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(rez) \
    arg_idx_tuple.binding = binding.msl_##rez; \
    resource_arg_buff_idx_to_binding_number[arg_idx_tuple] = binding.binding

        switch (binding.basetype)
        {
        case SPIRType::Void:
        case SPIRType::Boolean:
        case SPIRType::SByte:
        case SPIRType::UByte:
        case SPIRType::Short:
        case SPIRType::UShort:
        case SPIRType::Int:
        case SPIRType::UInt:
        case SPIRType::Int64:
        case SPIRType::UInt64:
        case SPIRType::AtomicCounter:
        case SPIRType::Half:
        case SPIRType::Float:
        case SPIRType::Double:
            ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(buffer);
            break;
        case SPIRType::Image:
            ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(texture);
            break;
        case SPIRType::Sampler:
            ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(sampler);
            break;
        case SPIRType::SampledImage:
            ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(texture);
            ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(sampler);
            break;
        default:
            SPIRV_CROSS_THROW("Unexpected argument buffer resource base type. When padding argument buffer elements, "
                              "all descriptor set resources must be supplied with a base type by the app.");
        }
#undef ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP
    }
}
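
// Illustrative sketch (not part of the upstream file): registering a combined
// image/sampler binding so the padding lookup above can map both its texture and
// sampler argument-buffer indexes back to the binding number. The numeric values
// are example assumptions; note the basetype is mandatory when
// pad_argument_buffer_resources is enabled, per the throw above.
#if 0
static void example_register_combined_sampler(CompilerMSL &msl)
{
    MSLResourceBinding b = {};
    b.stage = ExecutionModelFragment;
    b.basetype = SPIRType::SampledImage; // routes through both texture and sampler lookups
    b.desc_set = 0;
    b.binding = 1;
    b.msl_texture = 0; // argument buffer index of the texture half
    b.msl_sampler = 1; // argument buffer index of the sampler half
    msl.add_msl_resource_binding(b);
}
#endif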

void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index)
{
    SetBindingPair pair = { desc_set, binding };
    buffers_requiring_dynamic_offset[pair] = { index, 0 };
}

void CompilerMSL::add_inline_uniform_block(uint32_t desc_set, uint32_t binding)
{
    SetBindingPair pair = { desc_set, binding };
    inline_uniform_blocks.insert(pair);
}

void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
{
    if (desc_set < kMaxArgumentBuffers)
        argument_buffer_discrete_mask |= 1u << desc_set;
}

void CompilerMSL::set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage)
{
    if (desc_set < kMaxArgumentBuffers)
    {
        if (device_storage)
            argument_buffer_device_storage_mask |= 1u << desc_set;
        else
            argument_buffer_device_storage_mask &= ~(1u << desc_set);
    }
}
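
// Illustrative sketch (not part of the upstream file): the two helpers above track one
// bit per descriptor set in a 32-bit mask, i.e. set N maps to bit (1u << N), which is
// why sets at or beyond kMaxArgumentBuffers are silently ignored.
#if 0
static void example_configure_descriptor_sets(CompilerMSL &msl)
{
    msl.set_argument_buffer_device_address_space(0, true); // set 0: argument buffer in device address space
    msl.add_discrete_descriptor_set(1);                    // set 1: keep as discrete bindings
}
#endif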

bool CompilerMSL::is_msl_shader_input_used(uint32_t location)
{
    // Don't report internal location allocations to app.
    return location_inputs_in_use.count(location) != 0 &&
           location_inputs_in_use_fallback.count(location) == 0;
}

bool CompilerMSL::is_msl_shader_output_used(uint32_t location)
{
    // Don't report internal location allocations to app.
    return location_outputs_in_use.count(location) != 0 &&
           location_outputs_in_use_fallback.count(location) == 0;
}

uint32_t CompilerMSL::get_automatic_builtin_input_location(spv::BuiltIn builtin) const
{
    auto itr = builtin_to_automatic_input_location.find(builtin);
    if (itr == builtin_to_automatic_input_location.end())
        return k_unknown_location;
    else
        return itr->second;
}

uint32_t CompilerMSL::get_automatic_builtin_output_location(spv::BuiltIn builtin) const
{
    auto itr = builtin_to_automatic_output_location.find(builtin);
    if (itr == builtin_to_automatic_output_location.end())
        return k_unknown_location;
    else
        return itr->second;
}

bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const
{
    StageSetBinding tuple = { model, desc_set, binding };
    auto itr = resource_bindings.find(tuple);
    return itr != end(resource_bindings) && itr->second.second;
}

bool CompilerMSL::is_var_runtime_size_array(const SPIRVariable &var) const
{
    auto &type = get_variable_data_type(var);
    return is_runtime_size_array(type) && get_resource_array_size(type, var.self) == 0;
}

// Returns the size of the array of resources used by the variable with the specified type and id.
// The size is first retrieved from the type, but in the case of runtime array sizing,
// the size is retrieved from the resource binding added using add_msl_resource_binding().
uint32_t CompilerMSL::get_resource_array_size(const SPIRType &type, uint32_t id) const
{
    uint32_t array_size = to_array_size_literal(type);

    // If we have argument buffers, we need to honor the ABI by using the correct array size
    // from the layout. Only use shader declared size if we're not using argument buffers.
    uint32_t desc_set = get_decoration(id, DecorationDescriptorSet);
    if (!descriptor_set_is_argument_buffer(desc_set) && array_size)
        return array_size;

    StageSetBinding tuple = { get_entry_point().model, desc_set,
                              get_decoration(id, DecorationBinding) };
    auto itr = resource_bindings.find(tuple);
    return itr != end(resource_bindings) ? itr->second.first.count : array_size;
}
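
// Illustrative sketch (not part of the upstream file): with argument buffers, a
// runtime-sized descriptor array gets its size from the binding's count field rather
// than from the SPIR-V type, which is what makes the lookup above necessary. Values
// here are example assumptions.
#if 0
static void example_size_runtime_array(CompilerMSL &msl)
{
    MSLResourceBinding b = {};
    b.stage = ExecutionModelFragment;
    b.basetype = SPIRType::Image;
    b.desc_set = 0;
    b.binding = 0;
    b.count = 64; // resolves a runtime-sized array at this binding to 64 resources
    msl.add_msl_resource_binding(b);
}
#endif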

uint32_t CompilerMSL::get_automatic_msl_resource_binding(uint32_t id) const
{
    return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexPrimary);
}

uint32_t CompilerMSL::get_automatic_msl_resource_binding_secondary(uint32_t id) const
{
    return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexSecondary);
}

uint32_t CompilerMSL::get_automatic_msl_resource_binding_tertiary(uint32_t id) const
{
    return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexTertiary);
}

uint32_t CompilerMSL::get_automatic_msl_resource_binding_quaternary(uint32_t id) const
{
    return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexQuaternary);
}

void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components)
{
    fragment_output_components[location] = components;
}

bool CompilerMSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const
{
    return (builtin == BuiltInSampleMask);
}

void CompilerMSL::build_implicit_builtins()
{
    bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition);
    bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex &&
                              !msl_options.vertex_for_tessellation;
    bool need_tesc_params = is_tesc_shader();
    bool need_tese_params = is_tese_shader() && msl_options.raw_buffer_tese_input;
    bool need_subgroup_mask =
        active_input_builtins.get(BuiltInSubgroupEqMask) || active_input_builtins.get(BuiltInSubgroupGeMask) ||
        active_input_builtins.get(BuiltInSubgroupGtMask) || active_input_builtins.get(BuiltInSubgroupLeMask) ||
        active_input_builtins.get(BuiltInSubgroupLtMask);
    bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) ||
                                                           active_input_builtins.get(BuiltInSubgroupGtMask));
    bool need_multiview = get_execution_model() == ExecutionModelVertex && !msl_options.view_index_from_device_index &&
                          msl_options.multiview_layered_rendering &&
                          (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex));
    bool need_dispatch_base =
        msl_options.dispatch_base && get_execution_model() == ExecutionModelGLCompute &&
        (active_input_builtins.get(BuiltInWorkgroupId) || active_input_builtins.get(BuiltInGlobalInvocationId));
    bool need_grid_params = get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation;
    bool need_vertex_base_params =
        need_grid_params &&
        (active_input_builtins.get(BuiltInVertexId) || active_input_builtins.get(BuiltInVertexIndex) ||
         active_input_builtins.get(BuiltInBaseVertex) || active_input_builtins.get(BuiltInInstanceId) ||
         active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance));
    bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId);
    bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups);
    bool force_frag_depth_passthrough =
        get_execution_model() == ExecutionModelFragment && !uses_explicit_early_fragment_test() && need_subpass_input &&
        msl_options.enable_frag_depth_builtin && msl_options.input_attachment_is_ds_attachment;

    if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
        need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params ||
        needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation ||
        has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough)
    {
        bool has_frag_coord = false;
        bool has_sample_id = false;
        bool has_vertex_idx = false;
        bool has_base_vertex = false;
        bool has_instance_idx = false;
        bool has_base_instance = false;
        bool has_invocation_id = false;
        bool has_primitive_id = false;
        bool has_subgroup_invocation_id = false;
        bool has_subgroup_size = false;
        bool has_view_idx = false;
        bool has_layer = false;
        bool has_helper_invocation = false;
        bool has_local_invocation_index = false;
        bool has_workgroup_size = false;
        bool has_frag_depth = false;
        uint32_t workgroup_id_type = 0;

        ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
            if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
                return;
            if (!interface_variable_exists_in_entry_point(var.self))
                return;
            if (!has_decoration(var.self, DecorationBuiltIn))
                return;

            BuiltIn builtin = ir.meta[var.self].decoration.builtin_type;

            if (var.storage == StorageClassOutput)
            {
                if (has_additional_fixed_sample_mask() && builtin == BuiltInSampleMask)
                {
                    builtin_sample_mask_id = var.self;
                    mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self);
                    does_shader_write_sample_mask = true;
                }

                if (force_frag_depth_passthrough && builtin == BuiltInFragDepth)
                {
                    builtin_frag_depth_id = var.self;
                    mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var.self);
                    has_frag_depth = true;
                }
            }

            if (var.storage != StorageClassInput)
                return;

            // Use Metal's native frame-buffer fetch API for subpass inputs.
            if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses))
            {
                switch (builtin)
                {
                case BuiltInFragCoord:
                    mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var.self);
                    builtin_frag_coord_id = var.self;
                    has_frag_coord = true;
                    break;
                case BuiltInLayer:
                    if (!msl_options.arrayed_subpass_input || msl_options.multiview)
                        break;
                    mark_implicit_builtin(StorageClassInput, BuiltInLayer, var.self);
                    builtin_layer_id = var.self;
                    has_layer = true;
                    break;
                case BuiltInViewIndex:
                    if (!msl_options.multiview)
                        break;
                    mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var.self);
                    builtin_view_idx_id = var.self;
                    has_view_idx = true;
                    break;
                default:
                    break;
                }
            }

            if ((need_sample_pos || needs_sample_id) && builtin == BuiltInSampleId)
            {
                builtin_sample_id_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var.self);
                has_sample_id = true;
            }

            if (need_vertex_params)
            {
                switch (builtin)
                {
                case BuiltInVertexIndex:
                    builtin_vertex_idx_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var.self);
                    has_vertex_idx = true;
                    break;
                case BuiltInBaseVertex:
                    builtin_base_vertex_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var.self);
                    has_base_vertex = true;
                    break;
                case BuiltInInstanceIndex:
                    builtin_instance_idx_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var.self);
                    has_instance_idx = true;
                    break;
                case BuiltInBaseInstance:
                    builtin_base_instance_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var.self);
                    has_base_instance = true;
                    break;
                default:
                    break;
                }
            }

            if (need_tesc_params && builtin == BuiltInInvocationId)
            {
                builtin_invocation_id_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var.self);
                has_invocation_id = true;
            }

            if ((need_tesc_params || need_tese_params) && builtin == BuiltInPrimitiveId)
            {
                builtin_primitive_id_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var.self);
                has_primitive_id = true;
            }

            if (need_tese_params && builtin == BuiltInTessLevelOuter)
            {
                tess_level_outer_var_id = var.self;
            }

            if (need_tese_params && builtin == BuiltInTessLevelInner)
            {
                tess_level_inner_var_id = var.self;
            }

            if ((need_subgroup_mask || needs_subgroup_invocation_id) && builtin == BuiltInSubgroupLocalInvocationId)
            {
                builtin_subgroup_invocation_id_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var.self);
                has_subgroup_invocation_id = true;
            }

            if ((need_subgroup_ge_mask || needs_subgroup_size) && builtin == BuiltInSubgroupSize)
            {
                builtin_subgroup_size_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var.self);
                has_subgroup_size = true;
            }

            if (need_multiview)
            {
                switch (builtin)
                {
                case BuiltInInstanceIndex:
                    // The view index here is derived from the instance index.
                    builtin_instance_idx_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var.self);
                    has_instance_idx = true;
                    break;
                case BuiltInBaseInstance:
                    // If a non-zero base instance is used, we need to adjust for it when calculating the view index.
                    builtin_base_instance_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var.self);
                    has_base_instance = true;
                    break;
                case BuiltInViewIndex:
                    builtin_view_idx_id = var.self;
                    mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var.self);
                    has_view_idx = true;
                    break;
                default:
                    break;
                }
            }

            if (needs_helper_invocation && builtin == BuiltInHelperInvocation)
            {
                builtin_helper_invocation_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInHelperInvocation, var.self);
                has_helper_invocation = true;
            }

            if (need_local_invocation_index && builtin == BuiltInLocalInvocationIndex)
            {
                builtin_local_invocation_index_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var.self);
                has_local_invocation_index = true;
            }

            if (need_workgroup_size && builtin == BuiltInLocalInvocationId)
            {
                builtin_workgroup_size_id = var.self;
                mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self);
                has_workgroup_size = true;
            }

            // The base workgroup needs to have the same type and vector size
            // as the workgroup or invocation ID, so keep track of the type that
            // was used.
            if (need_dispatch_base && workgroup_id_type == 0 &&
                (builtin == BuiltInWorkgroupId || builtin == BuiltInGlobalInvocationId))
                workgroup_id_type = var.basetype;
        });

        // Use Metal's native frame-buffer fetch API for subpass inputs.
        if ((!has_frag_coord || (msl_options.multiview && !has_view_idx) ||
             (msl_options.arrayed_subpass_input && !msl_options.multiview && !has_layer)) &&
            (!msl_options.use_framebuffer_fetch_subpasses) && need_subpass_input)
        {
            if (!has_frag_coord)
            {
                uint32_t offset = ir.increase_bound_by(3);
                uint32_t type_id = offset;
                uint32_t type_ptr_id = offset + 1;
                uint32_t var_id = offset + 2;

                // Create gl_FragCoord.
                SPIRType vec4_type { OpTypeVector };
                vec4_type.basetype = SPIRType::Float;
                vec4_type.width = 32;
                vec4_type.vecsize = 4;
                set<SPIRType>(type_id, vec4_type);

                SPIRType vec4_type_ptr = vec4_type;
                vec4_type_ptr.op = OpTypePointer;
                vec4_type_ptr.pointer = true;
                vec4_type_ptr.pointer_depth++;
                vec4_type_ptr.parent_type = type_id;
                vec4_type_ptr.storage = StorageClassInput;
                auto &ptr_type = set<SPIRType>(type_ptr_id, vec4_type_ptr);
                ptr_type.self = type_id;

                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord);
                builtin_frag_coord_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var_id);
            }

            if (!has_layer && msl_options.arrayed_subpass_input && !msl_options.multiview)
            {
                uint32_t offset = ir.increase_bound_by(2);
                uint32_t type_ptr_id = offset;
                uint32_t var_id = offset + 1;

                // Create gl_Layer.
                SPIRType uint_type_ptr = get_uint_type();
                uint_type_ptr.op = OpTypePointer;
                uint_type_ptr.pointer = true;
                uint_type_ptr.pointer_depth++;
                uint_type_ptr.parent_type = get_uint_type_id();
                uint_type_ptr.storage = StorageClassInput;
                auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
                ptr_type.self = get_uint_type_id();

                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
                builtin_layer_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInLayer, var_id);
            }

            if (!has_view_idx && msl_options.multiview)
            {
                uint32_t offset = ir.increase_bound_by(2);
                uint32_t type_ptr_id = offset;
                uint32_t var_id = offset + 1;

                // Create gl_ViewIndex.
                SPIRType uint_type_ptr = get_uint_type();
                uint_type_ptr.op = OpTypePointer;
                uint_type_ptr.pointer = true;
                uint_type_ptr.pointer_depth++;
                uint_type_ptr.parent_type = get_uint_type_id();
                uint_type_ptr.storage = StorageClassInput;
                auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
                ptr_type.self = get_uint_type_id();

                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
                builtin_view_idx_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
            }
        }
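
        // Each of the synthesis blocks above and below follows the same recipe:
        // ir.increase_bound_by(n) reserves n fresh SPIR-V IDs, set<SPIRType>() registers
        // a (pointer) type under one of them, set<SPIRVariable>() registers the implicit
        // variable under another, set_decoration() tags it with the builtin it stands in
        // for, and mark_implicit_builtin() records it so the entry-point interface and
        // later compilation stages can find it.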

        if (!has_sample_id && (need_sample_pos || needs_sample_id))
        {
            uint32_t offset = ir.increase_bound_by(2);
            uint32_t type_ptr_id = offset;
            uint32_t var_id = offset + 1;

            // Create gl_SampleID.
            SPIRType uint_type_ptr = get_uint_type();
            uint_type_ptr.op = OpTypePointer;
            uint_type_ptr.pointer = true;
            uint_type_ptr.pointer_depth++;
            uint_type_ptr.parent_type = get_uint_type_id();
            uint_type_ptr.storage = StorageClassInput;
            auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
            ptr_type.self = get_uint_type_id();

            set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
            set_decoration(var_id, DecorationBuiltIn, BuiltInSampleId);
            builtin_sample_id_id = var_id;
            mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var_id);
        }

        if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) ||
            (need_multiview && (!has_instance_idx || !has_base_instance || !has_view_idx)))
        {
            uint32_t type_ptr_id = ir.increase_bound_by(1);

            SPIRType uint_type_ptr = get_uint_type();
            uint_type_ptr.op = OpTypePointer;
            uint_type_ptr.pointer = true;
            uint_type_ptr.pointer_depth++;
            uint_type_ptr.parent_type = get_uint_type_id();
            uint_type_ptr.storage = StorageClassInput;
            auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
            ptr_type.self = get_uint_type_id();

            if (need_vertex_params && !has_vertex_idx)
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_VertexIndex.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInVertexIndex);
                builtin_vertex_idx_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var_id);
            }

            if (need_vertex_params && !has_base_vertex)
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_BaseVertex.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInBaseVertex);
                builtin_base_vertex_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var_id);
            }

            if (!has_instance_idx) // Needed by both multiview and tessellation
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_InstanceIndex.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex);
                builtin_instance_idx_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id);
            }

            if (!has_base_instance) // Needed by both multiview and tessellation
            {
                uint32_t var_id = ir.increase_bound_by(1);

                // Create gl_BaseInstance.
                set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
                set_decoration(var_id, DecorationBuiltIn, BuiltInBaseInstance);
                builtin_base_instance_id = var_id;
                mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
            }

            if (need_multiview)
            {
                // Multiview shaders are not allowed to write to gl_Layer, ostensibly because
                // it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
                // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
                // gl_Layer is an output in vertex-pipeline shaders.
                uint32_t type_ptr_out_id = ir.increase_bound_by(2);
                SPIRType uint_type_ptr_out = get_uint_type();
                uint_type_ptr_out.op = OpTypePointer;
  578. uint_type_ptr_out.pointer = true;
  579. uint_type_ptr_out.pointer_depth++;
  580. uint_type_ptr_out.parent_type = get_uint_type_id();
  581. uint_type_ptr_out.storage = StorageClassOutput;
  582. auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
  583. ptr_out_type.self = get_uint_type_id();
  584. uint32_t var_id = type_ptr_out_id + 1;
  585. set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
  586. set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
  587. builtin_layer_id = var_id;
  588. mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
  589. }
  590. if (need_multiview && !has_view_idx)
  591. {
  592. uint32_t var_id = ir.increase_bound_by(1);
  593. // Create gl_ViewIndex.
  594. set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
  595. set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
  596. builtin_view_idx_id = var_id;
  597. mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
  598. }
  599. }
  600. if ((need_tesc_params && (msl_options.multi_patch_workgroup || !has_invocation_id || !has_primitive_id)) ||
  601. (need_tese_params && !has_primitive_id) || need_grid_params)
  602. {
  603. uint32_t type_ptr_id = ir.increase_bound_by(1);
  604. SPIRType uint_type_ptr = get_uint_type();
  605. uint_type_ptr.op = OpTypePointer;
  606. uint_type_ptr.pointer = true;
  607. uint_type_ptr.pointer_depth++;
  608. uint_type_ptr.parent_type = get_uint_type_id();
  609. uint_type_ptr.storage = StorageClassInput;
  610. auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
  611. ptr_type.self = get_uint_type_id();
  612. if ((need_tesc_params && msl_options.multi_patch_workgroup) || need_grid_params)
  613. {
  614. uint32_t var_id = ir.increase_bound_by(1);
  615. // Create gl_GlobalInvocationID.
  616. set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
  617. set_decoration(var_id, DecorationBuiltIn, BuiltInGlobalInvocationId);
  618. builtin_invocation_id_id = var_id;
  619. mark_implicit_builtin(StorageClassInput, BuiltInGlobalInvocationId, var_id);
  620. }
  621. else if (need_tesc_params && !has_invocation_id)
  622. {
  623. uint32_t var_id = ir.increase_bound_by(1);
  624. // Create gl_InvocationID.
  625. set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
  626. set_decoration(var_id, DecorationBuiltIn, BuiltInInvocationId);
  627. builtin_invocation_id_id = var_id;
  628. mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var_id);
  629. }
  630. if ((need_tesc_params || need_tese_params) && !has_primitive_id)
  631. {
  632. uint32_t var_id = ir.increase_bound_by(1);
  633. // Create gl_PrimitiveID.
  634. set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
  635. set_decoration(var_id, DecorationBuiltIn, BuiltInPrimitiveId);
  636. builtin_primitive_id_id = var_id;
  637. mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var_id);
  638. }
  639. if (need_grid_params)
  640. {
  641. uint32_t var_id = ir.increase_bound_by(1);
  642. set<SPIRVariable>(var_id, build_extended_vector_type(get_uint_type_id(), 3), StorageClassInput);
  643. set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInStageInputSize);
  644. get_entry_point().interface_variables.push_back(var_id);
  645. set_name(var_id, "spvStageInputSize");
  646. builtin_stage_input_size_id = var_id;
  647. }
  648. }

		if (!has_subgroup_invocation_id && (need_subgroup_mask || needs_subgroup_invocation_id))
		{
			uint32_t offset = ir.increase_bound_by(2);
			uint32_t type_ptr_id = offset;
			uint32_t var_id = offset + 1;

			// Create gl_SubgroupInvocationID.
			SPIRType uint_type_ptr = get_uint_type();
			uint_type_ptr.op = OpTypePointer;
			uint_type_ptr.pointer = true;
			uint_type_ptr.pointer_depth++;
			uint_type_ptr.parent_type = get_uint_type_id();
			uint_type_ptr.storage = StorageClassInput;

			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
			ptr_type.self = get_uint_type_id();

			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
			set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupLocalInvocationId);
			builtin_subgroup_invocation_id_id = var_id;
			mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id);
		}

		if (!has_subgroup_size && (need_subgroup_ge_mask || needs_subgroup_size))
		{
			uint32_t offset = ir.increase_bound_by(2);
			uint32_t type_ptr_id = offset;
			uint32_t var_id = offset + 1;

			// Create gl_SubgroupSize.
			SPIRType uint_type_ptr = get_uint_type();
			uint_type_ptr.op = OpTypePointer;
			uint_type_ptr.pointer = true;
			uint_type_ptr.pointer_depth++;
			uint_type_ptr.parent_type = get_uint_type_id();
			uint_type_ptr.storage = StorageClassInput;

			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
			ptr_type.self = get_uint_type_id();

			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
			set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupSize);
			builtin_subgroup_size_id = var_id;
			mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var_id);
		}
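
		// Illustrative note (assumed mapping; the exact attribute is stage-dependent):
		// the generated MSL usually binds these builtins as entry-point attributes
		// along the lines of
		//   uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]
		//   uint gl_SubgroupSize [[threads_per_simdgroup]]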

		if (need_dispatch_base || need_vertex_base_params)
		{
			if (workgroup_id_type == 0)
				workgroup_id_type = build_extended_vector_type(get_uint_type_id(), 3);

			uint32_t var_id;
			if (msl_options.supports_msl_version(1, 2))
			{
				// If we have MSL 1.2, we can (ab)use the [[grid_origin]] builtin
				// to convey this information and save a buffer slot.
				uint32_t offset = ir.increase_bound_by(1);
				var_id = offset;

				set<SPIRVariable>(var_id, workgroup_id_type, StorageClassInput);
				set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase);
				get_entry_point().interface_variables.push_back(var_id);
			}
			else
			{
				// Otherwise, we need to fall back to a good ol' fashioned buffer.
				uint32_t offset = ir.increase_bound_by(2);
				var_id = offset;
				uint32_t type_id = offset + 1;

				SPIRType var_type = get<SPIRType>(workgroup_id_type);
				var_type.storage = StorageClassUniform;
				set<SPIRType>(type_id, var_type);

				set<SPIRVariable>(var_id, type_id, StorageClassUniform);
				// This should never match anything.
				set_decoration(var_id, DecorationDescriptorSet, ~(5u));
				set_decoration(var_id, DecorationBinding, msl_options.indirect_params_buffer_index);
				set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary,
				                        msl_options.indirect_params_buffer_index);
			}
			set_name(var_id, "spvDispatchBase");
			builtin_dispatch_base_id = var_id;
		}
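
		// Illustrative sketch: on MSL 1.2+ the variable above is expected to appear as
		//   uint3 spvDispatchBase [[grid_origin]]
		// while the pre-1.2 fallback binds the same data as an ordinary buffer argument.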

		if (has_additional_fixed_sample_mask() && !does_shader_write_sample_mask)
		{
			uint32_t offset = ir.increase_bound_by(2);
			uint32_t var_id = offset + 1;

			// Create gl_SampleMask.
			SPIRType uint_type_ptr_out = get_uint_type();
			uint_type_ptr_out.op = OpTypePointer;
			uint_type_ptr_out.pointer = true;
			uint_type_ptr_out.pointer_depth++;
			uint_type_ptr_out.parent_type = get_uint_type_id();
			uint_type_ptr_out.storage = StorageClassOutput;

			auto &ptr_out_type = set<SPIRType>(offset, uint_type_ptr_out);
			ptr_out_type.self = get_uint_type_id();
			set<SPIRVariable>(var_id, offset, StorageClassOutput);
			set_decoration(var_id, DecorationBuiltIn, BuiltInSampleMask);
			builtin_sample_mask_id = var_id;
			mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var_id);
		}

		if (!has_helper_invocation && needs_helper_invocation)
		{
			uint32_t offset = ir.increase_bound_by(3);
			uint32_t type_id = offset;
			uint32_t type_ptr_id = offset + 1;
			uint32_t var_id = offset + 2;

			// Create gl_HelperInvocation.
			SPIRType bool_type { OpTypeBool };
			bool_type.basetype = SPIRType::Boolean;
			bool_type.width = 8;
			bool_type.vecsize = 1;
			set<SPIRType>(type_id, bool_type);

			SPIRType bool_type_ptr_in = bool_type;
			bool_type_ptr_in.op = spv::OpTypePointer;
			bool_type_ptr_in.pointer = true;
			bool_type_ptr_in.pointer_depth++;
			bool_type_ptr_in.parent_type = type_id;
			bool_type_ptr_in.storage = StorageClassInput;

			auto &ptr_in_type = set<SPIRType>(type_ptr_id, bool_type_ptr_in);
			ptr_in_type.self = type_id;
			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
			set_decoration(var_id, DecorationBuiltIn, BuiltInHelperInvocation);
			builtin_helper_invocation_id = var_id;
			mark_implicit_builtin(StorageClassInput, BuiltInHelperInvocation, var_id);
		}
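
		// Illustrative note: when manual helper-invocation updates are in effect, this
		// variable is later initialized in the entry point roughly as
		//   gl_HelperInvocation = simd_is_helper_thread();
		// (see extract_global_variables_from_functions() below).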

		if (need_local_invocation_index && !has_local_invocation_index)
		{
			uint32_t offset = ir.increase_bound_by(2);
			uint32_t type_ptr_id = offset;
			uint32_t var_id = offset + 1;

			// Create gl_LocalInvocationIndex.
			SPIRType uint_type_ptr = get_uint_type();
			uint_type_ptr.op = OpTypePointer;
			uint_type_ptr.pointer = true;
			uint_type_ptr.pointer_depth++;
			uint_type_ptr.parent_type = get_uint_type_id();
			uint_type_ptr.storage = StorageClassInput;

			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
			ptr_type.self = get_uint_type_id();
			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
			set_decoration(var_id, DecorationBuiltIn, BuiltInLocalInvocationIndex);
			builtin_local_invocation_index_id = var_id;
			mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var_id);
		}

		if (need_workgroup_size && !has_workgroup_size)
		{
			uint32_t offset = ir.increase_bound_by(2);
			uint32_t type_ptr_id = offset;
			uint32_t var_id = offset + 1;

			// Create gl_WorkgroupSize.
			uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
			SPIRType uint_type_ptr = get<SPIRType>(type_id);
			uint_type_ptr.op = OpTypePointer;
			uint_type_ptr.pointer = true;
			uint_type_ptr.pointer_depth++;
			uint_type_ptr.parent_type = type_id;
			uint_type_ptr.storage = StorageClassInput;

			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
			ptr_type.self = type_id;
			set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
			set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
			builtin_workgroup_size_id = var_id;
			mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
		}

		if (!has_frag_depth && force_frag_depth_passthrough)
		{
			uint32_t offset = ir.increase_bound_by(3);
			uint32_t type_id = offset;
			uint32_t type_ptr_id = offset + 1;
			uint32_t var_id = offset + 2;

			// Create gl_FragDepth.
			SPIRType float_type { OpTypeFloat };
			float_type.basetype = SPIRType::Float;
			float_type.width = 32;
			float_type.vecsize = 1;
			set<SPIRType>(type_id, float_type);

			SPIRType float_type_ptr_in = float_type;
			float_type_ptr_in.op = spv::OpTypePointer;
			float_type_ptr_in.pointer = true;
			float_type_ptr_in.pointer_depth++;
			float_type_ptr_in.parent_type = type_id;
			float_type_ptr_in.storage = StorageClassOutput;

			auto &ptr_in_type = set<SPIRType>(type_ptr_id, float_type_ptr_in);
			ptr_in_type.self = type_id;
			set<SPIRVariable>(var_id, type_ptr_id, StorageClassOutput);
			set_decoration(var_id, DecorationBuiltIn, BuiltInFragDepth);
			builtin_frag_depth_id = var_id;
			mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var_id);
			active_output_builtins.set(BuiltInFragDepth);
		}
	}

	if (needs_swizzle_buffer_def)
	{
		uint32_t var_id = build_constant_uint_array_pointer();
		set_name(var_id, "spvSwizzleConstants");
		// This should never match anything.
		set_decoration(var_id, DecorationDescriptorSet, kSwizzleBufferBinding);
		set_decoration(var_id, DecorationBinding, msl_options.swizzle_buffer_index);
		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.swizzle_buffer_index);
		swizzle_buffer_id = var_id;
	}

	if (needs_buffer_size_buffer())
	{
		uint32_t var_id = build_constant_uint_array_pointer();
		set_name(var_id, "spvBufferSizeConstants");
		// This should never match anything.
		set_decoration(var_id, DecorationDescriptorSet, kBufferSizeBufferBinding);
		set_decoration(var_id, DecorationBinding, msl_options.buffer_size_buffer_index);
		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.buffer_size_buffer_index);
		buffer_size_buffer_id = var_id;
	}

	if (needs_view_mask_buffer())
	{
		uint32_t var_id = build_constant_uint_array_pointer();
		set_name(var_id, "spvViewMask");
		// This should never match anything.
		set_decoration(var_id, DecorationDescriptorSet, ~(4u));
		set_decoration(var_id, DecorationBinding, msl_options.view_mask_buffer_index);
		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index);
		view_mask_buffer_id = var_id;
	}

	if (!buffers_requiring_dynamic_offset.empty())
	{
		uint32_t var_id = build_constant_uint_array_pointer();
		set_name(var_id, "spvDynamicOffsets");
		// This should never match anything.
		set_decoration(var_id, DecorationDescriptorSet, ~(5u));
		set_decoration(var_id, DecorationBinding, msl_options.dynamic_offsets_buffer_index);
		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary,
		                        msl_options.dynamic_offsets_buffer_index);
		dynamic_offsets_buffer_id = var_id;
	}

	// If we're returning a struct from a vertex-like entry point, we must return a position attribute.
	bool need_position = (get_execution_model() == ExecutionModelVertex || is_tese_shader()) &&
	                     !capture_output_to_buffer && !get_is_rasterization_disabled() &&
	                     !active_output_builtins.get(BuiltInPosition);

	if (need_position)
	{
		// If we can get away with returning void from entry point, we don't need to care.
		// If there is at least one other stage output, we need to return [[position]],
		// so we need to create one if it doesn't appear in the SPIR-V. Before adding the
		// implicit variable, check if it actually exists already, but just has not been used
		// or initialized, and if so, mark it as active, and do not create the implicit variable.
		bool has_output = false;
		ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
			if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self))
			{
				has_output = true;

				// Check if the variable is the Position builtin.
				if (has_decoration(var.self, DecorationBuiltIn) && get_decoration(var.self, DecorationBuiltIn) == BuiltInPosition)
					active_output_builtins.set(BuiltInPosition);

				// If the variable is a struct, check if any member is the Position builtin.
				auto &var_type = get_variable_element_type(var);
				if (var_type.basetype == SPIRType::Struct)
				{
					auto mbr_cnt = var_type.member_types.size();
					for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
					{
						auto builtin = BuiltInMax;
						bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
						if (is_builtin && builtin == BuiltInPosition)
							active_output_builtins.set(BuiltInPosition);
					}
				}
			}
		});
		need_position = has_output && !active_output_builtins.get(BuiltInPosition);
	}

	if (need_position)
	{
		uint32_t offset = ir.increase_bound_by(3);
		uint32_t type_id = offset;
		uint32_t type_ptr_id = offset + 1;
		uint32_t var_id = offset + 2;

		// Create gl_Position.
		SPIRType vec4_type { OpTypeVector };
		vec4_type.basetype = SPIRType::Float;
		vec4_type.width = 32;
		vec4_type.vecsize = 4;
		set<SPIRType>(type_id, vec4_type);

		SPIRType vec4_type_ptr = vec4_type;
		vec4_type_ptr.op = OpTypePointer;
		vec4_type_ptr.pointer = true;
		vec4_type_ptr.pointer_depth++;
		vec4_type_ptr.parent_type = type_id;
		vec4_type_ptr.storage = StorageClassOutput;

		auto &ptr_type = set<SPIRType>(type_ptr_id, vec4_type_ptr);
		ptr_type.self = type_id;
		set<SPIRVariable>(var_id, type_ptr_id, StorageClassOutput);
		set_decoration(var_id, DecorationBuiltIn, BuiltInPosition);
		mark_implicit_builtin(StorageClassOutput, BuiltInPosition, var_id);
	}
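
	// Illustrative sketch: the implicit variable created above typically becomes a
	// stage-output member such as
	//   float4 gl_Position [[position]];
	// so the vertex-like entry point still returns a valid position.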
}

// Checks if the specified builtin variable (e.g. gl_InstanceIndex) is marked as active.
// If not, it marks it as active and forces a recompilation.
// This might be used when the optimization of inactive builtins was too optimistic (e.g. when "spvOut" is emitted).
void CompilerMSL::ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin)
{
	Bitset *active_builtins = nullptr;
	switch (storage)
	{
	case StorageClassInput:
		active_builtins = &active_input_builtins;
		break;

	case StorageClassOutput:
		active_builtins = &active_output_builtins;
		break;

	default:
		break;
	}

	// At this point, the specified builtin variable must have already been declared in the entry point.
	// If not, mark as active and force recompile.
	if (active_builtins != nullptr && !active_builtins->get(builtin))
	{
		active_builtins->set(builtin);
		force_recompile();
	}
}

void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id)
{
	Bitset *active_builtins = nullptr;
	switch (storage)
	{
	case StorageClassInput:
		active_builtins = &active_input_builtins;
		break;

	case StorageClassOutput:
		active_builtins = &active_output_builtins;
		break;

	default:
		break;
	}

	assert(active_builtins != nullptr);
	active_builtins->set(builtin);

	auto &var = get_entry_point().interface_variables;
	if (find(begin(var), end(var), VariableID(id)) == end(var))
		var.push_back(id);
}

uint32_t CompilerMSL::build_constant_uint_array_pointer()
{
	uint32_t offset = ir.increase_bound_by(3);
	uint32_t type_ptr_id = offset;
	uint32_t type_ptr_ptr_id = offset + 1;
	uint32_t var_id = offset + 2;

	// Create a buffer to hold extra data, including the swizzle constants.
	SPIRType uint_type_pointer = get_uint_type();
	uint_type_pointer.op = OpTypePointer;
	uint_type_pointer.pointer = true;
	uint_type_pointer.pointer_depth++;
	uint_type_pointer.parent_type = get_uint_type_id();
	uint_type_pointer.storage = StorageClassUniform;
	set<SPIRType>(type_ptr_id, uint_type_pointer);
	set_decoration(type_ptr_id, DecorationArrayStride, 4);

	SPIRType uint_type_pointer2 = uint_type_pointer;
	uint_type_pointer2.pointer_depth++;
	uint_type_pointer2.parent_type = type_ptr_id;
	set<SPIRType>(type_ptr_ptr_id, uint_type_pointer2);

	set<SPIRVariable>(var_id, type_ptr_ptr_id, StorageClassUniformConstant);
	return var_id;
}
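
// Illustrative sketch (the binding index below is an example, not a fixed value):
// a variable built by build_constant_uint_array_pointer() typically surfaces as an
// entry-point argument like
//   constant uint* spvSwizzleConstants [[buffer(30)]]
// i.e. a pointer to a tightly packed (ArrayStride 4) array of uints.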

static string create_sampler_address(const char *prefix, MSLSamplerAddress addr)
{
	switch (addr)
	{
	case MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE:
		return join(prefix, "address::clamp_to_edge");
	case MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO:
		return join(prefix, "address::clamp_to_zero");
	case MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER:
		return join(prefix, "address::clamp_to_border");
	case MSL_SAMPLER_ADDRESS_REPEAT:
		return join(prefix, "address::repeat");
	case MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT:
		return join(prefix, "address::mirrored_repeat");
	default:
		SPIRV_CROSS_THROW("Invalid sampler addressing mode.");
	}
}
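
// For example, create_sampler_address("s_", MSL_SAMPLER_ADDRESS_REPEAT) yields
// "s_address::repeat", while an empty prefix yields plain "address::repeat".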

SPIRType &CompilerMSL::get_stage_in_struct_type()
{
	auto &si_var = get<SPIRVariable>(stage_in_var_id);
	return get_variable_data_type(si_var);
}

SPIRType &CompilerMSL::get_stage_out_struct_type()
{
	auto &so_var = get<SPIRVariable>(stage_out_var_id);
	return get_variable_data_type(so_var);
}

SPIRType &CompilerMSL::get_patch_stage_in_struct_type()
{
	auto &si_var = get<SPIRVariable>(patch_stage_in_var_id);
	return get_variable_data_type(si_var);
}

SPIRType &CompilerMSL::get_patch_stage_out_struct_type()
{
	auto &so_var = get<SPIRVariable>(patch_stage_out_var_id);
	return get_variable_data_type(so_var);
}

std::string CompilerMSL::get_tess_factor_struct_name()
{
	if (is_tessellating_triangles())
		return "MTLTriangleTessellationFactorsHalf";
	return "MTLQuadTessellationFactorsHalf";
}

SPIRType &CompilerMSL::get_uint_type()
{
	return get<SPIRType>(get_uint_type_id());
}

uint32_t CompilerMSL::get_uint_type_id()
{
	if (uint_type_id != 0)
		return uint_type_id;

	uint_type_id = ir.increase_bound_by(1);

	SPIRType type { OpTypeInt };
	type.basetype = SPIRType::UInt;
	type.width = 32;
	set<SPIRType>(uint_type_id, type);
	return uint_type_id;
}

void CompilerMSL::emit_entry_point_declarations()
{
	// FIXME: Get test coverage here ...
	// Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries.
	declare_complex_constant_arrays();

	// Emit constexpr samplers here.
	for (auto &samp : constexpr_samplers_by_id)
	{
		auto &var = get<SPIRVariable>(samp.first);
		auto &type = get<SPIRType>(var.basetype);
		if (type.basetype == SPIRType::Sampler)
			add_resource_name(samp.first);

		SmallVector<string> args;
		auto &s = samp.second;

		if (s.coord != MSL_SAMPLER_COORD_NORMALIZED)
			args.push_back("coord::pixel");

		if (s.min_filter == s.mag_filter)
		{
			if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST)
				args.push_back("filter::linear");
		}
		else
		{
			if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST)
				args.push_back("min_filter::linear");
			if (s.mag_filter != MSL_SAMPLER_FILTER_NEAREST)
				args.push_back("mag_filter::linear");
		}

		switch (s.mip_filter)
		{
		case MSL_SAMPLER_MIP_FILTER_NONE:
			// Default
			break;
		case MSL_SAMPLER_MIP_FILTER_NEAREST:
			args.push_back("mip_filter::nearest");
			break;
		case MSL_SAMPLER_MIP_FILTER_LINEAR:
			args.push_back("mip_filter::linear");
			break;
		default:
			SPIRV_CROSS_THROW("Invalid mip filter.");
		}

		if (s.s_address == s.t_address && s.s_address == s.r_address)
		{
			if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
				args.push_back(create_sampler_address("", s.s_address));
		}
		else
		{
			if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
				args.push_back(create_sampler_address("s_", s.s_address));
			if (s.t_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
				args.push_back(create_sampler_address("t_", s.t_address));
			if (s.r_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
				args.push_back(create_sampler_address("r_", s.r_address));
		}

		if (s.compare_enable)
		{
			switch (s.compare_func)
			{
			case MSL_SAMPLER_COMPARE_FUNC_ALWAYS:
				args.push_back("compare_func::always");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_NEVER:
				args.push_back("compare_func::never");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_EQUAL:
				args.push_back("compare_func::equal");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL:
				args.push_back("compare_func::not_equal");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_LESS:
				args.push_back("compare_func::less");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL:
				args.push_back("compare_func::less_equal");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_GREATER:
				args.push_back("compare_func::greater");
				break;
			case MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL:
				args.push_back("compare_func::greater_equal");
				break;
			default:
				SPIRV_CROSS_THROW("Invalid sampler compare function.");
			}
		}

		if (s.s_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER || s.t_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER ||
		    s.r_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER)
		{
			switch (s.border_color)
			{
			case MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK:
				args.push_back("border_color::opaque_black");
				break;
			case MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE:
				args.push_back("border_color::opaque_white");
				break;
			case MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK:
				args.push_back("border_color::transparent_black");
				break;
			default:
				SPIRV_CROSS_THROW("Invalid sampler border color.");
			}
		}

		if (s.anisotropy_enable)
			args.push_back(join("max_anisotropy(", s.max_anisotropy, ")"));
		if (s.lod_clamp_enable)
		{
			args.push_back(join("lod_clamp(", format_float(s.lod_clamp_min), ", ", format_float(s.lod_clamp_max), ")"));
		}

		// If we would emit no arguments, then omit the parentheses entirely. Otherwise,
		// we'll wind up with a "most vexing parse" situation.
		if (args.empty())
			statement("constexpr sampler ",
			          type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
			          ";");
		else
			statement("constexpr sampler ",
			          type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
			          "(", merge(args), ");");
	}
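
	// Illustrative sketch of the emitted declarations (names and settings are examples):
	//   constexpr sampler mySamp(coord::pixel, filter::linear, mip_filter::linear);
	// or, when no non-default arguments are collected, simply:
	//   constexpr sampler mySamp;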

	// Emit dynamic buffers here.
	for (auto &dynamic_buffer : buffers_requiring_dynamic_offset)
	{
		if (!dynamic_buffer.second.second)
		{
			// Could happen if no buffer was used at requested binding point.
			continue;
		}

		const auto &var = get<SPIRVariable>(dynamic_buffer.second.second);
		uint32_t var_id = var.self;
		const auto &type = get_variable_data_type(var);
		string name = to_name(var.self);
		uint32_t desc_set = get_decoration(var.self, DecorationDescriptorSet);
		uint32_t arg_id = argument_buffer_ids[desc_set];
		uint32_t base_index = dynamic_buffer.second.first;

		if (is_array(type))
		{
			is_using_builtin_array = true;
			statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, true), name,
			          type_to_array_glsl(type, var_id), " =");

			uint32_t array_size = get_resource_array_size(type, var_id);
			if (array_size == 0)
				SPIRV_CROSS_THROW("Size of runtime array with dynamic offset could not be determined from resource bindings.");

			begin_scope();
			for (uint32_t i = 0; i < array_size; i++)
			{
				statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ",
				          to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ",
				          to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"),
				          "[", i, "]", " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + i, "]),");
			}
			end_scope_decl();
			statement_no_indent("");
			is_using_builtin_array = false;
		}
		else
		{
			statement(get_argument_address_space(var), " auto& ", to_restrict(var_id, true), name, " = *(",
			          get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false), ")((",
			          get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")", to_name(arg_id), ".",
			          ensure_valid_name(name, "m"), " + ", to_name(dynamic_offsets_buffer_id), "[", base_index, "]);");
		}
	}
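
	// Illustrative sketch of the non-array case above (names are examples):
	//   device auto& myBuffer = *(device MyBuffer*)((device char*)spvDescriptorSet0.myBuffer +
	//                                               spvDynamicOffsets[0]);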

	bool has_runtime_array_declaration = false;
	for (SPIRVariable *arg : entry_point_bindings)
	{
		const auto &var = *arg;
		const auto &type = get_variable_data_type(var);
		const auto &buffer_type = get_variable_element_type(var);
		const string name = to_name(var.self);

		if (is_var_runtime_size_array(var))
		{
			if (msl_options.argument_buffers_tier < Options::ArgumentBuffersTier::Tier2)
			{
				SPIRV_CROSS_THROW("Unsized array of descriptors requires argument buffer tier 2");
			}

			string resource_name;
			if (descriptor_set_is_argument_buffer(get_decoration(var.self, DecorationDescriptorSet)))
				resource_name = ir.meta[var.self].decoration.qualified_alias;
			else
				resource_name = name + "_";

			switch (type.basetype)
			{
			case SPIRType::Image:
			case SPIRType::Sampler:
			case SPIRType::AccelerationStructure:
				statement("spvDescriptorArray<", type_to_glsl(buffer_type, var.self), "> ", name, " {", resource_name, "};");
				break;
			case SPIRType::SampledImage:
				statement("spvDescriptorArray<", type_to_glsl(buffer_type, var.self), "> ", name, " {", resource_name, "};");
				// Unsupported with argument buffer for now.
				statement("spvDescriptorArray<sampler> ", name, "Smplr {", name, "Smplr_};");
				break;
			case SPIRType::Struct:
				statement("spvDescriptorArray<", get_argument_address_space(var), " ", type_to_glsl(buffer_type), "*> ",
				          name, " {", resource_name, "};");
				break;
			default:
				break;
			}
			has_runtime_array_declaration = true;
		}
		else if (!type.array.empty() && type.basetype == SPIRType::Struct)
		{
			// Emit only buffer arrays here.
			statement(get_argument_address_space(var), " ", type_to_glsl(buffer_type), "* ",
			          to_restrict(var.self, true), name, "[] =");
			begin_scope();
			uint32_t array_size = get_resource_array_size(type, var.self);
			for (uint32_t i = 0; i < array_size; ++i)
				statement(name, "_", i, ",");
			end_scope_decl();
			statement_no_indent("");
		}
	}

	if (has_runtime_array_declaration)
		statement_no_indent("");

	// Emit buffer aliases here.
	for (auto &var_id : buffer_aliases_discrete)
	{
		const auto &var = get<SPIRVariable>(var_id);
		const auto &type = get_variable_data_type(var);
		auto addr_space = get_argument_address_space(var);
		auto name = to_name(var_id);

		uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
		uint32_t desc_binding = get_decoration(var_id, DecorationBinding);
		auto alias_name = join("spvBufferAliasSet", desc_set, "Binding", desc_binding);

		statement(addr_space, " auto& ", to_restrict(var_id, true),
		          name,
		          " = *(", addr_space, " ", type_to_glsl(type), "*)", alias_name, ";");
	}
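
	// Illustrative sketch of an emitted alias (set/binding are examples):
	//   device auto& myBuffer = *(device MyBuffer*)spvBufferAliasSet0Binding1;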

	// Discrete descriptors are processed in entry point emission every compiler iteration.
	buffer_aliases_discrete.clear();

	for (auto &var_pair : buffer_aliases_argument)
	{
		uint32_t var_id = var_pair.first;
		uint32_t alias_id = var_pair.second;

		const auto &var = get<SPIRVariable>(var_id);
		const auto &type = get_variable_data_type(var);
		auto addr_space = get_argument_address_space(var);

		if (type.array.empty())
		{
			statement(addr_space, " auto& ", to_restrict(var_id, true), to_name(var_id), " = (", addr_space, " ",
			          type_to_glsl(type), "&)", ir.meta[alias_id].decoration.qualified_alias, ";");
		}
		else
		{
			const char *desc_addr_space = descriptor_address_space(var_id, var.storage, "thread");

			// Esoteric type cast. Reference to array of pointers.
			// Auto here defers to UBO or SSBO. The address space of the reference needs to refer to the
			// address space of the argument buffer itself, which is usually constant, but can be const device for
			// large argument buffers.
			is_using_builtin_array = true;
			statement(desc_addr_space, " auto& ", to_restrict(var_id, true), to_name(var_id), " = (", addr_space, " ",
			          type_to_glsl(type), "* ", desc_addr_space, " (&)",
			          type_to_array_glsl(type, var_id), ")", ir.meta[alias_id].decoration.qualified_alias, ";");
			is_using_builtin_array = false;
		}
	}

	// Emit disabled fragment outputs.
	std::sort(disabled_frag_outputs.begin(), disabled_frag_outputs.end());
	for (uint32_t var_id : disabled_frag_outputs)
	{
		auto &var = get<SPIRVariable>(var_id);
		add_local_variable_name(var_id);
		statement(CompilerGLSL::variable_decl(var), ";");
		var.deferred_declaration = false;
	}
}

string CompilerMSL::compile()
{
	replace_illegal_entry_point_names();
	ir.fixup_reserved_names();

	// Do not deal with GLES-isms like precision, older extensions and such.
	options.vulkan_semantics = true;
	options.es = false;
	options.version = 450;
	backend.null_pointer_literal = "nullptr";
	backend.float_literal_suffix = false;
	backend.uint32_t_literal_suffix = true;
	backend.int16_t_literal_suffix = "";
	backend.uint16_t_literal_suffix = "";
	backend.basic_int_type = "int";
	backend.basic_uint_type = "uint";
	backend.basic_int8_type = "char";
	backend.basic_uint8_type = "uchar";
	backend.basic_int16_type = "short";
	backend.basic_uint16_type = "ushort";
	backend.boolean_mix_function = "select";
	backend.swizzle_is_function = false;
	backend.shared_is_implied = false;
	backend.use_initializer_list = true;
	backend.use_typed_initializer_list = true;
	backend.native_row_major_matrix = false;
	backend.unsized_array_supported = false;
	backend.can_declare_arrays_inline = false;
	backend.allow_truncated_access_chain = true;
	backend.comparison_image_samples_scalar = true;
	backend.native_pointers = true;
	backend.nonuniform_qualifier = "";
	backend.support_small_type_sampling_result = true;
	backend.supports_empty_struct = true;
	backend.support_64bit_switch = true;
	backend.boolean_in_struct_remapped_type = SPIRType::Short;

	// Allow Metal to use the array<T> template unless we force it off.
	backend.can_return_array = !msl_options.force_native_arrays;
	backend.array_is_value_type = !msl_options.force_native_arrays;
	// Arrays which are part of buffer objects are never considered to be value types (just plain C-style).
	backend.array_is_value_type_in_buffer_blocks = false;
	backend.support_pointer_to_pointer = true;
	backend.implicit_c_integer_promotion_rules = true;

	capture_output_to_buffer = msl_options.capture_output_to_buffer;
	is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;

	// Initialize array here rather than constructor, MSVC 2013 workaround.
	for (auto &id : next_metal_resource_ids)
		id = 0;

	fixup_anonymous_struct_names();
	fixup_type_alias();
	replace_illegal_names();
	sync_entry_point_aliases_and_names();

	build_function_control_flow_graphs_and_analyze();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_sampled_image_usage();
	analyze_interlocked_resource_usage();
	preprocess_op_codes();
	build_implicit_builtins();

	if (needs_manual_helper_invocation_updates() &&
	    (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation))
	{
		string builtin_helper_invocation = builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput);
		string discard_expr = join(builtin_helper_invocation, " = true, discard_fragment()");
		if (msl_options.force_fragment_with_side_effects_execution)
			discard_expr = join("!", builtin_helper_invocation, " ? (", discard_expr, ") : (void)0");
		backend.discard_literal = discard_expr;
		backend.demote_literal = discard_expr;
	}
	else
	{
		backend.discard_literal = "discard_fragment()";
		backend.demote_literal = "discard_fragment()";
	}
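
	// Illustrative sketch: with manual helper-invocation updates, a SPIR-V discard
	// lowers to something like
	//   gl_HelperInvocation = true, discard_fragment();
	// and, when fragment execution is forced, to the guarded form
	//   !gl_HelperInvocation ? (gl_HelperInvocation = true, discard_fragment()) : (void)0;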

	fixup_image_load_store_access();

	set_enabled_interface_variables(get_active_interface_variables());
	if (msl_options.force_active_argument_buffer_resources)
		activate_argument_buffer_resources();

	if (swizzle_buffer_id)
		add_active_interface_variable(swizzle_buffer_id);
	if (buffer_size_buffer_id)
		add_active_interface_variable(buffer_size_buffer_id);
	if (view_mask_buffer_id)
		add_active_interface_variable(view_mask_buffer_id);
	if (dynamic_offsets_buffer_id)
		add_active_interface_variable(dynamic_offsets_buffer_id);
	if (builtin_layer_id)
		add_active_interface_variable(builtin_layer_id);
	if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2))
		add_active_interface_variable(builtin_dispatch_base_id);
	if (builtin_sample_mask_id)
		add_active_interface_variable(builtin_sample_mask_id);
	if (builtin_frag_depth_id)
		add_active_interface_variable(builtin_frag_depth_id);

	// Create structs to hold input, output and uniform variables.
	// Do output first to ensure out. is declared at top of entry function.
	qual_pos_var_name = "";
	stage_out_var_id = add_interface_block(StorageClassOutput);
	patch_stage_out_var_id = add_interface_block(StorageClassOutput, true);
	stage_in_var_id = add_interface_block(StorageClassInput);
	if (is_tese_shader())
		patch_stage_in_var_id = add_interface_block(StorageClassInput, true);

	if (is_tesc_shader())
		stage_out_ptr_var_id = add_interface_block_pointer(stage_out_var_id, StorageClassOutput);
	if (is_tessellation_shader())
		stage_in_ptr_var_id = add_interface_block_pointer(stage_in_var_id, StorageClassInput);

	// Metal vertex functions that define no output must disable rasterization and return void.
	if (!stage_out_var_id)
		is_rasterization_disabled = true;

	// Convert the use of global variables to recursively-passed function parameters.
	localize_global_variables();
	extract_global_variables_from_functions();

	// Mark any non-stage-in structs to be tightly packed.
	mark_packable_structs();
	reorder_type_alias();

	// Add fixup hooks required by shader inputs and outputs. This needs to happen before
	// the loop, so the hooks aren't added multiple times.
	fix_up_shader_inputs_outputs();

	// If we are using argument buffers, we create argument buffer structures for them here.
	// These buffers will be used in the entry point, not the individual resources.
	if (msl_options.argument_buffers)
	{
		if (!msl_options.supports_msl_version(2, 0))
			SPIRV_CROSS_THROW("Argument buffers can only be used with MSL 2.0 and up.");
		analyze_argument_buffers();
	}

	uint32_t pass_count = 0;
	do
	{
		reset(pass_count);

		// Start bindings at zero.
		next_metal_resource_index_buffer = 0;
		next_metal_resource_index_texture = 0;
		next_metal_resource_index_sampler = 0;
		for (auto &id : next_metal_resource_ids)
			id = 0;

		// Move constructor for this type is broken on GCC 4.9 ...
		buffer.reset();

		emit_header();
		emit_custom_templates();
		emit_custom_functions();
		emit_specialization_constants_and_structs();
		emit_resources();
		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

		pass_count++;
	} while (is_forcing_recompilation());

	return buffer.str();
}

// Register the need to output any custom functions.
void CompilerMSL::preprocess_op_codes()
{
	OpCodePreprocessor preproc(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), preproc);

	suppress_missing_prototypes = preproc.suppress_missing_prototypes;

	if (preproc.uses_atomics)
	{
		add_header_line("#include <metal_atomic>");
		add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
	}

	// Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to
	// resources must disable rasterization and return void.
	if ((preproc.uses_buffer_write && !msl_options.supports_msl_version(2, 1)) ||
	    (preproc.uses_image_write && !msl_options.supports_msl_version(2, 2)))
		is_rasterization_disabled = true;

	// Tessellation control shaders are run as compute functions in Metal, and so
	// must capture their output to a buffer.
	if (is_tesc_shader() || (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation))
	{
		is_rasterization_disabled = true;
		capture_output_to_buffer = true;
	}

	if (preproc.needs_subgroup_invocation_id)
		needs_subgroup_invocation_id = true;
	if (preproc.needs_subgroup_size)
		needs_subgroup_size = true;
	// build_implicit_builtins() hasn't run yet, and in fact, this needs to execute
	// before then so that gl_SampleID will get added; so we also need to check if
	// that function would add gl_FragCoord.
	if (preproc.needs_sample_id || msl_options.force_sample_rate_shading ||
	    (is_sample_rate() && (active_input_builtins.get(BuiltInFragCoord) ||
	                          (need_subpass_input_ms && !msl_options.use_framebuffer_fetch_subpasses))))
		needs_sample_id = true;
	if (preproc.needs_helper_invocation)
		needs_helper_invocation = true;

	// OpKill is removed by the parser, so we need to identify those by inspecting
	// blocks.
	ir.for_each_typed_id<SPIRBlock>([&preproc](uint32_t, SPIRBlock &block) {
		if (block.terminator == SPIRBlock::Kill)
			preproc.uses_discard = true;
	});

	// Fragment shaders that both write to storage resources and discard fragments
	// need checks on the writes, to work around Metal allowing these writes despite
	// the fragment being dead. We may also need to force Metal to keep executing the
	// fragment shader instead of discarding the fragment prematurely.
	if (preproc.uses_discard && (preproc.uses_buffer_write || preproc.uses_image_write))
	{
		bool should_enable = (msl_options.check_discarded_frag_stores || msl_options.force_fragment_with_side_effects_execution);
		frag_shader_needs_discard_checks |= msl_options.check_discarded_frag_stores;
		needs_helper_invocation |= should_enable;
		// Fragment discard store checks imply manual HelperInvocation updates.
		msl_options.manual_helper_invocation_updates |= should_enable;
	}

	if (is_intersection_query())
	{
		add_header_line("#if __METAL_VERSION__ >= 230");
		add_header_line("#include <metal_raytracing>");
		add_header_line("using namespace metal::raytracing;");
		add_header_line("#endif");
	}
}

// Move the Private and Workgroup global variables to the entry function.
// Non-constant variables cannot have global scope in Metal.
void CompilerMSL::localize_global_variables()
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	auto iter = global_variables.begin();
	while (iter != global_variables.end())
	{
		uint32_t v_id = *iter;
		auto &var = get<SPIRVariable>(v_id);
		if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup)
		{
			if (!variable_is_lut(var))
				entry_func.add_local_variable(v_id);
			iter = global_variables.erase(iter);
		}
		else
			iter++;
	}
}

// For any global variable accessed directly by a function,
// extract that variable and add it as an argument to that function.
void CompilerMSL::extract_global_variables_from_functions()
{
	// Uniforms
	unordered_set<uint32_t> global_var_ids;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		// Some builtins resolve directly to a function call which does not need any declared variables.
		// Skip these.
		if (var.storage == StorageClassInput && has_decoration(var.self, DecorationBuiltIn))
		{
			auto bi_type = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
			if (bi_type == BuiltInHelperInvocation && !needs_manual_helper_invocation_updates())
				return;
			if (bi_type == BuiltInHelperInvocation && needs_manual_helper_invocation_updates())
			{
				if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
					SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.3 on iOS.");
				else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS.");

				// Make sure this is declared and initialized.
				// Force this to have the proper name.
				set_name(var.self, builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput));
				auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
				entry_func.add_local_variable(var.self);
				vars_needing_early_declaration.push_back(var.self);
				entry_func.fixup_hooks_in.push_back([this, &var]()
				                                    { statement(to_name(var.self), " = simd_is_helper_thread();"); });
			}
		}

		if (var.storage == StorageClassInput || var.storage == StorageClassOutput ||
		    var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
		    var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer)
		{
			global_var_ids.insert(var.self);
		}
	});

	// Local vars that are declared in the main function and accessed directly by a function
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	for (auto &var : entry_func.local_variables)
		if (get<SPIRVariable>(var).storage != StorageClassFunction)
			global_var_ids.insert(var);

	std::set<uint32_t> added_arg_ids;
	unordered_set<uint32_t> processed_func_ids;
	extract_global_variables_from_function(ir.default_entry_point, added_arg_ids, global_var_ids, processed_func_ids);
}

// MSL does not support the use of global variables for shader input content.
// For any global variable accessed directly by the specified function, extract that variable,
// add it as an argument to that function, and add the argument to the added_arg_ids collection.
void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
                                                         unordered_set<uint32_t> &global_var_ids,
                                                         unordered_set<uint32_t> &processed_func_ids)
{
	// Avoid processing a function more than once.
	if (processed_func_ids.find(func_id) != processed_func_ids.end())
	{
		// Return function global variables.
		added_arg_ids = function_global_vars[func_id];
		return;
	}

	processed_func_ids.insert(func_id);

	auto &func = get<SPIRFunction>(func_id);

	// Recursively establish global args added to functions on which we depend.
	for (auto block : func.blocks)
	{
		auto &b = get<SPIRBlock>(block);
		for (auto &i : b.ops)
		{
			auto ops = stream(i);
			auto op = static_cast<Op>(i.op);

			switch (op)
			{
			case OpLoad:
			case OpInBoundsAccessChain:
			case OpAccessChain:
			case OpPtrAccessChain:
			case OpArrayLength:
			{
				uint32_t base_id = ops[2];
				if (global_var_ids.find(base_id) != global_var_ids.end())
					added_arg_ids.insert(base_id);

				// Use Metal's native frame-buffer fetch API for subpass inputs.
				auto &type = get<SPIRType>(ops[0]);
				if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
				    (!msl_options.use_framebuffer_fetch_subpasses))
				{
					// Implicitly reads gl_FragCoord.
					assert(builtin_frag_coord_id != 0);
					added_arg_ids.insert(builtin_frag_coord_id);

					if (msl_options.multiview)
					{
						// Implicitly reads gl_ViewIndex.
						assert(builtin_view_idx_id != 0);
						added_arg_ids.insert(builtin_view_idx_id);
					}
					else if (msl_options.arrayed_subpass_input)
					{
						// Implicitly reads gl_Layer.
						assert(builtin_layer_id != 0);
						added_arg_ids.insert(builtin_layer_id);
					}
				}
				break;
			}

			case OpFunctionCall:
			{
				// First see if any of the function call args are globals.
				for (uint32_t arg_idx = 3; arg_idx < i.length; arg_idx++)
				{
					uint32_t arg_id = ops[arg_idx];
					if (global_var_ids.find(arg_id) != global_var_ids.end())
						added_arg_ids.insert(arg_id);
				}

				// Then recurse into the function itself to extract globals used internally in the function.
				uint32_t inner_func_id = ops[2];
				std::set<uint32_t> inner_func_args;
				extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids,
				                                       processed_func_ids);
				added_arg_ids.insert(inner_func_args.begin(), inner_func_args.end());
				break;
			}

			case OpStore:
			{
				uint32_t base_id = ops[0];
				if (global_var_ids.find(base_id) != global_var_ids.end())
				{
					added_arg_ids.insert(base_id);

					if (msl_options.input_attachment_is_ds_attachment && base_id == builtin_frag_depth_id)
						writes_to_depth = true;
				}

				uint32_t rvalue_id = ops[1];
				if (global_var_ids.find(rvalue_id) != global_var_ids.end())
					added_arg_ids.insert(rvalue_id);

				if (needs_frag_discard_checks())
					added_arg_ids.insert(builtin_helper_invocation_id);

				break;
			}

			case OpSelect:
			{
				uint32_t base_id = ops[3];
				if (global_var_ids.find(base_id) != global_var_ids.end())
					added_arg_ids.insert(base_id);
				base_id = ops[4];
				if (global_var_ids.find(base_id) != global_var_ids.end())
					added_arg_ids.insert(base_id);
				break;
			}

			case OpAtomicExchange:
			case OpAtomicCompareExchange:
			case OpAtomicStore:
			case OpAtomicIIncrement:
			case OpAtomicIDecrement:
			case OpAtomicIAdd:
			case OpAtomicFAddEXT:
			case OpAtomicISub:
			case OpAtomicSMin:
			case OpAtomicUMin:
			case OpAtomicSMax:
			case OpAtomicUMax:
			case OpAtomicAnd:
			case OpAtomicOr:
			case OpAtomicXor:
			case OpImageWrite:
			{
				if (needs_frag_discard_checks())
					added_arg_ids.insert(builtin_helper_invocation_id);
				uint32_t ptr = 0;
				if (op == OpAtomicStore || op == OpImageWrite)
					ptr = ops[0];
				else
					ptr = ops[2];
				if (global_var_ids.find(ptr) != global_var_ids.end())
					added_arg_ids.insert(ptr);
				break;
			}

			// Emulate texture2D atomic operations.
			case OpImageTexelPointer:
			{
				// When using the pointer, we need to know which variable it is actually loaded from.
				uint32_t base_id = ops[2];
				auto *var = maybe_get_backing_variable(base_id);
				if (var)
				{
					if (atomic_image_vars_emulated.count(var->self) &&
					    !get<SPIRType>(var->basetype).array.empty())
					{
						SPIRV_CROSS_THROW(
						    "Cannot emulate array of storage images with atomics. Use MSL 3.1 for native support.");
					}

					if (global_var_ids.find(base_id) != global_var_ids.end())
						added_arg_ids.insert(base_id);
				}
				break;
			}

			case OpExtInst:
			{
				uint32_t extension_set = ops[2];
				if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
				{
					auto op_450 = static_cast<GLSLstd450>(ops[3]);
					switch (op_450)
					{
					case GLSLstd450InterpolateAtCentroid:
					case GLSLstd450InterpolateAtSample:
					case GLSLstd450InterpolateAtOffset:
					{
						// For these, we really need the stage-in block. It is theoretically possible to pass the
						// interpolant object, but a) doing so would require us to create an entirely new variable
						// with Interpolant type, and b) if we have a struct or array, handling all the members and
						// elements could get unwieldy fast.
						added_arg_ids.insert(stage_in_var_id);
						break;
					}

					case GLSLstd450Modf:
					case GLSLstd450Frexp:
					{
						uint32_t base_id = ops[5];
						if (global_var_ids.find(base_id) != global_var_ids.end())
							added_arg_ids.insert(base_id);
						break;
					}

					default:
						break;
					}
				}
				break;
			}

			case OpGroupNonUniformInverseBallot:
			{
				added_arg_ids.insert(builtin_subgroup_invocation_id_id);
				break;
			}

			case OpGroupNonUniformBallotFindLSB:
			case OpGroupNonUniformBallotFindMSB:
			{
				added_arg_ids.insert(builtin_subgroup_size_id);
				break;
			}

			case OpGroupNonUniformBallotBitCount:
			{
				auto operation = static_cast<GroupOperation>(ops[3]);
				switch (operation)
				{
				case GroupOperationReduce:
					added_arg_ids.insert(builtin_subgroup_size_id);
					break;
				case GroupOperationInclusiveScan:
				case GroupOperationExclusiveScan:
					added_arg_ids.insert(builtin_subgroup_invocation_id_id);
					break;
				default:
					break;
				}
				break;
			}

			case OpDemoteToHelperInvocation:
				if (needs_manual_helper_invocation_updates() &&
				    (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation))
					added_arg_ids.insert(builtin_helper_invocation_id);
				break;

			case OpIsHelperInvocationEXT:
				if (needs_manual_helper_invocation_updates())
					added_arg_ids.insert(builtin_helper_invocation_id);
				break;

			case OpRayQueryInitializeKHR:
			case OpRayQueryProceedKHR:
			case OpRayQueryTerminateKHR:
			case OpRayQueryGenerateIntersectionKHR:
			case OpRayQueryConfirmIntersectionKHR:
			{
				// Ray queries access memory directly; we may need to pass the query object down
				// if it lives in the Private storage class.
				uint32_t base_id = ops[0];
				if (global_var_ids.find(base_id) != global_var_ids.end())
					added_arg_ids.insert(base_id);
				break;
			}

			case OpRayQueryGetRayTMinKHR:
			case OpRayQueryGetRayFlagsKHR:
			case OpRayQueryGetWorldRayOriginKHR:
			case OpRayQueryGetWorldRayDirectionKHR:
			case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
			case OpRayQueryGetIntersectionTypeKHR:
			case OpRayQueryGetIntersectionTKHR:
			case OpRayQueryGetIntersectionInstanceCustomIndexKHR:
			case OpRayQueryGetIntersectionInstanceIdKHR:
			case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
			case OpRayQueryGetIntersectionGeometryIndexKHR:
			case OpRayQueryGetIntersectionPrimitiveIndexKHR:
			case OpRayQueryGetIntersectionBarycentricsKHR:
			case OpRayQueryGetIntersectionFrontFaceKHR:
			case OpRayQueryGetIntersectionObjectRayDirectionKHR:
			case OpRayQueryGetIntersectionObjectRayOriginKHR:
			case OpRayQueryGetIntersectionObjectToWorldKHR:
			case OpRayQueryGetIntersectionWorldToObjectKHR:
			{
				// Ray queries access memory directly; we may need to pass the query object down
				// if it lives in the Private storage class.
				uint32_t base_id = ops[2];
				if (global_var_ids.find(base_id) != global_var_ids.end())
					added_arg_ids.insert(base_id);
				break;
			}

			default:
				break;
			}

			if (needs_manual_helper_invocation_updates() && b.terminator == SPIRBlock::Kill &&
			    (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation))
				added_arg_ids.insert(builtin_helper_invocation_id);

			// TODO: Add all other operations which can affect memory.
			// We should consider a more unified system here to reduce boiler-plate.
			// This kind of analysis is done in several places ...
		}
	}

	function_global_vars[func_id] = added_arg_ids;

	// Add the global variables as arguments to the function.
	if (func_id != ir.default_entry_point)
	{
		bool control_point_added_in = false;
		bool control_point_added_out = false;
		bool patch_added_in = false;
		bool patch_added_out = false;

		for (uint32_t arg_id : added_arg_ids)
		{
			auto &var = get<SPIRVariable>(arg_id);
			uint32_t type_id = var.basetype;
			auto *p_type = &get<SPIRType>(type_id);
			BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn));

			bool is_patch = has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type);
			bool is_block = has_decoration(p_type->self, DecorationBlock);
			bool is_control_point_storage =
			    !is_patch && ((is_tessellation_shader() && var.storage == StorageClassInput) ||
			                  (is_tesc_shader() && var.storage == StorageClassOutput));
			bool is_patch_block_storage = is_patch && is_block && var.storage == StorageClassOutput;
			bool is_builtin = is_builtin_variable(var);
			bool variable_is_stage_io =
			    !is_builtin || bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
			    bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance ||
			    p_type->basetype == SPIRType::Struct;
			bool is_redirected_to_global_stage_io = (is_control_point_storage || is_patch_block_storage) &&
			                                        variable_is_stage_io;

			// If output is masked it is not considered part of the global stage IO interface.
			if (is_redirected_to_global_stage_io && var.storage == StorageClassOutput)
				is_redirected_to_global_stage_io = !is_stage_output_variable_masked(var);

			if (is_redirected_to_global_stage_io)
			{
				// Tessellation control shaders see inputs and per-point outputs as arrays.
				// Similarly, tessellation evaluation shaders see per-point inputs as arrays.
				// We collected them into a structure; we must pass the array of this
				// structure to the function.
				std::string name;
				if (is_patch)
					name = var.storage == StorageClassInput ? patch_stage_in_var_name : patch_stage_out_var_name;
				else
					name = var.storage == StorageClassInput ? "gl_in" : "gl_out";

				if (var.storage == StorageClassOutput && has_decoration(p_type->self, DecorationBlock))
				{
					// If we're redirecting a block, we might still need to access the original block
					// variable if we're masking some members.
					for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(p_type->member_types.size()); mbr_idx++)
					{
						if (is_stage_output_block_member_masked(var, mbr_idx, true))
						{
							func.add_parameter(var.basetype, var.self, true);
							break;
						}
					}
				}

				if (var.storage == StorageClassInput)
				{
					auto &added_in = is_patch ? patch_added_in : control_point_added_in;
					if (added_in)
						continue;
					arg_id = is_patch ? patch_stage_in_var_id : stage_in_ptr_var_id;
					added_in = true;
				}
				else if (var.storage == StorageClassOutput)
				{
					auto &added_out = is_patch ? patch_added_out : control_point_added_out;
					if (added_out)
						continue;
					arg_id = is_patch ? patch_stage_out_var_id : stage_out_ptr_var_id;
					added_out = true;
				}

				type_id = get<SPIRVariable>(arg_id).basetype;
				uint32_t next_id = ir.increase_bound_by(1);
				func.add_parameter(type_id, next_id, true);
				set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
				set_name(next_id, name);
				if (is_tese_shader() && msl_options.raw_buffer_tese_input && var.storage == StorageClassInput)
					set_decoration(next_id, DecorationNonWritable);
			}
  1949. else if (is_builtin && has_decoration(p_type->self, DecorationBlock))
  1950. {
  1951. // Get the pointee type
  1952. type_id = get_pointee_type_id(type_id);
  1953. p_type = &get<SPIRType>(type_id);
  1954. uint32_t mbr_idx = 0;
  1955. for (auto &mbr_type_id : p_type->member_types)
  1956. {
  1957. BuiltIn builtin = BuiltInMax;
  1958. is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin);
  1959. if (is_builtin && has_active_builtin(builtin, var.storage))
  1960. {
  1961. // Add a arg variable with the same type and decorations as the member
  1962. uint32_t next_ids = ir.increase_bound_by(2);
  1963. uint32_t ptr_type_id = next_ids + 0;
  1964. uint32_t var_id = next_ids + 1;
  1965. // Make sure we have an actual pointer type,
  1966. // so that we will get the appropriate address space when declaring these builtins.
  1967. auto &ptr = set<SPIRType>(ptr_type_id, get<SPIRType>(mbr_type_id));
  1968. ptr.self = mbr_type_id;
  1969. ptr.storage = var.storage;
  1970. ptr.pointer = true;
  1971. ptr.pointer_depth++;
  1972. ptr.parent_type = mbr_type_id;
  1973. func.add_parameter(mbr_type_id, var_id, true);
  1974. set<SPIRVariable>(var_id, ptr_type_id, StorageClassFunction);
  1975. ir.meta[var_id].decoration = ir.meta[type_id].members[mbr_idx];
  1976. }
  1977. mbr_idx++;
  1978. }
  1979. }
  1980. else
  1981. {
  1982. uint32_t next_id = ir.increase_bound_by(1);
  1983. func.add_parameter(type_id, next_id, true);
  1984. set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
  1985. // Ensure the new variable has all the same meta info
  1986. ir.meta[next_id] = ir.meta[arg_id];
  1987. }
  1988. }
  1989. }
  1990. }
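// Illustrative summary of the redirection above: a helper function that referenced a per-point
// input or output now receives the shared gl_in/gl_out (or patch stage IO) array as an explicit
// parameter, and call sites forward the entry point's array. Masked block members instead keep
// routing through the original block variable that was added as a parameter above.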
// For all variables that are some form of non-input-output interface block, mark that all the structs
// that are recursively contained within the type referenced by that variable should be packed tightly.
void CompilerMSL::mark_packable_structs()
{
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		if (var.storage != StorageClassFunction && !is_hidden_variable(var))
		{
			auto &type = this->get<SPIRType>(var.basetype);
			if (type.pointer &&
			    (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
			     type.storage == StorageClassPushConstant || type.storage == StorageClassStorageBuffer) &&
			    (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
				mark_as_packable(type);
		}

		if (var.storage == StorageClassWorkgroup)
		{
			auto *type = &this->get<SPIRType>(var.basetype);
			if (type->basetype == SPIRType::Struct)
				mark_as_workgroup_struct(*type);
		}
	});

	// Physical storage buffer pointers can appear outside of the context of a variable, if the address
	// is calculated from a ulong or uvec2 and cast to a pointer, so check if they need to be packed too.
	ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
		if (type.basetype == SPIRType::Struct && type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
			mark_as_packable(type);
	});
}
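// Illustrative example of why repacking matters (assumed GLSL input; MSL output is approximate):
// a std430 buffer member declared "vec3 v;" occupies 12 bytes, but MSL's native float3 is
// 16-byte aligned, so a repacked struct may declare it as "packed_float3 v;" to preserve the
// SPIR-V offsets. The decoration set by mark_as_packable() below is what later drives that
// per-member layout fixup.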
// If the specified type is a struct, it and any nested structs
// are marked as packable with the SPIRVCrossDecorationBufferBlockRepacked decoration.
void CompilerMSL::mark_as_packable(SPIRType &type)
{
	// If this is not the base type (e.g., it's a pointer or array), tunnel down.
	if (type.parent_type)
	{
		mark_as_packable(get<SPIRType>(type.parent_type));
		return;
	}

	// Handle possible recursion when a struct contains a pointer to its own type nested somewhere.
	if (type.basetype == SPIRType::Struct && !has_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked))
	{
		set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked);

		// Recurse
		uint32_t mbr_cnt = uint32_t(type.member_types.size());
		for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
		{
			uint32_t mbr_type_id = type.member_types[mbr_idx];
			auto &mbr_type = get<SPIRType>(mbr_type_id);
			mark_as_packable(mbr_type);
			if (mbr_type.type_alias)
			{
				auto &mbr_type_alias = get<SPIRType>(mbr_type.type_alias);
				mark_as_packable(mbr_type_alias);
			}
		}
	}
}
// If the specified type is a struct, it and any nested structs
// are marked as used with workgroup storage using the SPIRVCrossDecorationWorkgroupStruct decoration.
void CompilerMSL::mark_as_workgroup_struct(SPIRType &type)
{
	// If this is not the base type (e.g., it's a pointer or array), tunnel down.
	if (type.parent_type)
	{
		mark_as_workgroup_struct(get<SPIRType>(type.parent_type));
		return;
	}

	// Handle possible recursion when a struct contains a pointer to its own type nested somewhere.
	if (type.basetype == SPIRType::Struct && !has_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct))
	{
		set_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct);

		// Recurse
		uint32_t mbr_cnt = uint32_t(type.member_types.size());
		for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
		{
			uint32_t mbr_type_id = type.member_types[mbr_idx];
			auto &mbr_type = get<SPIRType>(mbr_type_id);
			mark_as_workgroup_struct(mbr_type);
			if (mbr_type.type_alias)
			{
				auto &mbr_type_alias = get<SPIRType>(mbr_type.type_alias);
				mark_as_workgroup_struct(mbr_type_alias);
			}
		}
	}
}
// If a shader input or output exists at the location, it is marked as being used by this shader.
void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, const SPIRType &type,
                                                  StorageClass storage, bool fallback)
{
	uint32_t count = type_to_location_count(type);
	switch (storage)
	{
	case StorageClassInput:
		for (uint32_t i = 0; i < count; i++)
		{
			location_inputs_in_use.insert(location + i);
			if (fallback)
				location_inputs_in_use_fallback.insert(location + i);
		}
		break;
	case StorageClassOutput:
		for (uint32_t i = 0; i < count; i++)
		{
			location_outputs_in_use.insert(location + i);
			if (fallback)
				location_outputs_in_use_fallback.insert(location + i);
		}
		break;
	default:
		return;
	}
}
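// Example: an input declared "layout(location = 4) in mat4 m;" spans four location slots,
// so type_to_location_count() returns 4 and locations 4 through 7 are all recorded in
// location_inputs_in_use above.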
uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const
{
	auto itr = fragment_output_components.find(location);
	if (itr == end(fragment_output_components))
		return 4;
	else
		return itr->second;
}
uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components, SPIRType::BaseType basetype)
{
	assert(components > 1);
	uint32_t new_type_id = ir.increase_bound_by(1);
	const auto *p_old_type = &get<SPIRType>(type_id);
	const SPIRType *old_ptr_t = nullptr;
	const SPIRType *old_array_t = nullptr;

	if (is_pointer(*p_old_type))
	{
		old_ptr_t = p_old_type;
		p_old_type = &get_pointee_type(*old_ptr_t);
	}

	if (is_array(*p_old_type))
	{
		old_array_t = p_old_type;
		p_old_type = &get_type(old_array_t->parent_type);
	}

	auto *type = &set<SPIRType>(new_type_id, *p_old_type);
	assert(is_scalar(*type) || is_vector(*type));
	type->op = OpTypeVector;
	type->vecsize = components;
	if (basetype != SPIRType::Unknown)
		type->basetype = basetype;
	type->self = new_type_id;

	// We want parent type to point to the scalar type.
	type->parent_type = is_scalar(*p_old_type) ? TypeID(p_old_type->self) : p_old_type->parent_type;
	assert(is_scalar(get<SPIRType>(type->parent_type)));
	type->array.clear();
	type->array_size_literal.clear();
	type->pointer = false;

	if (old_array_t)
	{
		uint32_t array_type_id = ir.increase_bound_by(1);
		type = &set<SPIRType>(array_type_id, *type);
		type->op = OpTypeArray;
		type->parent_type = new_type_id;
		type->array = old_array_t->array;
		type->array_size_literal = old_array_t->array_size_literal;
		new_type_id = array_type_id;
	}

	if (old_ptr_t)
	{
		uint32_t ptr_type_id = ir.increase_bound_by(1);
		type = &set<SPIRType>(ptr_type_id, *type);
		type->op = OpTypePointer;
		type->parent_type = new_type_id;
		type->storage = old_ptr_t->storage;
		type->pointer = true;
		type->pointer_depth++;
		new_type_id = ptr_type_id;
	}

	return new_type_id;
}
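// Example (illustrative): given the type id of "float2 v[3]" and components = 4, this returns
// a type id describing "float4 v[3]"; any pointer and array wrappers present on the original
// type are rebuilt around the widened vector exactly as in the two blocks above.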
uint32_t CompilerMSL::build_msl_interpolant_type(uint32_t type_id, bool is_noperspective)
{
	uint32_t new_type_id = ir.increase_bound_by(1);
	SPIRType &type = set<SPIRType>(new_type_id, get<SPIRType>(type_id));
	type.basetype = SPIRType::Interpolant;
	type.parent_type = type_id;

	// In Metal, the pull-model interpolant type encodes perspective-vs-no-perspective in the type itself.
	// Add this decoration so we know which argument to pass to the template.
	if (is_noperspective)
		set_decoration(new_type_id, DecorationNoPerspective);

	return new_type_id;
}
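// The Interpolant base type above is later emitted as Metal's pull-model interpolant template,
// e.g. "interpolant<float4, interpolation::perspective>", or with
// "interpolation::no_perspective" when DecorationNoPerspective was recorded here.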
bool CompilerMSL::add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref,
                                                            SPIRVariable &var,
                                                            const SPIRType &type,
                                                            InterfaceBlockMeta &meta)
{
	// Deal with Component decorations.
	const InterfaceBlockMeta::LocationMeta *location_meta = nullptr;
	uint32_t location = ~0u;
	if (has_decoration(var.self, DecorationLocation))
	{
		location = get_decoration(var.self, DecorationLocation);
		auto location_meta_itr = meta.location_meta.find(location);
		if (location_meta_itr != end(meta.location_meta))
			location_meta = &location_meta_itr->second;
	}

	// Check if we need to pad fragment output to match a certain number of components.
	if (location_meta)
	{
		bool pad_fragment_output = has_decoration(var.self, DecorationLocation) &&
		                           msl_options.pad_fragment_output_components &&
		                           get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput;

		auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
		uint32_t start_component = get_decoration(var.self, DecorationComponent);
		uint32_t type_components = type.vecsize;
		uint32_t num_components = location_meta->num_components;

		if (pad_fragment_output)
		{
			uint32_t locn = get_decoration(var.self, DecorationLocation);
			num_components = max<uint32_t>(num_components, get_target_components_for_fragment_location(locn));
		}

		// We have already declared an IO block member as m_location_N.
		// Just emit an early-declared variable and fixup as needed.
		// Arrays need to be unrolled here since each location might need a different number of components.
		entry_func.add_local_variable(var.self);
		vars_needing_early_declaration.push_back(var.self);

		if (var.storage == StorageClassInput)
		{
			entry_func.fixup_hooks_in.push_back([=, &type, &var]() {
				if (!type.array.empty())
				{
					uint32_t array_size = to_array_size_literal(type);
					for (uint32_t loc_off = 0; loc_off < array_size; loc_off++)
					{
						statement(to_name(var.self), "[", loc_off, "]", " = ", ib_var_ref,
						          ".m_location_", location + loc_off,
						          vector_swizzle(type_components, start_component), ";");
					}
				}
				else
				{
					statement(to_name(var.self), " = ", ib_var_ref, ".m_location_", location,
					          vector_swizzle(type_components, start_component), ";");
				}
			});
		}
		else
		{
			entry_func.fixup_hooks_out.push_back([=, &type, &var]() {
				if (!type.array.empty())
				{
					uint32_t array_size = to_array_size_literal(type);
					for (uint32_t loc_off = 0; loc_off < array_size; loc_off++)
					{
						statement(ib_var_ref, ".m_location_", location + loc_off,
						          vector_swizzle(type_components, start_component), " = ",
						          to_name(var.self), "[", loc_off, "];");
					}
				}
				else
				{
					statement(ib_var_ref, ".m_location_", location,
					          vector_swizzle(type_components, start_component), " = ", to_name(var.self), ";");
				}
			});
		}
		return true;
	}
	else
		return false;
}
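// Example of the component fixup (illustrative names): for an input "float2 bar" with
// Location = 3 and Component = 2 that shares location 3 with another variable, the hook above
// emits roughly "bar = in.m_location_3.zw;", where ".zw" comes from
// vector_swizzle(type_components, start_component).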
void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
                                                        SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta)
{
	bool is_builtin = is_builtin_variable(var);
	BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
	bool is_flat = has_decoration(var.self, DecorationFlat);
	bool is_noperspective = has_decoration(var.self, DecorationNoPerspective);
	bool is_centroid = has_decoration(var.self, DecorationCentroid);
	bool is_sample = has_decoration(var.self, DecorationSample);

	// Add a reference to the variable type to the interface struct.
	uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
	uint32_t type_id = ensure_correct_builtin_type(var.basetype, builtin);
	var.basetype = type_id;

	type_id = get_pointee_type_id(var.basetype);
	if (meta.strip_array && is_array(get<SPIRType>(type_id)))
		type_id = get<SPIRType>(type_id).parent_type;
	auto &type = get<SPIRType>(type_id);
	uint32_t target_components = 0;
	uint32_t type_components = type.vecsize;

	bool padded_output = false;
	bool padded_input = false;
	uint32_t start_component = 0;

	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);

	if (add_component_variable_to_interface_block(storage, ib_var_ref, var, type, meta))
		return;

	bool pad_fragment_output = has_decoration(var.self, DecorationLocation) &&
	                           msl_options.pad_fragment_output_components &&
	                           get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput;

	if (pad_fragment_output)
	{
		uint32_t locn = get_decoration(var.self, DecorationLocation);
		target_components = get_target_components_for_fragment_location(locn);
		if (type_components < target_components)
		{
			// Make a new type here.
			type_id = build_extended_vector_type(type_id, target_components);
			padded_output = true;
		}
	}

	if (storage == StorageClassInput && pull_model_inputs.count(var.self))
		ib_type.member_types.push_back(build_msl_interpolant_type(type_id, is_noperspective));
	else
		ib_type.member_types.push_back(type_id);

	// Give the member a name
	string mbr_name = ensure_valid_name(to_expression(var.self), "m");
	set_member_name(ib_type.self, ib_mbr_idx, mbr_name);

	// Update the original variable reference to include the structure reference
	string qual_var_name = ib_var_ref + "." + mbr_name;
	// If using pull-model interpolation, need to add a call to the correct interpolation method.
	if (storage == StorageClassInput && pull_model_inputs.count(var.self))
	{
		if (is_centroid)
			qual_var_name += ".interpolate_at_centroid()";
		else if (is_sample)
			qual_var_name += join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")");
		else
			qual_var_name += ".interpolate_at_center()";
	}

	if (padded_output || padded_input)
	{
		entry_func.add_local_variable(var.self);
		vars_needing_early_declaration.push_back(var.self);

		if (padded_output)
		{
			entry_func.fixup_hooks_out.push_back([=, &var]() {
				statement(qual_var_name, vector_swizzle(type_components, start_component), " = ", to_name(var.self),
				          ";");
			});
		}
		else
		{
			entry_func.fixup_hooks_in.push_back([=, &var]() {
				statement(to_name(var.self), " = ", qual_var_name, vector_swizzle(type_components, start_component),
				          ";");
			});
		}
	}
	else if (!meta.strip_array)
		ir.meta[var.self].decoration.qualified_alias = qual_var_name;

	if (var.storage == StorageClassOutput && var.initializer != ID(0))
	{
		if (padded_output || padded_input)
		{
			entry_func.fixup_hooks_in.push_back(
			    [=, &var]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); });
		}
		else
		{
			if (meta.strip_array)
			{
				entry_func.fixup_hooks_in.push_back([=, &var]() {
					uint32_t index = get_extended_decoration(var.self, SPIRVCrossDecorationInterfaceMemberIndex);
					auto invocation = to_tesc_invocation_id();
					statement(to_expression(stage_out_ptr_var_id), "[",
					          invocation, "].",
					          to_member_name(ib_type, index), " = ", to_expression(var.initializer), "[",
					          invocation, "];");
				});
			}
			else
			{
				entry_func.fixup_hooks_in.push_back([=, &var]() {
					statement(qual_var_name, " = ", to_expression(var.initializer), ";");
				});
			}
		}
	}

	// Copy the variable location from the original variable to the member
	if (get_decoration_bitset(var.self).get(DecorationLocation))
	{
		uint32_t locn = get_decoration(var.self, DecorationLocation);
		uint32_t comp = get_decoration(var.self, DecorationComponent);
		if (storage == StorageClassInput)
		{
			type_id = ensure_correct_input_type(var.basetype, locn, comp, 0, meta.strip_array);
			var.basetype = type_id;

			type_id = get_pointee_type_id(type_id);
			if (meta.strip_array && is_array(get<SPIRType>(type_id)))
				type_id = get<SPIRType>(type_id).parent_type;
			if (pull_model_inputs.count(var.self))
				ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(type_id, is_noperspective);
			else
				ib_type.member_types[ib_mbr_idx] = type_id;
		}
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
		if (comp)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp);
		mark_location_as_used_by_shader(locn, get<SPIRType>(type_id), storage);
	}
	else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
	{
		uint32_t locn = inputs_by_builtin[builtin].location;
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
		mark_location_as_used_by_shader(locn, type, storage);
	}
	else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin))
	{
		uint32_t locn = outputs_by_builtin[builtin].location;
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
		mark_location_as_used_by_shader(locn, type, storage);
	}

	if (get_decoration_bitset(var.self).get(DecorationComponent))
	{
		uint32_t component = get_decoration(var.self, DecorationComponent);
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, component);
	}

	if (get_decoration_bitset(var.self).get(DecorationIndex))
	{
		uint32_t index = get_decoration(var.self, DecorationIndex);
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index);
	}

	// Mark the member as builtin if needed
	if (is_builtin)
	{
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);
		if (builtin == BuiltInPosition && storage == StorageClassOutput)
			qual_pos_var_name = qual_var_name;
	}

	// Copy interpolation decorations if needed
	if (storage != StorageClassInput || !pull_model_inputs.count(var.self))
	{
		if (is_flat)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
		if (is_noperspective)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
		if (is_centroid)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
		if (is_sample)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
	}

	set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);
}
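// Example of the padded-output path (illustrative): if a fragment output "float2 color" targets
// a location registered with 4 components, the interface member is widened to float4 and the
// fixup above emits roughly "out.color.xy = color;", leaving the padding components unwritten.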
void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
                                                            SPIRType &ib_type, SPIRVariable &var,
                                                            InterfaceBlockMeta &meta)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
	uint32_t elem_cnt = 0;

	if (add_component_variable_to_interface_block(storage, ib_var_ref, var, var_type, meta))
		return;

	if (is_matrix(var_type))
	{
		if (is_array(var_type))
			SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables.");

		elem_cnt = var_type.columns;
	}
	else if (is_array(var_type))
	{
		if (var_type.array.size() != 1)
			SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables.");

		elem_cnt = to_array_size_literal(var_type);
	}

	bool is_builtin = is_builtin_variable(var);
	BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
	bool is_flat = has_decoration(var.self, DecorationFlat);
	bool is_noperspective = has_decoration(var.self, DecorationNoPerspective);
	bool is_centroid = has_decoration(var.self, DecorationCentroid);
	bool is_sample = has_decoration(var.self, DecorationSample);

	auto *usable_type = &var_type;
	if (usable_type->pointer)
		usable_type = &get<SPIRType>(usable_type->parent_type);
	while (is_array(*usable_type) || is_matrix(*usable_type))
		usable_type = &get<SPIRType>(usable_type->parent_type);

	// If a builtin, force it to have the proper name.
	if (is_builtin)
		set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction));

	bool flatten_from_ib_var = false;
	string flatten_from_ib_mbr_name;

	if (storage == StorageClassOutput && is_builtin && builtin == BuiltInClipDistance)
	{
		// Also declare [[clip_distance]] attribute here.
		uint32_t clip_array_mbr_idx = uint32_t(ib_type.member_types.size());
		ib_type.member_types.push_back(get_variable_data_type_id(var));
		set_member_decoration(ib_type.self, clip_array_mbr_idx, DecorationBuiltIn, BuiltInClipDistance);

		flatten_from_ib_mbr_name = builtin_to_glsl(BuiltInClipDistance, StorageClassOutput);
		set_member_name(ib_type.self, clip_array_mbr_idx, flatten_from_ib_mbr_name);

		// When we flatten, we flatten directly from the "out" struct,
		// not from a function variable.
		flatten_from_ib_var = true;

		if (!msl_options.enable_clip_distance_user_varying)
			return;
	}
	else if (!meta.strip_array)
	{
		// Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped.
		entry_func.add_local_variable(var.self);
		// We need to declare the variable early and at entry-point scope.
		vars_needing_early_declaration.push_back(var.self);
	}

	for (uint32_t i = 0; i < elem_cnt; i++)
	{
		// Add a reference to the variable type to the interface struct.
		uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());

		uint32_t target_components = 0;
		bool padded_output = false;
		uint32_t type_id = usable_type->self;

		// Check if we need to pad fragment output to match a certain number of components.
		if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components &&
		    get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput)
		{
			uint32_t locn = get_decoration(var.self, DecorationLocation) + i;
			target_components = get_target_components_for_fragment_location(locn);
			if (usable_type->vecsize < target_components)
			{
				// Make a new type here.
				type_id = build_extended_vector_type(usable_type->self, target_components);
				padded_output = true;
			}
		}

		if (storage == StorageClassInput && pull_model_inputs.count(var.self))
			ib_type.member_types.push_back(build_msl_interpolant_type(get_pointee_type_id(type_id), is_noperspective));
		else
			ib_type.member_types.push_back(get_pointee_type_id(type_id));

		// Give the member a name
		string mbr_name = ensure_valid_name(join(to_expression(var.self), "_", i), "m");
		set_member_name(ib_type.self, ib_mbr_idx, mbr_name);

		// There is no qualified alias since we need to flatten the internal array on return.
		if (get_decoration_bitset(var.self).get(DecorationLocation))
		{
			uint32_t locn = get_decoration(var.self, DecorationLocation) + i;
			uint32_t comp = get_decoration(var.self, DecorationComponent);
			if (storage == StorageClassInput)
			{
				var.basetype = ensure_correct_input_type(var.basetype, locn, comp, 0, meta.strip_array);
				uint32_t mbr_type_id = ensure_correct_input_type(usable_type->self, locn, comp, 0, meta.strip_array);
				if (storage == StorageClassInput && pull_model_inputs.count(var.self))
					ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
				else
					ib_type.member_types[ib_mbr_idx] = mbr_type_id;
			}
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			if (comp)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp);
			mark_location_as_used_by_shader(locn, *usable_type, storage);
		}
		else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
		{
			uint32_t locn = inputs_by_builtin[builtin].location + i;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			mark_location_as_used_by_shader(locn, *usable_type, storage);
		}
		else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin))
		{
			uint32_t locn = outputs_by_builtin[builtin].location + i;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			mark_location_as_used_by_shader(locn, *usable_type, storage);
		}
		else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance))
		{
			// Declare the Clip/CullDistance as [[user(clip/cullN)]].
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, i);
		}

		if (get_decoration_bitset(var.self).get(DecorationIndex))
		{
			uint32_t index = get_decoration(var.self, DecorationIndex);
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index);
		}

		if (storage != StorageClassInput || !pull_model_inputs.count(var.self))
		{
			// Copy interpolation decorations if needed
			if (is_flat)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
			if (is_noperspective)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
			if (is_centroid)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
			if (is_sample)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
		}

		set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);

		// Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped.
		if (!meta.strip_array)
		{
			switch (storage)
			{
			case StorageClassInput:
				entry_func.fixup_hooks_in.push_back([=, &var]() {
					if (pull_model_inputs.count(var.self))
					{
						string lerp_call;
						if (is_centroid)
							lerp_call = ".interpolate_at_centroid()";
						else if (is_sample)
							lerp_call = join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")");
						else
							lerp_call = ".interpolate_at_center()";
						statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, lerp_call, ";");
					}
					else
					{
						statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";");
					}
				});
				break;

			case StorageClassOutput:
				entry_func.fixup_hooks_out.push_back([=, &var]() {
					if (padded_output)
					{
						auto &padded_type = this->get<SPIRType>(type_id);
						statement(
						    ib_var_ref, ".", mbr_name, " = ",
						    remap_swizzle(padded_type, usable_type->vecsize, join(to_name(var.self), "[", i, "]")),
						    ";");
					}
					else if (flatten_from_ib_var)
						statement(ib_var_ref, ".", mbr_name, " = ", ib_var_ref, ".", flatten_from_ib_mbr_name, "[", i,
						          "];");
					else
						statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];");
				});
				break;

			default:
				break;
			}
		}
	}
}
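// Example (illustrative): a vertex output "layout(location = 1) out mat3 m;" is flattened into
// three float3 members named m_0, m_1 and m_2 at locations 1, 2 and 3, and the output hooks
// above emit "out.m_0 = m[0];" and so on; inputs get the matching unflatten in fixup_hooks_in.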
void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage,
                                                                   const string &ib_var_ref, SPIRType &ib_type,
                                                                   SPIRVariable &var, SPIRType &var_type,
                                                                   uint32_t mbr_idx, InterfaceBlockMeta &meta,
                                                                   const string &mbr_name_qual,
                                                                   const string &var_chain_qual,
                                                                   uint32_t &location, uint32_t &var_mbr_idx,
                                                                   const Bitset &interpolation_qual)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);

	BuiltIn builtin = BuiltInMax;
	bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
	bool is_flat = interpolation_qual.get(DecorationFlat) ||
	               has_member_decoration(var_type.self, mbr_idx, DecorationFlat) ||
	               has_decoration(var.self, DecorationFlat);
	bool is_noperspective = interpolation_qual.get(DecorationNoPerspective) ||
	                        has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) ||
	                        has_decoration(var.self, DecorationNoPerspective);
	bool is_centroid = interpolation_qual.get(DecorationCentroid) ||
	                   has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) ||
	                   has_decoration(var.self, DecorationCentroid);
	bool is_sample = interpolation_qual.get(DecorationSample) ||
	                 has_member_decoration(var_type.self, mbr_idx, DecorationSample) ||
	                 has_decoration(var.self, DecorationSample);

	Bitset inherited_qual;
	if (is_flat)
		inherited_qual.set(DecorationFlat);
	if (is_noperspective)
		inherited_qual.set(DecorationNoPerspective);
	if (is_centroid)
		inherited_qual.set(DecorationCentroid);
	if (is_sample)
		inherited_qual.set(DecorationSample);

	uint32_t mbr_type_id = var_type.member_types[mbr_idx];
	auto &mbr_type = get<SPIRType>(mbr_type_id);

	bool mbr_is_indexable = false;
	uint32_t elem_cnt = 1;
	if (is_matrix(mbr_type))
	{
		if (is_array(mbr_type))
			SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables.");

		mbr_is_indexable = true;
		elem_cnt = mbr_type.columns;
	}
	else if (is_array(mbr_type))
	{
		if (mbr_type.array.size() != 1)
			SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables.");

		mbr_is_indexable = true;
		elem_cnt = to_array_size_literal(mbr_type);
	}

	auto *usable_type = &mbr_type;
	if (usable_type->pointer)
		usable_type = &get<SPIRType>(usable_type->parent_type);
	while (is_array(*usable_type) || is_matrix(*usable_type))
		usable_type = &get<SPIRType>(usable_type->parent_type);

	bool flatten_from_ib_var = false;
	string flatten_from_ib_mbr_name;

	if (storage == StorageClassOutput && is_builtin && builtin == BuiltInClipDistance)
	{
		// Also declare [[clip_distance]] attribute here.
		uint32_t clip_array_mbr_idx = uint32_t(ib_type.member_types.size());
		ib_type.member_types.push_back(mbr_type_id);
		set_member_decoration(ib_type.self, clip_array_mbr_idx, DecorationBuiltIn, BuiltInClipDistance);

		flatten_from_ib_mbr_name = builtin_to_glsl(BuiltInClipDistance, StorageClassOutput);
		set_member_name(ib_type.self, clip_array_mbr_idx, flatten_from_ib_mbr_name);

		// When we flatten, we flatten directly from the "out" struct,
		// not from a function variable.
		flatten_from_ib_var = true;

		if (!msl_options.enable_clip_distance_user_varying)
			return;
	}

	// Recursively handle nested structures.
	if (mbr_type.basetype == SPIRType::Struct)
	{
		for (uint32_t i = 0; i < elem_cnt; i++)
		{
			string mbr_name = append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : "");
			string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (mbr_is_indexable ? join("[", i, "]") : ""));
			uint32_t sub_mbr_cnt = uint32_t(mbr_type.member_types.size());
			for (uint32_t sub_mbr_idx = 0; sub_mbr_idx < sub_mbr_cnt; sub_mbr_idx++)
			{
				add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type,
				                                                 var, mbr_type, sub_mbr_idx,
				                                                 meta, mbr_name, var_chain,
				                                                 location, var_mbr_idx, inherited_qual);
				// FIXME: Recursive structs and tessellation breaks here.
				var_mbr_idx++;
			}
		}
		return;
	}

	for (uint32_t i = 0; i < elem_cnt; i++)
	{
		// Add a reference to the variable type to the interface struct.
		uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
		if (storage == StorageClassInput && pull_model_inputs.count(var.self))
			ib_type.member_types.push_back(build_msl_interpolant_type(usable_type->self, is_noperspective));
		else
			ib_type.member_types.push_back(usable_type->self);

		// Give the member a name
		string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : ""), "m");
		set_member_name(ib_type.self, ib_mbr_idx, mbr_name);

		// Once we determine the location of the first member within nested structures,
		// from a var of the topmost structure, the remaining flattened members of
		// the nested structures will have consecutive location values. At this point,
		// we've recursively tunnelled into structs, arrays, and matrices, and are
		// down to a single location for each member now.
		if (!is_builtin && location != UINT32_MAX)
		{
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
			mark_location_as_used_by_shader(location, *usable_type, storage);
			location++;
		}
		else if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation))
		{
			location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
			mark_location_as_used_by_shader(location, *usable_type, storage);
			location++;
		}
		else if (has_decoration(var.self, DecorationLocation))
		{
			location = get_accumulated_member_location(var, mbr_idx, meta.strip_array) + i;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
			mark_location_as_used_by_shader(location, *usable_type, storage);
			location++;
		}
		else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
		{
			location = inputs_by_builtin[builtin].location + i;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
			mark_location_as_used_by_shader(location, *usable_type, storage);
			location++;
		}
		else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin))
		{
			location = outputs_by_builtin[builtin].location + i;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
			mark_location_as_used_by_shader(location, *usable_type, storage);
			location++;
		}
		else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance))
		{
			// Declare the Clip/CullDistance as [[user(clip/cullN)]].
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, i);
		}

		if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent))
			SPIRV_CROSS_THROW("DecorationComponent on matrices and arrays is not supported.");

		if (storage != StorageClassInput || !pull_model_inputs.count(var.self))
		{
			// Copy interpolation decorations if needed
			if (is_flat)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
			if (is_noperspective)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
			if (is_centroid)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
			if (is_sample)
				set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
		}

		set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);
		set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, var_mbr_idx);

		// Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate.
		if (!meta.strip_array && meta.allow_local_declaration)
		{
			string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (mbr_is_indexable ? join("[", i, "]") : ""));
			switch (storage)
			{
			case StorageClassInput:
				entry_func.fixup_hooks_in.push_back([=, &var]() {
					string lerp_call;
					if (pull_model_inputs.count(var.self))
					{
						if (is_centroid)
							lerp_call = ".interpolate_at_centroid()";
						else if (is_sample)
							lerp_call = join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")");
						else
							lerp_call = ".interpolate_at_center()";
					}
					statement(var_chain, " = ", ib_var_ref, ".", mbr_name, lerp_call, ";");
				});
				break;

			case StorageClassOutput:
				entry_func.fixup_hooks_out.push_back([=]() {
					if (flatten_from_ib_var)
						statement(ib_var_ref, ".", mbr_name, " = ", ib_var_ref, ".", flatten_from_ib_mbr_name, "[", i, "];");
					else
						statement(ib_var_ref, ".", mbr_name, " = ", var_chain, ";");
				});
				break;

			default:
				break;
			}
		}
	}
}
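// Example (illustrative, assuming a block instance named "blk"): for an IO block member
// "vec4 a[2];", the loop above adds members named roughly "blk_a_0" and "blk_a_1" at
// consecutive locations, and the input hooks emit "blk.a[0] = in.blk_a_0;" etc. via var_chain.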
void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass storage,
                                                               const string &ib_var_ref, SPIRType &ib_type,
                                                               SPIRVariable &var, SPIRType &var_type,
                                                               uint32_t mbr_idx, InterfaceBlockMeta &meta,
                                                               const string &mbr_name_qual,
                                                               const string &var_chain_qual,
                                                               uint32_t &location, uint32_t &var_mbr_idx)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);

	BuiltIn builtin = BuiltInMax;
	bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
	bool is_flat =
	    has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat);
	bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) ||
	                        has_decoration(var.self, DecorationNoPerspective);
	bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) ||
	                   has_decoration(var.self, DecorationCentroid);
	bool is_sample =
	    has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample);

	// Add a reference to the member to the interface struct.
	uint32_t mbr_type_id = var_type.member_types[mbr_idx];
	uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
	mbr_type_id = ensure_correct_builtin_type(mbr_type_id, builtin);
	var_type.member_types[mbr_idx] = mbr_type_id;
	if (storage == StorageClassInput && pull_model_inputs.count(var.self))
		ib_type.member_types.push_back(build_msl_interpolant_type(mbr_type_id, is_noperspective));
	else
		ib_type.member_types.push_back(mbr_type_id);

	// Give the member a name
	string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx), "m");
	set_member_name(ib_type.self, ib_mbr_idx, mbr_name);

	// Update the original variable reference to include the structure reference
	string qual_var_name = ib_var_ref + "." + mbr_name;
	// If using pull-model interpolation, need to add a call to the correct interpolation method.
	if (storage == StorageClassInput && pull_model_inputs.count(var.self))
	{
		if (is_centroid)
			qual_var_name += ".interpolate_at_centroid()";
		else if (is_sample)
			qual_var_name += join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")");
		else
			qual_var_name += ".interpolate_at_center()";
	}

	bool flatten_stage_out = false;
	string var_chain = var_chain_qual + "." + to_member_name(var_type, mbr_idx);
	if (is_builtin && !meta.strip_array)
	{
		// For the builtin gl_PerVertex, we cannot treat it as a block anyway,
		// so redirect to qualified name.
		set_member_qualified_name(var_type.self, mbr_idx, qual_var_name);
	}
	else if (!meta.strip_array && meta.allow_local_declaration)
	{
		// Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate.
		switch (storage)
		{
		case StorageClassInput:
			entry_func.fixup_hooks_in.push_back([=]() {
				statement(var_chain, " = ", qual_var_name, ";");
			});
			break;

		case StorageClassOutput:
			flatten_stage_out = true;
			entry_func.fixup_hooks_out.push_back([=]() {
				statement(qual_var_name, " = ", var_chain, ";");
			});
			break;

		default:
			break;
		}
	}

	// Once we determine the location of the first member within nested structures,
	// from a var of the topmost structure, the remaining flattened members of
	// the nested structures will have consecutive location values. At this point,
	// we've recursively tunnelled into structs, arrays, and matrices, and are
	// down to a single location for each member now.
	if (!is_builtin && location != UINT32_MAX)
	{
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
		mark_location_as_used_by_shader(location, get<SPIRType>(mbr_type_id), storage);
		location += type_to_location_count(get<SPIRType>(mbr_type_id));
	}
	else if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation))
	{
		location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation);
		uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent);
		if (storage == StorageClassInput)
		{
			mbr_type_id = ensure_correct_input_type(mbr_type_id, location, comp, 0, meta.strip_array);
			var_type.member_types[mbr_idx] = mbr_type_id;
			if (storage == StorageClassInput && pull_model_inputs.count(var.self))
				ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
			else
				ib_type.member_types[ib_mbr_idx] = mbr_type_id;
		}
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
		mark_location_as_used_by_shader(location, get<SPIRType>(mbr_type_id), storage);
		location += type_to_location_count(get<SPIRType>(mbr_type_id));
	}
	else if (has_decoration(var.self, DecorationLocation))
	{
		location = get_accumulated_member_location(var, mbr_idx, meta.strip_array);
		if (storage == StorageClassInput)
		{
			mbr_type_id = ensure_correct_input_type(mbr_type_id, location, 0, 0, meta.strip_array);
			var_type.member_types[mbr_idx] = mbr_type_id;
			if (storage == StorageClassInput && pull_model_inputs.count(var.self))
				ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
			else
				ib_type.member_types[ib_mbr_idx] = mbr_type_id;
		}
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
		mark_location_as_used_by_shader(location, get<SPIRType>(mbr_type_id), storage);
		location += type_to_location_count(get<SPIRType>(mbr_type_id));
	}
	else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
	{
		location = inputs_by_builtin[builtin].location;
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
		mark_location_as_used_by_shader(location, get<SPIRType>(mbr_type_id), storage);
		location += type_to_location_count(get<SPIRType>(mbr_type_id));
	}
	else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin))
	{
		location = outputs_by_builtin[builtin].location;
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
		mark_location_as_used_by_shader(location, get<SPIRType>(mbr_type_id), storage);
		location += type_to_location_count(get<SPIRType>(mbr_type_id));
	}

	// Copy the component location, if present.
	if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent))
	{
		uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent);
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp);
	}

	// Mark the member as builtin if needed
	if (is_builtin)
	{
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);
		if (builtin == BuiltInPosition && storage == StorageClassOutput)
			qual_pos_var_name = qual_var_name;
	}

	const SPIRConstant *c = nullptr;
	if (!flatten_stage_out && var.storage == StorageClassOutput &&
	    var.initializer != ID(0) && (c = maybe_get<SPIRConstant>(var.initializer)))
	{
		if (meta.strip_array)
		{
			entry_func.fixup_hooks_in.push_back([=, &var]() {
				auto &type = this->get<SPIRType>(var.basetype);
				uint32_t index = get_extended_member_decoration(var.self, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex);

				auto invocation = to_tesc_invocation_id();
				auto constant_chain = join(to_expression(var.initializer), "[", invocation, "]");
				statement(to_expression(stage_out_ptr_var_id), "[",
				          invocation, "].",
				          to_member_name(ib_type, index), " = ",
				          constant_chain, ".", to_member_name(type, mbr_idx), ";");
			});
		}
		else
		{
			entry_func.fixup_hooks_in.push_back([=]() {
				statement(qual_var_name, " = ", constant_expression(
				          this->get<SPIRConstant>(c->subconstants[mbr_idx])), ";");
			});
		}
	}

	if (storage != StorageClassInput || !pull_model_inputs.count(var.self))
	{
		// Copy interpolation decorations if needed
		if (is_flat)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
		if (is_noperspective)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
		if (is_centroid)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
		if (is_sample)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
	}

	set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);
	set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, var_mbr_idx);
}
// In Metal, the tessellation levels are stored as tightly packed half-precision floating point values.
// But, stage-in attribute offsets and strides must be multiples of four, so we can't pass the levels
// individually. Therefore, we must pass them as vectors. Triangles get a single float4, with the outer
// levels in 'xyz' and the inner level in 'w'. Quads get a float4 containing the outer levels and a
// float2 containing the inner levels.
void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type,
                                                          SPIRVariable &var)
{
	auto &var_type = get_variable_element_type(var);

	BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
	bool triangles = is_tessellating_triangles();
	string mbr_name;

	// Add a reference to the variable type to the interface struct.
	uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());

	const auto mark_locations = [&](const SPIRType &new_var_type) {
		if (get_decoration_bitset(var.self).get(DecorationLocation))
		{
			uint32_t locn = get_decoration(var.self, DecorationLocation);
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput);
		}
		else if (inputs_by_builtin.count(builtin))
		{
			uint32_t locn = inputs_by_builtin[builtin].location;
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
			mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput);
		}
	};

	if (triangles)
	{
		// Triangles are tricky, because we want only one member in the struct.
		mbr_name = "gl_TessLevel";

		// If we already added the other one, we can skip this step.
		if (!added_builtin_tess_level)
		{
			uint32_t type_id = build_extended_vector_type(var_type.self, 4);

			ib_type.member_types.push_back(type_id);

			// Give the member a name
			set_member_name(ib_type.self, ib_mbr_idx, mbr_name);

			// We cannot decorate both, but the important part is that
			// it's marked as builtin so we can get automatic attribute assignment if needed.
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);

			mark_locations(var_type);
			added_builtin_tess_level = true;
		}
	}
	else
	{
		mbr_name = builtin_to_glsl(builtin, StorageClassFunction);

		uint32_t type_id = build_extended_vector_type(var_type.self, builtin == BuiltInTessLevelOuter ? 4 : 2);

		uint32_t ptr_type_id = ir.increase_bound_by(1);
		auto &new_var_type = set<SPIRType>(ptr_type_id, get<SPIRType>(type_id));
		new_var_type.pointer = true;
		new_var_type.pointer_depth++;
		new_var_type.storage = StorageClassInput;
		new_var_type.parent_type = type_id;

		ib_type.member_types.push_back(type_id);

		// Give the member a name
		set_member_name(ib_type.self, ib_mbr_idx, mbr_name);
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);

		mark_locations(new_var_type);
	}

	add_tess_level_input(ib_var_ref, mbr_name, var);
}
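// Illustrative result (member types approximate): for the quad domain, the stage-in struct
// gains a 4-component gl_TessLevelOuter member and a 2-component gl_TessLevelInner member;
// for the triangle domain it gains a single 4-component "gl_TessLevel" member carrying the
// outer levels in xyz and the inner level in w, as described in the comment above.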
void CompilerMSL::add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));

	// Force the variable to have the proper name.
	string var_name = builtin_to_glsl(builtin, StorageClassFunction);
	set_name(var.self, var_name);

	// We need to declare the variable early and at entry-point scope.
	entry_func.add_local_variable(var.self);
	vars_needing_early_declaration.push_back(var.self);

	bool triangles = is_tessellating_triangles();

	if (builtin == BuiltInTessLevelOuter)
	{
		entry_func.fixup_hooks_in.push_back(
		    [=]()
		    {
			    statement(var_name, "[0] = ", base_ref, ".", mbr_name, "[0];");
			    statement(var_name, "[1] = ", base_ref, ".", mbr_name, "[1];");
			    statement(var_name, "[2] = ", base_ref, ".", mbr_name, "[2];");
			    if (!triangles)
				    statement(var_name, "[3] = ", base_ref, ".", mbr_name, "[3];");
		    });
	}
	else
	{
		entry_func.fixup_hooks_in.push_back([=]() {
			if (triangles)
			{
				if (msl_options.raw_buffer_tese_input)
					statement(var_name, "[0] = ", base_ref, ".", mbr_name, ";");
				else
					statement(var_name, "[0] = ", base_ref, ".", mbr_name, "[3];");
			}
			else
			{
				statement(var_name, "[0] = ", base_ref, ".", mbr_name, "[0];");
				statement(var_name, "[1] = ", base_ref, ".", mbr_name, "[1];");
			}
		});
	}
}
bool CompilerMSL::variable_storage_requires_stage_io(spv::StorageClass storage) const
{
	if (storage == StorageClassOutput)
		return !capture_output_to_buffer;
	else if (storage == StorageClassInput)
		return !(is_tesc_shader() && msl_options.multi_patch_workgroup) &&
		       !(is_tese_shader() && msl_options.raw_buffer_tese_input);
	else
		return false;
}
string CompilerMSL::to_tesc_invocation_id()
{
	if (msl_options.multi_patch_workgroup)
	{
		// n.b. builtin_invocation_id_id here is the dispatch global invocation ID,
		// not the TC invocation ID.
		return join(to_expression(builtin_invocation_id_id), ".x % ", get_entry_point().output_vertices);
	}
	else
		return builtin_to_glsl(BuiltInInvocationId, StorageClassInput);
}
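// Example: with multi_patch_workgroup enabled and output_vertices = 4, this returns roughly
// "gl_GlobalInvocationID.x % 4"; otherwise it returns the plain "gl_InvocationID" builtin name.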

void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	bool threadgroup_storage = variable_decl_is_remapped_storage(masked_var, StorageClassWorkgroup);

	if (threadgroup_storage && msl_options.multi_patch_workgroup)
	{
		// We need one threadgroup block per patch, so fake this.
		entry_func.fixup_hooks_in.push_back([this, &masked_var]() {
			auto &type = get_variable_data_type(masked_var);
			add_local_variable_name(masked_var.self);

			const uint32_t max_control_points_per_patch = 32u;
			uint32_t max_num_instances =
			    (max_control_points_per_patch + get_entry_point().output_vertices - 1u) /
			    get_entry_point().output_vertices;
			statement("threadgroup ", type_to_glsl(type), " ",
			          "spvStorage", to_name(masked_var.self), "[", max_num_instances, "]",
			          type_to_array_glsl(type, 0), ";");

			// Assign a threadgroup slice to each PrimitiveID.
			// We assume here that workgroup size is rounded to 32,
			// since that's the maximum number of control points per patch.
			// We cannot size the array based on fixed dispatch parameters,
			// since Metal does not allow that. :(
			// FIXME: We will likely need an option to support passing down target workgroup size,
			// so we can emit appropriate size here.
			statement("threadgroup auto ",
			          "&", to_name(masked_var.self),
			          " = spvStorage", to_name(masked_var.self), "[",
			          "(", to_expression(builtin_invocation_id_id), ".x / ",
			          get_entry_point().output_vertices, ") % ",
			          max_num_instances, "];");
		});
	}
	else
	{
		entry_func.add_local_variable(masked_var.self);
	}

	if (!threadgroup_storage)
	{
		vars_needing_early_declaration.push_back(masked_var.self);
	}
	else if (masked_var.initializer)
	{
		// Cannot directly initialize threadgroup variables. Need fixup hooks.
		ID initializer = masked_var.initializer;
		if (strip_array)
		{
			entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() {
				auto invocation = to_tesc_invocation_id();
				statement(to_expression(masked_var.self), "[",
				          invocation, "] = ",
				          to_expression(initializer), "[",
				          invocation, "];");
			});
		}
		else
		{
			entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() {
				statement(to_expression(masked_var.self), " = ", to_expression(initializer), ";");
			});
		}
	}
}
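
// Sketch of what the multi-patch threadgroup path above emits for a hypothetical masked
// variable "foo" of data type float[4] with output_vertices == 4 (so max_num_instances
// == 32 / 4 == 8):
//
//   threadgroup float spvStoragefoo[8][4];
//   threadgroup auto &foo = spvStoragefoo[(gl_GlobalInvocationID.x / 4) % 8];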

void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type,
                                                  SPIRVariable &var, InterfaceBlockMeta &meta)
{
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	// Tessellation control I/O variables and tessellation evaluation per-point inputs are
	// usually declared as arrays. In these cases, we want to add the element type to the
	// interface block, since in Metal it's the interface block itself which is arrayed.
	auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
	bool is_builtin = is_builtin_variable(var);
	auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
	bool is_block = has_decoration(var_type.self, DecorationBlock);

	// If stage variables are masked out, emit them as plain variables instead.
	// For builtins, we query them one by one later.
	// IO blocks are not masked here, we need to mask them per-member instead.
	if (storage == StorageClassOutput && is_stage_output_variable_masked(var))
	{
		// If we ignore an output, we must still emit it, since it might be used by app.
		// Instead, just emit it as early declaration.
		emit_local_masked_variable(var, meta.strip_array);
		return;
	}

	if (storage == StorageClassInput && has_decoration(var.self, DecorationPerVertexKHR))
		SPIRV_CROSS_THROW("PerVertexKHR decoration is not supported in MSL.");

	// If variable names alias, they will end up with wrong names in the interface struct, because
	// there might be aliases in the member name cache and there would be a mismatch in fixup_in code.
	// Make sure to register the variables as unique resource names ahead of time.
	// This would normally conflict with the name cache when emitting local variables,
	// but this happens in the setup stage, before we hit compilation loops.
	// The name cache is cleared before we actually emit code, so this is safe.
	add_resource_name(var.self);

	if (var_type.basetype == SPIRType::Struct)
	{
		bool block_requires_flattening =
		    variable_storage_requires_stage_io(storage) || (is_block && var_type.array.empty());
		bool needs_local_declaration = !is_builtin && block_requires_flattening && meta.allow_local_declaration;

		if (needs_local_declaration)
		{
			// For I/O blocks or structs, we will need to pass the block itself around
			// to functions if they are used globally in leaf functions.
			// Rather than passing down member by member,
			// we unflatten I/O blocks while running the shader,
			// and pass the actual struct type down to leaf functions.
			// We then unflatten inputs, and flatten outputs in the "fixup" stages.
			emit_local_masked_variable(var, meta.strip_array);
		}

		if (!block_requires_flattening)
		{
			// In Metal tessellation shaders, the interface block itself is arrayed. This makes things
			// very complicated, since stage-in structures in MSL don't support nested structures.
			// Luckily, for stage-out when capturing output, we can avoid this and just add
			// composite members directly, because the stage-out structure is stored to a buffer,
			// not returned.
			add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta);
		}
		else
		{
			bool masked_block = false;
			uint32_t location = UINT32_MAX;
			uint32_t var_mbr_idx = 0;
			uint32_t elem_cnt = 1;

			if (is_matrix(var_type))
			{
				if (is_array(var_type))
					SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables.");
				elem_cnt = var_type.columns;
			}
			else if (is_array(var_type))
			{
				if (var_type.array.size() != 1)
					SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables.");
				elem_cnt = to_array_size_literal(var_type);
			}

			for (uint32_t elem_idx = 0; elem_idx < elem_cnt; elem_idx++)
			{
				// Flatten the struct members into the interface struct.
				for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++)
				{
					builtin = BuiltInMax;
					is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
					auto &mbr_type = get<SPIRType>(var_type.member_types[mbr_idx]);

					if (storage == StorageClassOutput && is_stage_output_block_member_masked(var, mbr_idx, meta.strip_array))
					{
						location = UINT32_MAX; // Skip this member and resolve location again on next var member
						if (is_block)
							masked_block = true;

						// Non-builtin block output variables are just ignored, since they will still access
						// the block variable as-is. They're just not flattened.
						if (is_builtin && !meta.strip_array)
						{
							// Emit a fake variable instead.
							uint32_t ids = ir.increase_bound_by(2);
							uint32_t ptr_type_id = ids + 0;
							uint32_t var_id = ids + 1;

							auto ptr_type = mbr_type;
							ptr_type.pointer = true;
							ptr_type.pointer_depth++;
							ptr_type.parent_type = var_type.member_types[mbr_idx];
							ptr_type.storage = StorageClassOutput;

							uint32_t initializer = 0;
							if (var.initializer)
								if (auto *c = maybe_get<SPIRConstant>(var.initializer))
									initializer = c->subconstants[mbr_idx];

							set<SPIRType>(ptr_type_id, ptr_type);
							set<SPIRVariable>(var_id, ptr_type_id, StorageClassOutput, initializer);
							entry_func.add_local_variable(var_id);
							vars_needing_early_declaration.push_back(var_id);
							set_name(var_id, builtin_to_glsl(builtin, StorageClassOutput));
							set_decoration(var_id, DecorationBuiltIn, builtin);
						}
					}
					else if (!is_builtin || has_active_builtin(builtin, storage))
					{
						bool is_composite_type = is_matrix(mbr_type) || is_array(mbr_type) || mbr_type.basetype == SPIRType::Struct;
						bool attribute_load_store =
						    storage == StorageClassInput && get_execution_model() != ExecutionModelFragment;
						bool storage_is_stage_io = variable_storage_requires_stage_io(storage);

						// Clip/CullDistance always need to be declared as user attributes.
						if (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)
							is_builtin = false;

						const string var_name = to_name(var.self);
						string mbr_name_qual = var_name;
						string var_chain_qual = var_name;
						if (elem_cnt > 1)
						{
							mbr_name_qual += join("_", elem_idx);
							var_chain_qual += join("[", elem_idx, "]");
						}

						if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type)
						{
							add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type,
							                                                 var, var_type, mbr_idx, meta,
							                                                 mbr_name_qual, var_chain_qual,
							                                                 location, var_mbr_idx, {});
						}
						else
						{
							add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type,
							                                             var, var_type, mbr_idx, meta,
							                                             mbr_name_qual, var_chain_qual,
							                                             location, var_mbr_idx);
						}
					}
					var_mbr_idx++;
				}
			}

			// If we're redirecting a block, we might still need to access the original block
			// variable if we're masking some members.
			if (masked_block && !needs_local_declaration && (!is_builtin_variable(var) || is_tesc_shader()))
			{
				if (is_builtin_variable(var))
				{
					// Ensure correct names for the block members if we're actually going to
					// declare gl_PerVertex.
					for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++)
					{
						set_member_name(var_type.self, mbr_idx, builtin_to_glsl(
						    BuiltIn(get_member_decoration(var_type.self, mbr_idx, DecorationBuiltIn)),
						    StorageClassOutput));
					}

					set_name(var_type.self, "gl_PerVertex");
					set_name(var.self, "gl_out_masked");
					stage_out_masked_builtin_type_id = var_type.self;
				}
				emit_local_masked_variable(var, meta.strip_array);
			}
		}
	}
	else if (is_tese_shader() && storage == StorageClassInput && !meta.strip_array && is_builtin &&
	         (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner))
	{
		add_tess_level_input_to_interface_block(ib_var_ref, ib_type, var);
	}
	else if (var_type.basetype == SPIRType::Boolean || var_type.basetype == SPIRType::Char ||
	         type_is_integral(var_type) || type_is_floating_point(var_type))
	{
		if (!is_builtin || has_active_builtin(builtin, storage))
		{
			bool is_composite_type = is_matrix(var_type) || is_array(var_type);
			bool storage_is_stage_io = variable_storage_requires_stage_io(storage);
			bool attribute_load_store = storage == StorageClassInput && get_execution_model() != ExecutionModelFragment;

			// Clip/CullDistance always need to be declared as user attributes.
			if (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)
				is_builtin = false;

			// MSL does not allow matrices or arrays in input or output variables, so need to handle it specially.
			if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type)
			{
				add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta);
			}
			else
			{
				add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta);
			}
		}
	}
}
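
// As an illustration of the flattening loop above: a hypothetical output block array
// "blk" with two elements is flattened into one interface member per (element, member)
// pair, with member names qualified as blk_0, blk_1, ... and the matching access
// chains as blk[0], blk[1], ... (see mbr_name_qual and var_chain_qual).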

// Fix up the mapping of variables to interface member indices, which is used to compile access chains
// for per-vertex variables in a tessellation control shader.
void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id)
{
	// Only needed for tessellation shaders and pull-model interpolants.
	// Need to redirect interface indices back to variables themselves.
	// For structs, each member of the struct needs a separate instance.
	if (!is_tesc_shader() && !(is_tese_shader() && storage == StorageClassInput) &&
	    !(get_execution_model() == ExecutionModelFragment && storage == StorageClassInput &&
	      !pull_model_inputs.empty()))
		return;

	auto mbr_cnt = uint32_t(ir.meta[ib_type_id].members.size());
	for (uint32_t i = 0; i < mbr_cnt; i++)
	{
		uint32_t var_id = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceOrigID);
		if (!var_id)
			continue;

		auto &var = get<SPIRVariable>(var_id);
		auto &type = get_variable_element_type(var);

		bool flatten_composites = variable_storage_requires_stage_io(var.storage);
		bool is_block = has_decoration(type.self, DecorationBlock);

		uint32_t mbr_idx = uint32_t(-1);
		if (type.basetype == SPIRType::Struct && (flatten_composites || is_block))
			mbr_idx = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex);

		if (mbr_idx != uint32_t(-1))
		{
			// Only set the lowest InterfaceMemberIndex for each variable member.
			// IB struct members will be emitted in-order w.r.t. interface member index.
			if (!has_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex))
				set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i);
		}
		else
		{
			// Only set the lowest InterfaceMemberIndex for each variable.
			// IB struct members will be emitted in-order w.r.t. interface member index.
			if (!has_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex))
				set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i);
		}
	}
}

// Add an interface structure for the type of storage, which is either StorageClassInput or StorageClassOutput.
// Returns the ID of the newly added variable, or zero if no variable was added.
uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
{
	// Accumulate the variables that should appear in the interface struct.
	SmallVector<SPIRVariable *> vars;
	bool incl_builtins = storage == StorageClassOutput || is_tessellation_shader();
	bool has_seen_barycentric = false;

	InterfaceBlockMeta meta;

	// Varying interfaces between stages which use "user()" attribute can be dealt with
	// without explicit packing and unpacking of components. For any variables which link against the runtime
	// in some way (vertex attributes, fragment output, etc), we'll need to deal with it somehow.
	bool pack_components =
	    (storage == StorageClassInput && get_execution_model() == ExecutionModelVertex) ||
	    (storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment) ||
	    (storage == StorageClassOutput && get_execution_model() == ExecutionModelVertex && capture_output_to_buffer);

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
		if (var.storage != storage)
			return;

		auto &type = this->get<SPIRType>(var.basetype);

		bool is_builtin = is_builtin_variable(var);
		bool is_block = has_decoration(type.self, DecorationBlock);

		auto bi_type = BuiltInMax;
		bool builtin_is_gl_in_out = false;
		if (is_builtin && !is_block)
		{
			bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));
			builtin_is_gl_in_out = bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
			                       bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance;
		}

		if (is_builtin && is_block)
			builtin_is_gl_in_out = true;

		uint32_t location = get_decoration(var_id, DecorationLocation);

		bool builtin_is_stage_in_out = builtin_is_gl_in_out ||
		                               bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex ||
		                               bi_type == BuiltInBaryCoordKHR || bi_type == BuiltInBaryCoordNoPerspKHR ||
		                               bi_type == BuiltInFragDepth ||
		                               bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask;

		// These builtins are part of the stage in/out structs.
		bool is_interface_block_builtin =
		    builtin_is_stage_in_out || (is_tese_shader() && !msl_options.raw_buffer_tese_input &&
		                                (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner));

		bool is_active = interface_variable_exists_in_entry_point(var.self);
		if (is_builtin && is_active)
		{
			// Only emit the builtin if it's active in this entry point. Interface variable list might lie.
			if (is_block)
			{
				// If any builtin is active, the block is active.
				uint32_t mbr_cnt = uint32_t(type.member_types.size());
				for (uint32_t i = 0; !is_active && i < mbr_cnt; i++)
					is_active = has_active_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)), storage);
			}
			else
			{
				is_active = has_active_builtin(bi_type, storage);
			}
		}

		bool filter_patch_decoration = (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch;

		bool hidden = is_hidden_variable(var, incl_builtins);

		// ClipDistance is never hidden, we need to emulate it when used as an input.
		if (bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance)
			hidden = false;

		// It's not enough to simply avoid marking fragment outputs if the pipeline won't
		// accept them. We can't put them in the struct at all, or otherwise the compiler
		// complains that the outputs weren't explicitly marked.
		// Frag depth and stencil outputs are incompatible with explicit early fragment tests.
		// In GLSL, depth and stencil outputs are just ignored when explicit early fragment tests are required.
		// In Metal, it's a compilation error, so we need to exclude them from the output struct.
		if (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput && !patch &&
		    ((is_builtin && ((bi_type == BuiltInFragDepth && (!msl_options.enable_frag_depth_builtin || uses_explicit_early_fragment_test())) ||
		                     (bi_type == BuiltInFragStencilRefEXT && (!msl_options.enable_frag_stencil_ref_builtin || uses_explicit_early_fragment_test())))) ||
		     (!is_builtin && !(msl_options.enable_frag_output_mask & (1 << location)))))
		{
			hidden = true;
			disabled_frag_outputs.push_back(var_id);
			// If a builtin, force it to have the proper name, and mark it as not part of the output struct.
			if (is_builtin)
			{
				set_name(var_id, builtin_to_glsl(bi_type, StorageClassFunction));
				mask_stage_output_by_builtin(bi_type);
			}
		}

		// Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments.
		if (is_active && (bi_type == BuiltInBaryCoordKHR || bi_type == BuiltInBaryCoordNoPerspKHR))
		{
			if (has_seen_barycentric)
				SPIRV_CROSS_THROW("Cannot declare both BaryCoordNV and BaryCoordNoPerspNV in same shader in MSL.");
			has_seen_barycentric = true;
			hidden = false;
		}

		if (is_active && !hidden && type.pointer && filter_patch_decoration &&
		    (!is_builtin || is_interface_block_builtin))
		{
			vars.push_back(&var);

			if (!is_builtin)
			{
				// Need to deal specially with DecorationComponent.
				// Multiple variables can alias the same Location, so we try to make sure
				// each location is declared only once.
				// We will swizzle data in and out to make this work.
				// This is only relevant for vertex inputs and fragment outputs.
				// Technically tessellation as well, but it is too complicated to support.
				uint32_t component = get_decoration(var_id, DecorationComponent);
				if (component != 0)
				{
					if (is_tessellation_shader())
						SPIRV_CROSS_THROW("Component decoration is not supported in tessellation shaders.");
					else if (pack_components)
					{
						uint32_t array_size = 1;
						if (!type.array.empty())
							array_size = to_array_size_literal(type);

						for (uint32_t location_offset = 0; location_offset < array_size; location_offset++)
						{
							auto &location_meta = meta.location_meta[location + location_offset];
							location_meta.num_components = max<uint32_t>(location_meta.num_components, component + type.vecsize);

							// For variables sharing location, decorations and base type must match.
							location_meta.base_type_id = type.self;
							location_meta.flat = has_decoration(var.self, DecorationFlat);
							location_meta.noperspective = has_decoration(var.self, DecorationNoPerspective);
							location_meta.centroid = has_decoration(var.self, DecorationCentroid);
							location_meta.sample = has_decoration(var.self, DecorationSample);
						}
					}
				}
			}
		}

		if (is_tese_shader() && msl_options.raw_buffer_tese_input && patch && storage == StorageClassInput &&
		    (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner))
		{
			// In this case, we won't add the builtin to the interface struct,
			// but we still need the hook to run to populate the arrays.
			string base_ref = join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "]");
			const char *mbr_name =
			    bi_type == BuiltInTessLevelOuter ? "edgeTessellationFactor" : "insideTessellationFactor";
			add_tess_level_input(base_ref, mbr_name, var);
			if (inputs_by_builtin.count(bi_type))
			{
				uint32_t locn = inputs_by_builtin[bi_type].location;
				mark_location_as_used_by_shader(locn, type, StorageClassInput);
			}
		}
	});

	// If no variables qualify, leave.
	// For patch input in a tessellation evaluation shader, the per-vertex stage inputs
	// are included in a special patch control point array.
	if (vars.empty() &&
	    !(!msl_options.raw_buffer_tese_input && storage == StorageClassInput && patch && stage_in_var_id))
		return 0;

	// Add a new typed variable for this interface structure.
	// The initializer expression is allocated here, but populated when the function
	// declaration is emitted, because it is cleared after each compilation pass.
	uint32_t next_id = ir.increase_bound_by(3);
	uint32_t ib_type_id = next_id++;
	auto &ib_type = set<SPIRType>(ib_type_id, OpTypeStruct);
	ib_type.basetype = SPIRType::Struct;
	ib_type.storage = storage;
	set_decoration(ib_type_id, DecorationBlock);

	uint32_t ib_var_id = next_id++;
	auto &var = set<SPIRVariable>(ib_var_id, ib_type_id, storage, 0);
	var.initializer = next_id++;

	string ib_var_ref;
	auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
	switch (storage)
	{
	case StorageClassInput:
		ib_var_ref = patch ? patch_stage_in_var_name : stage_in_var_name;
		switch (get_execution_model())
		{
		case ExecutionModelTessellationControl:
			// Add a hook to populate the shared workgroup memory containing the gl_in array.
			entry_func.fixup_hooks_in.push_back([=]() {
				// Can't use PatchVertices, PrimitiveId, or InvocationId here; the hooks for those may not have run yet.
				if (msl_options.multi_patch_workgroup)
				{
					// n.b. builtin_invocation_id_id here is the dispatch global invocation ID,
					// not the TC invocation ID.
					statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_in = &",
					          input_buffer_var_name, "[min(", to_expression(builtin_invocation_id_id), ".x / ",
					          get_entry_point().output_vertices,
					          ", spvIndirectParams[1] - 1) * spvIndirectParams[0]];");
				}
				else
				{
					// It's safe to use InvocationId here because it's directly mapped to a
					// Metal builtin, and therefore doesn't need a hook.
					statement("if (", to_expression(builtin_invocation_id_id), " < spvIndirectParams[0])");
					statement("    ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id),
					          "] = ", ib_var_ref, ";");
					statement("threadgroup_barrier(mem_flags::mem_threadgroup);");
					statement("if (", to_expression(builtin_invocation_id_id),
					          " >= ", get_entry_point().output_vertices, ")");
					statement("    return;");
				}
			});
			break;
		case ExecutionModelTessellationEvaluation:
			if (!msl_options.raw_buffer_tese_input)
				break;
			if (patch)
			{
				entry_func.fixup_hooks_in.push_back(
				    [=]()
				    {
					    statement("const device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
					              " = ", patch_input_buffer_var_name, "[", to_expression(builtin_primitive_id_id),
					              "];");
				    });
			}
			else
			{
				entry_func.fixup_hooks_in.push_back(
				    [=]()
				    {
					    statement("const device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_in = &",
					              input_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ",
					              get_entry_point().output_vertices, "];");
				    });
			}
			break;
		default:
			break;
		}
		break;

	case StorageClassOutput:
	{
		ib_var_ref = patch ? patch_stage_out_var_name : stage_out_var_name;

		// Add the output interface struct as a local variable to the entry function.
		// If the entry point should return the output struct, set the entry function
		// to return the output interface struct, otherwise to return nothing.
		// Watch out for the rare case where the terminator of the last entry point block is a
		// Kill, instead of a Return. Based on SPIR-V's block-domination rules, we assume that
		// any block that has a Kill will also have a terminating Return, except the last block.
		// Indicate the output var requires early initialization.
		bool ep_should_return_output = !get_is_rasterization_disabled();
		uint32_t rtn_id = ep_should_return_output ? ib_var_id : 0;
		if (!capture_output_to_buffer)
		{
			entry_func.add_local_variable(ib_var_id);
			for (auto &blk_id : entry_func.blocks)
			{
				auto &blk = get<SPIRBlock>(blk_id);
				if (blk.terminator == SPIRBlock::Return || (blk.terminator == SPIRBlock::Kill && blk_id == entry_func.blocks.back()))
					blk.return_value = rtn_id;
			}
			vars_needing_early_declaration.push_back(ib_var_id);
		}
		else
		{
			switch (get_execution_model())
			{
			case ExecutionModelVertex:
			case ExecutionModelTessellationEvaluation:
				// Instead of declaring a struct variable to hold the output and then
				// copying that to the output buffer, we'll declare the output variable
				// as a reference to the final output element in the buffer. Then we can
				// avoid the extra copy.
				entry_func.fixup_hooks_in.push_back([=]() {
					if (stage_out_var_id)
					{
						// The first member of the indirect buffer is always the number of vertices
						// to draw.
						// We zero-base the InstanceID & VertexID variables for HLSL emulation elsewhere, so don't do it twice.
						if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)
						{
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
							          " = ", output_buffer_var_name, "[", to_expression(builtin_invocation_id_id),
							          ".y * ", to_expression(builtin_stage_input_size_id), ".x + ",
							          to_expression(builtin_invocation_id_id), ".x];");
						}
						else if (msl_options.enable_base_index_zero)
						{
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
							          " = ", output_buffer_var_name, "[", to_expression(builtin_instance_idx_id),
							          " * spvIndirectParams[0] + ", to_expression(builtin_vertex_idx_id), "];");
						}
						else
						{
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
							          " = ", output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id),
							          " - ", to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ",
							          to_expression(builtin_vertex_idx_id), " - ",
							          to_expression(builtin_base_vertex_id), "];");
						}
					}
				});
				break;
			case ExecutionModelTessellationControl:
				if (msl_options.multi_patch_workgroup)
				{
					// We cannot use PrimitiveId here, because the hook may not have run yet.
					if (patch)
					{
						entry_func.fixup_hooks_in.push_back([=]() {
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
							          " = ", patch_output_buffer_var_name, "[", to_expression(builtin_invocation_id_id),
							          ".x / ", get_entry_point().output_vertices, "];");
						});
					}
					else
					{
						entry_func.fixup_hooks_in.push_back([=]() {
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &",
							          output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), ".x - ",
							          to_expression(builtin_invocation_id_id), ".x % ",
							          get_entry_point().output_vertices, "];");
						});
					}
				}
				else
				{
					if (patch)
					{
						entry_func.fixup_hooks_in.push_back([=]() {
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref,
							          " = ", patch_output_buffer_var_name, "[", to_expression(builtin_primitive_id_id),
							          "];");
						});
					}
					else
					{
						entry_func.fixup_hooks_in.push_back([=]() {
							statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &",
							          output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ",
							          get_entry_point().output_vertices, "];");
						});
					}
				}
				break;
			default:
				break;
			}
		}
		break;
	}

	default:
		break;
	}

	set_name(ib_type_id, to_name(ir.default_entry_point) + "_" + ib_var_ref);
	set_name(ib_var_id, ib_var_ref);

	for (auto *p_var : vars)
	{
		bool strip_array = (is_tesc_shader() || (is_tese_shader() && storage == StorageClassInput)) && !patch;

		// Fixing up flattened stores in TESC is impossible since the memory is group shared either via
		// device (not masked) or threadgroup (masked) storage classes and it's race condition city.
		meta.strip_array = strip_array;
		meta.allow_local_declaration = !strip_array && !(is_tesc_shader() && storage == StorageClassOutput);
		add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, meta);
	}

	if (((is_tesc_shader() && msl_options.multi_patch_workgroup) ||
	     (is_tese_shader() && msl_options.raw_buffer_tese_input)) &&
	    storage == StorageClassInput)
	{
		// For tessellation inputs, add all outputs from the previous stage to ensure
		// the struct containing them is the correct size and layout.
		for (auto &input : inputs_by_location)
		{
			if (location_inputs_in_use.count(input.first.location) != 0)
				continue;

			if (patch != (input.second.rate == MSL_SHADER_VARIABLE_RATE_PER_PATCH))
				continue;

			// Tessellation levels have their own struct, so there's no need to add them here.
			if (input.second.builtin == BuiltInTessLevelOuter || input.second.builtin == BuiltInTessLevelInner)
				continue;

			// Create a fake variable to put at the location.
			uint32_t offset = ir.increase_bound_by(5);
			uint32_t type_id = offset;
			uint32_t vec_type_id = offset + 1;
			uint32_t array_type_id = offset + 2;
			uint32_t ptr_type_id = offset + 3;
			uint32_t var_id = offset + 4;

			SPIRType type { OpTypeInt };
			switch (input.second.format)
			{
			case MSL_SHADER_VARIABLE_FORMAT_UINT16:
			case MSL_SHADER_VARIABLE_FORMAT_ANY16:
				type.basetype = SPIRType::UShort;
				type.width = 16;
				break;
			case MSL_SHADER_VARIABLE_FORMAT_ANY32:
			default:
				type.basetype = SPIRType::UInt;
				type.width = 32;
				break;
			}
			set<SPIRType>(type_id, type);
			if (input.second.vecsize > 1)
			{
				type.op = OpTypeVector;
				type.vecsize = input.second.vecsize;
				set<SPIRType>(vec_type_id, type);
				type_id = vec_type_id;
			}

			type.op = OpTypeArray;
			type.array.push_back(0);
			type.array_size_literal.push_back(true);
			type.parent_type = type_id;
			set<SPIRType>(array_type_id, type);
			type.self = type_id;

			type.op = OpTypePointer;
			type.pointer = true;
			type.pointer_depth++;
			type.parent_type = array_type_id;
			type.storage = storage;
			auto &ptr_type = set<SPIRType>(ptr_type_id, type);
			ptr_type.self = array_type_id;

			auto &fake_var = set<SPIRVariable>(var_id, ptr_type_id, storage);
			set_decoration(var_id, DecorationLocation, input.first.location);
			if (input.first.component)
				set_decoration(var_id, DecorationComponent, input.first.component);

			meta.strip_array = true;
			meta.allow_local_declaration = false;
			add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta);
		}
	}

	if (capture_output_to_buffer && storage == StorageClassOutput)
	{
		// For captured output, add all inputs from the next stage to ensure
		// the struct containing them is the correct size and layout. This is
		// necessary for certain implicit builtins that may nonetheless be read,
		// even when they aren't written.
		for (auto &output : outputs_by_location)
		{
			if (location_outputs_in_use.count(output.first.location) != 0)
				continue;

			// Create a fake variable to put at the location.
			uint32_t offset = ir.increase_bound_by(5);
			uint32_t type_id = offset;
			uint32_t vec_type_id = offset + 1;
			uint32_t array_type_id = offset + 2;
			uint32_t ptr_type_id = offset + 3;
			uint32_t var_id = offset + 4;

			SPIRType type { OpTypeInt };
			switch (output.second.format)
			{
			case MSL_SHADER_VARIABLE_FORMAT_UINT16:
			case MSL_SHADER_VARIABLE_FORMAT_ANY16:
				type.basetype = SPIRType::UShort;
				type.width = 16;
				break;
			case MSL_SHADER_VARIABLE_FORMAT_ANY32:
			default:
				type.basetype = SPIRType::UInt;
				type.width = 32;
				break;
			}
			set<SPIRType>(type_id, type);
			if (output.second.vecsize > 1)
			{
				type.op = OpTypeVector;
				type.vecsize = output.second.vecsize;
				set<SPIRType>(vec_type_id, type);
				type_id = vec_type_id;
			}

			if (is_tesc_shader())
			{
				type.op = OpTypeArray;
				type.array.push_back(0);
				type.array_size_literal.push_back(true);
				type.parent_type = type_id;
				set<SPIRType>(array_type_id, type);
			}

			type.op = OpTypePointer;
			type.pointer = true;
			type.pointer_depth++;
			type.parent_type = is_tesc_shader() ? array_type_id : type_id;
			type.storage = storage;
			auto &ptr_type = set<SPIRType>(ptr_type_id, type);
			ptr_type.self = type.parent_type;

			auto &fake_var = set<SPIRVariable>(var_id, ptr_type_id, storage);
			set_decoration(var_id, DecorationLocation, output.first.location);
			if (output.first.component)
				set_decoration(var_id, DecorationComponent, output.first.component);

			meta.strip_array = true;
			meta.allow_local_declaration = false;
			add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta);
		}
	}

	// When multiple variables need to access same location,
	// unroll locations one by one and we will flatten output or input as necessary.
	for (auto &loc : meta.location_meta)
	{
		uint32_t location = loc.first;
		auto &location_meta = loc.second;

		uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
		uint32_t type_id = build_extended_vector_type(location_meta.base_type_id, location_meta.num_components);
		ib_type.member_types.push_back(type_id);

		set_member_name(ib_type.self, ib_mbr_idx, join("m_location_", location));
		set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
		mark_location_as_used_by_shader(location, get<SPIRType>(type_id), storage);

		if (location_meta.flat)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
		if (location_meta.noperspective)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
		if (location_meta.centroid)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
		if (location_meta.sample)
			set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
	}

	// Sort the members of the structure by their locations.
	MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::LocationThenBuiltInType);
	member_sorter.sort();

	// The member indices were saved to the original variables, but after the members
	// were sorted, those indices are now likely incorrect. Fix those up now.
	fix_up_interface_member_indices(storage, ib_type_id);

	// For patch inputs, add one more member, holding the array of control point data.
	if (is_tese_shader() && !msl_options.raw_buffer_tese_input && storage == StorageClassInput && patch &&
	    stage_in_var_id)
	{
		uint32_t pcp_type_id = ir.increase_bound_by(1);
		auto &pcp_type = set<SPIRType>(pcp_type_id, ib_type);
		pcp_type.basetype = SPIRType::ControlPointArray;
		pcp_type.parent_type = pcp_type.type_alias = get_stage_in_struct_type().self;
		pcp_type.storage = storage;
		ir.meta[pcp_type_id] = ir.meta[ib_type.self];

		uint32_t mbr_idx = uint32_t(ib_type.member_types.size());
		ib_type.member_types.push_back(pcp_type_id);
		set_member_name(ib_type.self, mbr_idx, "gl_in");
	}

	if (storage == StorageClassInput)
		set_decoration(ib_var_id, DecorationNonWritable);

	return ib_var_id;
}
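
// For reference, the raw-buffer TESE hooks registered above expand to entry-point
// preambles roughly like the following, assuming the default entry-point name main0,
// the default buffer names (spvIn, spvPatchIn), and four control points per patch:
//
//   const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 4];
//   const device main0_patchIn& patchIn = spvPatchIn[gl_PrimitiveID];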

uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageClass storage)
{
	if (!ib_var_id)
		return 0;

	uint32_t ib_ptr_var_id;
	uint32_t next_id = ir.increase_bound_by(3);
	auto &ib_type = expression_type(ib_var_id);
	if (is_tesc_shader() || (is_tese_shader() && msl_options.raw_buffer_tese_input))
	{
		// Tessellation control per-vertex I/O is presented as an array, so we must
		// do the same with our struct here.
		uint32_t ib_ptr_type_id = next_id++;
		auto &ib_ptr_type = set<SPIRType>(ib_ptr_type_id, ib_type);
		ib_ptr_type.op = OpTypePointer;
		ib_ptr_type.parent_type = ib_ptr_type.type_alias = ib_type.self;
		ib_ptr_type.pointer = true;
		ib_ptr_type.pointer_depth++;
		ib_ptr_type.storage = storage == StorageClassInput ?
		                          ((is_tesc_shader() && msl_options.multi_patch_workgroup) ||
		                           (is_tese_shader() && msl_options.raw_buffer_tese_input) ?
		                               StorageClassStorageBuffer :
		                               StorageClassWorkgroup) :
		                          StorageClassStorageBuffer;
		ir.meta[ib_ptr_type_id] = ir.meta[ib_type.self];

		// To ensure that get_variable_data_type() doesn't strip off the pointer,
		// which we need, use another pointer.
		uint32_t ib_ptr_ptr_type_id = next_id++;
		auto &ib_ptr_ptr_type = set<SPIRType>(ib_ptr_ptr_type_id, ib_ptr_type);
		ib_ptr_ptr_type.parent_type = ib_ptr_type_id;
		ib_ptr_ptr_type.type_alias = ib_type.self;
		ib_ptr_ptr_type.storage = StorageClassFunction;
		ir.meta[ib_ptr_ptr_type_id] = ir.meta[ib_type.self];

		ib_ptr_var_id = next_id;
		set<SPIRVariable>(ib_ptr_var_id, ib_ptr_ptr_type_id, StorageClassFunction, 0);
		set_name(ib_ptr_var_id, storage == StorageClassInput ? "gl_in" : "gl_out");
		if (storage == StorageClassInput)
			set_decoration(ib_ptr_var_id, DecorationNonWritable);
	}
	else
	{
		// Tessellation evaluation per-vertex inputs are also presented as arrays.
		// But, in Metal, this array uses a very special type, 'patch_control_point<T>',
		// which is a container that can be used to access the control point data.
		// To represent this, a special 'ControlPointArray' type has been added to the
		// SPIRV-Cross type system. It should only be generated by and seen in the MSL
		// backend (i.e. this one).
		uint32_t pcp_type_id = next_id++;
		auto &pcp_type = set<SPIRType>(pcp_type_id, ib_type);
		pcp_type.basetype = SPIRType::ControlPointArray;
		pcp_type.parent_type = pcp_type.type_alias = ib_type.self;
		pcp_type.storage = storage;
		ir.meta[pcp_type_id] = ir.meta[ib_type.self];

		ib_ptr_var_id = next_id;
		set<SPIRVariable>(ib_ptr_var_id, pcp_type_id, storage, 0);
		set_name(ib_ptr_var_id, "gl_in");
		ir.meta[ib_ptr_var_id].decoration.qualified_alias = join(patch_stage_in_var_name, ".gl_in");
	}
	return ib_ptr_var_id;
}
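
// In the generated MSL, the ControlPointArray branch above surfaces as Metal's
// patch_control_point<T> container, e.g. roughly:
//
//   struct main0_patchIn
//   {
//       ...
//       patch_control_point<main0_in> gl_in;
//   };
//
// and per-control-point reads go through the qualified alias set above, i.e.
// patchIn.gl_in[i].<member>.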

// Ensure that the type is compatible with the builtin.
// If it is, simply return the given type ID.
// Otherwise, create a new type, and return its ID.
uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn builtin)
{
	auto &type = get<SPIRType>(type_id);
	auto &pointee_type = get_pointee_type(type);

	if ((builtin == BuiltInSampleMask && is_array(pointee_type)) ||
	    ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex || builtin == BuiltInFragStencilRefEXT) &&
	     pointee_type.basetype != SPIRType::UInt))
	{
		uint32_t next_id = ir.increase_bound_by(is_pointer(type) ? 2 : 1);
		uint32_t base_type_id = next_id++;
		auto &base_type = set<SPIRType>(base_type_id, OpTypeInt);
		base_type.basetype = SPIRType::UInt;
		base_type.width = 32;

		if (!is_pointer(type))
			return base_type_id;

		uint32_t ptr_type_id = next_id++;
		auto &ptr_type = set<SPIRType>(ptr_type_id, base_type);
		ptr_type.op = spv::OpTypePointer;
		ptr_type.pointer = true;
		ptr_type.pointer_depth++;
		ptr_type.storage = type.storage;
		ptr_type.parent_type = base_type_id;
		return ptr_type_id;
	}

	return type_id;
}
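
// E.g. a shader declaring the Layer builtin as a signed int is given a fresh 32-bit
// uint (pointer) type here, since the MSL interface expects these builtins to be
// unsigned; a SampleMask declared as an array likewise collapses to a plain uint.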

// Ensure that the type is compatible with the shader input.
// If it is, simply return the given type ID.
// Otherwise, create a new type, and return its ID.
uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component, uint32_t num_components, bool strip_array)
{
	auto &type = get<SPIRType>(type_id);

	uint32_t max_array_dimensions = strip_array ? 1 : 0;

	// Struct and array types must match exactly.
	if (type.basetype == SPIRType::Struct || type.array.size() > max_array_dimensions)
		return type_id;

	auto p_va = inputs_by_location.find({ location, component });
	if (p_va == end(inputs_by_location))
	{
		if (num_components > type.vecsize)
			return build_extended_vector_type(type_id, num_components);
		else
			return type_id;
	}

	if (num_components == 0)
		num_components = p_va->second.vecsize;

	switch (p_va->second.format)
	{
	case MSL_SHADER_VARIABLE_FORMAT_UINT8:
	{
		switch (type.basetype)
		{
		case SPIRType::UByte:
		case SPIRType::UShort:
		case SPIRType::UInt:
			if (num_components > type.vecsize)
				return build_extended_vector_type(type_id, num_components);
			else
				return type_id;

		case SPIRType::Short:
			return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize,
			                                  SPIRType::UShort);
		case SPIRType::Int:
			return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize,
			                                  SPIRType::UInt);

		default:
			SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader");
		}
	}

	case MSL_SHADER_VARIABLE_FORMAT_UINT16:
	{
		switch (type.basetype)
		{
		case SPIRType::UShort:
		case SPIRType::UInt:
			if (num_components > type.vecsize)
				return build_extended_vector_type(type_id, num_components);
			else
				return type_id;

		case SPIRType::Int:
			return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize,
			                                  SPIRType::UInt);

		default:
			SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader");
		}
	}

	default:
		if (num_components > type.vecsize)
			type_id = build_extended_vector_type(type_id, num_components);
		break;
	}

	return type_id;
}
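
// Worked example for the UINT8 case above: an attribute consumed as int2 in the shader
// but fed with 8-bit unsigned data is redeclared as a uint2 member (a short2 would
// become ushort2), and num_components is widened if the host supplies more components
// than the shader declares.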

void CompilerMSL::mark_struct_members_packed(const SPIRType &type)
{
	// Handle possible recursion when a struct contains a pointer to its own type nested somewhere.
	if (has_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked))
		return;

	set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked);

	// Problem case! Struct needs to be placed at an awkward alignment.
	// Mark every member of the child struct as packed.
	uint32_t mbr_cnt = uint32_t(type.member_types.size());
	for (uint32_t i = 0; i < mbr_cnt; i++)
	{
		auto &mbr_type = get<SPIRType>(type.member_types[i]);
		if (mbr_type.basetype == SPIRType::Struct)
		{
			// Recursively mark structs as packed.
			auto *struct_type = &mbr_type;
			while (!struct_type->array.empty())
				struct_type = &get<SPIRType>(struct_type->parent_type);
			mark_struct_members_packed(*struct_type);
		}
		else if (!is_scalar(mbr_type))
			set_extended_member_decoration(type.self, i, SPIRVCrossDecorationPhysicalTypePacked);
	}
}
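
// E.g. if a struct { float3 v; float2 w; } ends up at an awkward offset, both v and w
// are marked packed here, so the MSL backend can emit them as packed_float3 and
// packed_float2 respectively.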

void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type)
{
	uint32_t mbr_cnt = uint32_t(type.member_types.size());
	for (uint32_t i = 0; i < mbr_cnt; i++)
	{
		// Handle possible recursion when a struct contains a pointer to its own type nested somewhere.
		auto &mbr_type = get<SPIRType>(type.member_types[i]);
		if (mbr_type.basetype == SPIRType::Struct && !(mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer))
		{
			auto *struct_type = &mbr_type;
			while (!struct_type->array.empty())
				struct_type = &get<SPIRType>(struct_type->parent_type);

			if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPhysicalTypePacked))
				continue;

			uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, i);
			uint32_t msl_size = get_declared_struct_member_size_msl(type, i);
			uint32_t spirv_offset = type_struct_member_offset(type, i);
			uint32_t spirv_offset_next;
			if (i + 1 < mbr_cnt)
				spirv_offset_next = type_struct_member_offset(type, i + 1);
			else
				spirv_offset_next = spirv_offset + msl_size;

			// Both are complicated cases. In scalar layout, a struct of float3 might just consume 12 bytes,
			// and the next member will be placed at offset 12.
			bool struct_is_misaligned = (spirv_offset % msl_alignment) != 0;
			bool struct_is_too_large = spirv_offset + msl_size > spirv_offset_next;
			uint32_t array_stride = 0;
			bool struct_needs_explicit_padding = false;

			// Verify that if a struct is used as an array that ArrayStride matches the effective size of the struct.
			if (!mbr_type.array.empty())
			{
				array_stride = type_struct_member_array_stride(type, i);
				uint32_t dimensions = uint32_t(mbr_type.array.size() - 1);
				for (uint32_t dim = 0; dim < dimensions; dim++)
				{
					uint32_t array_size = to_array_size_literal(mbr_type, dim);
					array_stride /= max<uint32_t>(array_size, 1u);
				}

				// Set expected struct size based on ArrayStride.
				struct_needs_explicit_padding = true;

				// If struct size is larger than array stride, we might be able to fit, if we tightly pack.
				if (get_declared_struct_size_msl(*struct_type) > array_stride)
					struct_is_too_large = true;
			}

			if (struct_is_misaligned || struct_is_too_large)
				mark_struct_members_packed(*struct_type);
			mark_scalar_layout_structs(*struct_type);

			if (struct_needs_explicit_padding)
			{
				msl_size = get_declared_struct_size_msl(*struct_type, true, true);
				if (array_stride < msl_size)
				{
					SPIRV_CROSS_THROW("Cannot express an array stride smaller than size of struct type.");
				}
				else
				{
					if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget))
					{
						if (array_stride !=
						    get_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget))
							SPIRV_CROSS_THROW(
							    "A struct is used with different array strides. Cannot express this in MSL.");
					}
					else
						set_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget, array_stride);
				}
			}
		}
	}
}
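
// Worked example of the scalar-layout case mentioned above: with scalar block layout,
// a member "struct S { float3 v; }" consumes only 12 bytes, so the next member may sit
// at offset 12. The natural MSL size of S is 16 bytes, so S is flagged as too large
// here and all of its members are marked packed.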

// Sort the members of the struct type by offset, and pack and then pad members where needed
// to align MSL members with SPIR-V offsets. The struct members are iterated twice. Packing
// occurs first, followed by padding, because packing a member reduces both its size and its
// natural alignment, possibly requiring a padding member to be added ahead of it.
void CompilerMSL::align_struct(SPIRType &ib_type, unordered_set<uint32_t> &aligned_structs)
{
	// We align structs recursively, so stop any redundant work.
	ID &ib_type_id = ib_type.self;
	if (aligned_structs.count(ib_type_id))
		return;
	aligned_structs.insert(ib_type_id);

	// Sort the members of the interface structure by their offset.
	// They should already be sorted per SPIR-V spec anyway.
	MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Offset);
	member_sorter.sort();

	auto mbr_cnt = uint32_t(ib_type.member_types.size());

	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
	{
		// Pack any dependent struct types before we pack a parent struct.
		auto &mbr_type = get<SPIRType>(ib_type.member_types[mbr_idx]);
		if (mbr_type.basetype == SPIRType::Struct)
			align_struct(mbr_type, aligned_structs);
	}

	// Test the alignment of each member, and if a member should be closer to the previous
	// member than the default spacing expects, it is likely that the previous member is in
	// a packed format. If so, and the previous member is packable, pack it.
	// For example ... this applies to any 3-element vector that is followed by a scalar.
	uint32_t msl_offset = 0;
	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
	{
		// This checks the member in isolation, if the member needs some kind of type remapping to conform to SPIR-V
		// offsets, array strides and matrix strides.
		ensure_member_packing_rules_msl(ib_type, mbr_idx);

		// Align current offset to the current member's default alignment. If the member was packed, it will observe
		// the updated alignment here.
		uint32_t msl_align_mask = get_declared_struct_member_alignment_msl(ib_type, mbr_idx) - 1;
		uint32_t aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask;

		// Fetch the member offset as declared in the SPIRV.
		uint32_t spirv_mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset);
		if (spirv_mbr_offset > aligned_msl_offset)
		{
			// Since MSL and SPIR-V have slightly different struct member alignment and
			// size rules, we'll pad to standard C-packing rules with a char[] array. If the member is farther
			// away than C-packing expects, add an inert padding member before the member.
			uint32_t padding_bytes = spirv_mbr_offset - aligned_msl_offset;
			set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPaddingTarget, padding_bytes);

			// Re-align as a sanity check that aligning post-padding matches up.
			msl_offset += padding_bytes;
			aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask;
		}
		else if (spirv_mbr_offset < aligned_msl_offset)
		{
			// This should not happen, but deal with unexpected scenarios.
			// It *might* happen if a sub-struct has a larger alignment requirement in MSL than SPIR-V.
			SPIRV_CROSS_THROW("Cannot represent buffer block correctly in MSL.");
		}

		assert(aligned_msl_offset == spirv_mbr_offset);

		// Increment the current offset to be positioned immediately after the current member.
		// Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here.
		if (mbr_idx + 1 < mbr_cnt)
			msl_offset = aligned_msl_offset + get_declared_struct_member_size_msl(ib_type, mbr_idx);
	}
}
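
// Padding example: if the previous member ends at MSL offset 4 but the next member's
// SPIR-V offset is 8, a 4-byte PaddingTarget is recorded for that member and an inert
// char-array spacer is emitted ahead of it in the MSL struct.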

bool CompilerMSL::validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const
{
	auto &mbr_type = get<SPIRType>(type.member_types[index]);
	uint32_t spirv_offset = get_member_decoration(type.self, index, DecorationOffset);

	if (index + 1 < type.member_types.size())
	{
		// First, we will check offsets. If SPIR-V offset + MSL size > SPIR-V offset of next member,
		// we *must* perform some kind of remapping, no way getting around it.
		// We can always pad after this member if necessary, so that case is fine.
		uint32_t spirv_offset_next = get_member_decoration(type.self, index + 1, DecorationOffset);
		assert(spirv_offset_next >= spirv_offset);
		uint32_t maximum_size = spirv_offset_next - spirv_offset;
		uint32_t msl_mbr_size = get_declared_struct_member_size_msl(type, index);
		if (msl_mbr_size > maximum_size)
			return false;
	}

	if (is_array(mbr_type))
	{
		// If we have an array type, array stride must match exactly with SPIR-V.
		// An exception to this requirement is if we have one array element.
		// This comes from DX scalar layout workaround.
		// If app tries to be cheeky and access the member out of bounds, this will not work, but this is the best we can do.
		// In OpAccessChain with logical memory models, access chains must be in-bounds in SPIR-V specification.
		bool relax_array_stride = mbr_type.array.back() == 1 && mbr_type.array_size_literal.back();
		if (!relax_array_stride)
		{
			uint32_t spirv_array_stride = type_struct_member_array_stride(type, index);
			uint32_t msl_array_stride = get_declared_struct_member_array_stride_msl(type, index);
			if (spirv_array_stride != msl_array_stride)
				return false;
		}
	}

	if (is_matrix(mbr_type))
	{
		// Need to check MatrixStride as well.
		uint32_t spirv_matrix_stride = type_struct_member_matrix_stride(type, index);
		uint32_t msl_matrix_stride = get_declared_struct_member_matrix_stride_msl(type, index);
		if (spirv_matrix_stride != msl_matrix_stride)
			return false;
	}

	// Now, we check alignment.
	uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, index);
	if ((spirv_offset % msl_alignment) != 0)
		return false;

	// We're in the clear.
	return true;
}
  4302. // Here we need to verify that the member type we declare conforms to Offset, ArrayStride or MatrixStride restrictions.
  4303. // If there is a mismatch, we need to emit remapped types, either normal types, or "packed_X" types.
  4304. // In odd cases we need to emit packed and remapped types, for e.g. weird matrices or arrays with weird array strides.
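// For example (illustrative): a float3 member at Offset 4 cannot be declared as a native MSL float3
// (which is 16-byte aligned), so it gets emitted as packed_float3; a half matrix whose MatrixStride
// is padded out to 16 bytes additionally needs a remapped physical type, handled below.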
void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index)
{
    if (validate_member_packing_rules_msl(ib_type, index))
        return;

    // We failed validation.
    // This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite
    // match up with what we want. Scalar block layout comes to mind here, where we might have to work around the rule
    // that struct alignment == max alignment of all members and struct size depends on this alignment.
    // Can't repack structs, but can repack pointers to structs.
    auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
    bool is_buff_ptr = mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer;
    if (mbr_type.basetype == SPIRType::Struct && !is_buff_ptr)
        SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct.");

    // Perform remapping here.
    // There is nothing to be gained by using packed scalars, so don't attempt it.
    if (!is_scalar(ib_type))
        set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);

    // Try validating again, now with packed.
    if (validate_member_packing_rules_msl(ib_type, index))
        return;

    // We're in deep trouble, and we need to create a new PhysicalType which matches up with what we expect.
    // A lot of work goes here ...
    // We will need remapping on Load and Store to translate the types between Logical and Physical.

    // First, we check if we have a small vector std140 array.
    // We detect this if we have an array of vectors, and the array stride is greater than the number of elements.
    if (!mbr_type.array.empty() && !is_matrix(mbr_type))
    {
        uint32_t array_stride = type_struct_member_array_stride(ib_type, index);

        // Peel off array-of-array dimensions until we find the per-element array stride we must match.
        uint32_t dimensions = uint32_t(mbr_type.array.size() - 1);
        for (uint32_t dim = 0; dim < dimensions; dim++)
            array_stride /= max<uint32_t>(to_array_size_literal(mbr_type, dim), 1u);

        // Pointers are 8 bytes.
        uint32_t mbr_width_in_bytes = is_buff_ptr ? 8 : (mbr_type.width / 8);
        uint32_t elems_per_stride = array_stride / mbr_width_in_bytes;

        if (elems_per_stride == 3)
            SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
        else if (elems_per_stride > 4 && elems_per_stride != 8)
            SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL.");

        if (elems_per_stride == 8)
        {
            if (mbr_type.width == 16)
                add_spv_func_and_recompile(SPVFuncImplPaddedStd140);
            else
                SPIRV_CROSS_THROW("Unexpected type in std140 wide array resolve.");
        }

        auto physical_type = mbr_type;
        physical_type.vecsize = elems_per_stride;
        physical_type.parent_type = 0;

        // If this is a physical buffer pointer, replace type with a ulongn vector.
        if (is_buff_ptr)
        {
            physical_type.width = 64;
            physical_type.basetype = to_unsigned_basetype(physical_type.width);
            physical_type.pointer = false;
            physical_type.pointer_depth = false;
            physical_type.forward_pointer = false;
        }

        uint32_t type_id = ir.increase_bound_by(1);
        set<SPIRType>(type_id, physical_type);
        set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);
        set_decoration(type_id, DecorationArrayStride, array_stride);

        // Remove packed_ for vectors of size 1, 2 and 4.
        unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
    }
    else if (is_matrix(mbr_type))
    {
        // MatrixStride might be std140-esque.
        uint32_t matrix_stride = type_struct_member_matrix_stride(ib_type, index);
        uint32_t elems_per_stride = matrix_stride / (mbr_type.width / 8);

        if (elems_per_stride == 3)
            SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
        else if (elems_per_stride > 4 && elems_per_stride != 8)
            SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL.");

        if (elems_per_stride == 8)
        {
            if (mbr_type.basetype != SPIRType::Half)
                SPIRV_CROSS_THROW("Unexpected type in std140 wide matrix stride resolve.");
            add_spv_func_and_recompile(SPVFuncImplPaddedStd140);
        }

        bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor);

        auto physical_type = mbr_type;
        physical_type.parent_type = 0;

        if (row_major)
            physical_type.columns = elems_per_stride;
        else
            physical_type.vecsize = elems_per_stride;

        uint32_t type_id = ir.increase_bound_by(1);
        set<SPIRType>(type_id, physical_type);
        set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);

        // Remove packed_ for vectors of size 1, 2 and 4.
        unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
    }
    else
        SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL.");

    // Try validating again, now with physical type remapping.
    if (validate_member_packing_rules_msl(ib_type, index))
        return;

    // We might have a particularly odd scalar layout case where the last element of an array
    // does not take up as much space as the ArrayStride or MatrixStride. This can happen with DX cbuffers.
    // The "proper" workaround for this is extremely painful and essentially impossible in the edge case of float3[],
    // so we hack around it by declaring the offending array or matrix with one less array size/col/row,
    // and rely on padding to get the correct value. We will technically access arrays out of bounds into the padding region,
    // but it should spill over gracefully without too much trouble. We rely on behavior like this for unsized arrays anyways.

    // E.g. we might observe a physical layout of:
    // { float2 a[2]; float b; } in cbuffer layout where ArrayStride of a is 16, but offset of b is 24, packed right after a[1] ...
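    // In that layout (illustrative) the remapped physical type, float4 a[2] with stride 16, would
    // occupy 32 bytes and collide with 'b', so we declare it as a single-element array instead;
    // an access to a[1] then deliberately reads past the declared array into the bytes where
    // element 1 actually lives.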
    uint32_t type_id = get_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID);
    auto &type = get<SPIRType>(type_id);

    // Modify the physical type in-place. This is safe since each physical type workaround is a copy.
    if (is_array(type))
    {
        if (type.array.back() > 1)
        {
            if (!type.array_size_literal.back())
                SPIRV_CROSS_THROW("Cannot apply scalar layout workaround with spec constant array size.");
            type.array.back() -= 1;
        }
        else
        {
            // We have an array of size 1, so we cannot decrement that. Our only option now is to
            // force a packed layout instead, and drop the physical type remap since ArrayStride is meaningless now.
            unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID);
            set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
        }
    }
    else if (is_matrix(type))
    {
        bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor);
        if (!row_major)
        {
            // Slice off one column. If we only have 2 columns, this might turn the matrix into a vector with one array element instead.
            if (type.columns > 2)
            {
                type.columns--;
            }
            else if (type.columns == 2)
            {
                type.columns = 1;
                assert(type.array.empty());
                type.op = OpTypeArray;
                type.array.push_back(1);
                type.array_size_literal.push_back(true);
            }
        }
        else
        {
            // Slice off one row. If we only have 2 rows, this might turn the matrix into a vector with one array element instead.
            if (type.vecsize > 2)
            {
                type.vecsize--;
            }
            else if (type.vecsize == 2)
            {
                type.vecsize = type.columns;
                type.columns = 1;
                assert(type.array.empty());
                type.op = OpTypeArray;
                type.array.push_back(1);
                type.array_size_literal.push_back(true);
            }
        }
    }

    // This better validate now, or we must fail gracefully.
    if (!validate_member_packing_rules_msl(ib_type, index))
        SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL.");
}
void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
{
    auto &type = expression_type(rhs_expression);

    bool lhs_remapped_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID);
    bool lhs_packed_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypePacked);
    auto *lhs_e = maybe_get<SPIRExpression>(lhs_expression);
    auto *rhs_e = maybe_get<SPIRExpression>(rhs_expression);

    bool transpose = lhs_e && lhs_e->need_transpose;

    if (has_decoration(lhs_expression, DecorationBuiltIn) &&
        BuiltIn(get_decoration(lhs_expression, DecorationBuiltIn)) == BuiltInSampleMask &&
        is_array(type))
    {
        // Storing an array to SampleMask, have to remove the array-ness before storing.
        statement(to_expression(lhs_expression), " = ", to_enclosed_unpacked_expression(rhs_expression), "[0];");
        register_write(lhs_expression);
    }
    else if (!lhs_remapped_type && !lhs_packed_type)
    {
        // No physical type remapping, and no packed type, so we can emit a store directly.
        // We might not be dealing with remapped physical types or packed types,
        // but we might be doing a clean store to a row-major matrix.
        // In this case, we just flip the transpose state and emit the store; any transpose must live in the RHS expression.
        if (is_matrix(type) && lhs_e && lhs_e->need_transpose)
        {
            lhs_e->need_transpose = false;

            if (rhs_e && rhs_e->need_transpose)
            {
                // Direct copy, but might need to unpack RHS.
                // Skip the transpose, as we will transpose when writing to LHS and transpose(transpose(T)) == T.
                rhs_e->need_transpose = false;
                statement(to_expression(lhs_expression), " = ", to_unpacked_row_major_matrix_expression(rhs_expression),
                          ";");
                rhs_e->need_transpose = true;
            }
            else
                statement(to_expression(lhs_expression), " = transpose(", to_unpacked_expression(rhs_expression), ");");

            lhs_e->need_transpose = true;
            register_write(lhs_expression);
        }
        else if (lhs_e && lhs_e->need_transpose)
        {
            lhs_e->need_transpose = false;

            // Storing a column to a row-major matrix. Unroll the write.
            for (uint32_t c = 0; c < type.vecsize; c++)
            {
                auto lhs_expr = to_dereferenced_expression(lhs_expression);
                auto column_index = lhs_expr.find_last_of('[');
                if (column_index != string::npos)
                {
                    statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ",
                              to_extract_component_expression(rhs_expression, c), ";");
                }
            }
            lhs_e->need_transpose = true;
            register_write(lhs_expression);
        }
        else
            CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression);
    }
    else if (!lhs_remapped_type && !is_matrix(type) && !transpose)
    {
        // Even if the target type is packed, we can directly store to it. We cannot store to packed matrices directly,
        // since they are declared as arrays of vectors instead, and we need the fallback path below.
        CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression);
    }
    else
    {
        // Special handling when storing to a remapped physical type.
        // This is mostly to deal with std140 padded matrices or vectors.

        TypeID physical_type_id = lhs_remapped_type ?
                                  ID(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID)) :
                                  type.self;

        auto &physical_type = get<SPIRType>(physical_type_id);

        string cast_addr_space = "thread";
        auto *p_var_lhs = maybe_get_backing_variable(lhs_expression);
        if (p_var_lhs)
            cast_addr_space = get_type_address_space(get<SPIRType>(p_var_lhs->basetype), lhs_expression);

        if (is_matrix(type))
        {
            const char *packed_pfx = lhs_packed_type ? "packed_" : "";

            // Packed matrices are stored as arrays of packed vectors, so we need
            // to assign the vectors one at a time.
            // For row-major matrices, we need to transpose the *right-hand* side,
            // not the left-hand side.

            // Lots of cases to cover here ...

            bool rhs_transpose = rhs_e && rhs_e->need_transpose;

            SPIRType write_type = type;
            string cast_expr;

            // We're dealing with transpose manually.
            if (rhs_transpose)
                rhs_e->need_transpose = false;

            if (transpose)
            {
                // We're dealing with transpose manually.
                lhs_e->need_transpose = false;

                write_type.vecsize = type.columns;
                write_type.columns = 1;

                if (physical_type.columns != type.columns)
                    cast_expr = join("(", cast_addr_space, " ", packed_pfx, type_to_glsl(write_type), "&)");

                if (rhs_transpose)
                {
                    // If RHS is also transposed, we can just copy row by row.
                    for (uint32_t i = 0; i < type.vecsize; i++)
                    {
                        statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ",
                                  to_unpacked_row_major_matrix_expression(rhs_expression), "[", i, "];");
                    }
                }
                else
                {
                    auto vector_type = expression_type(rhs_expression);
                    vector_type.vecsize = vector_type.columns;
                    vector_type.columns = 1;

                    // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad,
                    // so pick out individual components instead.
                    for (uint32_t i = 0; i < type.vecsize; i++)
                    {
                        string rhs_row = type_to_glsl_constructor(vector_type) + "(";
                        for (uint32_t j = 0; j < vector_type.vecsize; j++)
                        {
                            rhs_row += join(to_enclosed_unpacked_expression(rhs_expression), "[", j, "][", i, "]");
                            if (j + 1 < vector_type.vecsize)
                                rhs_row += ", ";
                        }
                        rhs_row += ")";

                        statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", rhs_row, ";");
                    }
                }

                // We're dealing with transpose manually.
                lhs_e->need_transpose = true;
            }
            else
            {
                write_type.columns = 1;

                if (physical_type.vecsize != type.vecsize)
                    cast_expr = join("(", cast_addr_space, " ", packed_pfx, type_to_glsl(write_type), "&)");

                if (rhs_transpose)
                {
                    auto vector_type = expression_type(rhs_expression);
                    vector_type.columns = 1;

                    // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad,
                    // so pick out individual components instead.
                    for (uint32_t i = 0; i < type.columns; i++)
                    {
                        string rhs_row = type_to_glsl_constructor(vector_type) + "(";
                        for (uint32_t j = 0; j < vector_type.vecsize; j++)
                        {
                            // Need to explicitly unpack expression since we've mucked with transpose state.
                            auto unpacked_expr = to_unpacked_row_major_matrix_expression(rhs_expression);
                            rhs_row += join(unpacked_expr, "[", j, "][", i, "]");
                            if (j + 1 < vector_type.vecsize)
                                rhs_row += ", ";
                        }
                        rhs_row += ")";

                        statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", rhs_row, ";");
                    }
                }
                else
                {
                    // Copy column-by-column.
                    for (uint32_t i = 0; i < type.columns; i++)
                    {
                        statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ",
                                  to_enclosed_unpacked_expression(rhs_expression), "[", i, "];");
                    }
                }
            }

            // We're dealing with transpose manually.
            if (rhs_transpose)
                rhs_e->need_transpose = true;
        }
        else if (transpose)
        {
            lhs_e->need_transpose = false;

            SPIRType write_type = type;
            write_type.vecsize = 1;
            write_type.columns = 1;

            // Storing a column to a row-major matrix. Unroll the write.
            for (uint32_t c = 0; c < type.vecsize; c++)
            {
                auto lhs_expr = to_enclosed_expression(lhs_expression);
                auto column_index = lhs_expr.find_last_of('[');

                // Get rid of any ".data" half8 handling here; we're casting to scalar anyway.
                auto end_column_index = lhs_expr.find_last_of(']');
                auto end_dot_index = lhs_expr.find_last_of('.');
                if (end_dot_index != string::npos && end_dot_index > end_column_index)
                    lhs_expr.resize(end_dot_index);

                if (column_index != string::npos)
                {
                    statement("((", cast_addr_space, " ", type_to_glsl(write_type), "*)&",
                              lhs_expr.insert(column_index, join('[', c, ']', ")")), " = ",
                              to_extract_component_expression(rhs_expression, c), ";");
                }
            }

            lhs_e->need_transpose = true;
        }
        else if ((is_matrix(physical_type) || is_array(physical_type)) &&
                 physical_type.vecsize <= 4 &&
                 physical_type.vecsize > type.vecsize)
        {
            assert(type.vecsize >= 1 && type.vecsize <= 3);

            // If we have packed types, we cannot use swizzled stores.
            // We could technically unroll the store for each element if needed.
            // When remapping to a std140 physical type, we always get float4,
            // and the packed decoration should always be removed.
            assert(!lhs_packed_type);

            string lhs = to_dereferenced_expression(lhs_expression);
            string rhs = to_pointer_expression(rhs_expression);

            // Unpack the expression so we can store to it with a float or float2.
            // It's still an l-value, so it's fine. Most other unpacking of expressions turns them into r-values instead.
            lhs = join("(", cast_addr_space, " ", type_to_glsl(type), "&)", enclose_expression(lhs));
            if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
                statement(lhs, " = ", rhs, ";");
        }
        else if (!is_matrix(type))
        {
            string lhs = to_dereferenced_expression(lhs_expression);
            string rhs = to_pointer_expression(rhs_expression);
            if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
                statement(lhs, " = ", rhs, ";");
        }

        register_write(lhs_expression);
    }
}

static bool expression_ends_with(const string &expr_str, const std::string &ending)
{
    if (expr_str.length() >= ending.length())
        return (expr_str.compare(expr_str.length() - ending.length(), ending.length(), ending) == 0);
    else
        return false;
}

// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
// Also, handle special physical ID remapping scenarios, similar to emit_store_statement().
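// For example (illustrative): a packed_float3 member "v" unpacks as "float3(v)", while a packed
// float2x3 stored as an array of packed vectors unpacks as "float2x3(float3(v[0]), float3(v[1]))".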
string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t physical_type_id,
                                           bool packed, bool row_major)
{
    // Trivial case, nothing to do.
    if (physical_type_id == 0 && !packed)
        return expr_str;

    const SPIRType *physical_type = nullptr;
    if (physical_type_id)
        physical_type = &get<SPIRType>(physical_type_id);

    static const char *swizzle_lut[] = {
        ".x",
        ".xy",
        ".xyz",
        "",
    };

    // TODO: Move everything to the template wrapper?
    bool uses_std140_wrapper = physical_type && physical_type->vecsize > 4;

    if (physical_type && is_vector(*physical_type) && is_array(*physical_type) &&
        !uses_std140_wrapper &&
        physical_type->vecsize > type.vecsize && !expression_ends_with(expr_str, swizzle_lut[type.vecsize - 1]))
    {
        // std140 array cases for vectors.
        assert(type.vecsize >= 1 && type.vecsize <= 3);
        return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1];
    }
    else if (physical_type && is_matrix(*physical_type) && is_vector(type) &&
             !uses_std140_wrapper &&
             physical_type->vecsize > type.vecsize)
    {
        // Extract column from padded matrix.
        assert(type.vecsize >= 1 && type.vecsize <= 4);
        return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1];
    }
    else if (is_matrix(type))
    {
        // Packed matrices are stored as arrays of packed vectors. Unfortunately,
        // we can't just pass the array straight to the matrix constructor. We have to
        // pass each vector individually, so that they can be unpacked to normal vectors.
        if (!physical_type)
            physical_type = &type;

        uint32_t vecsize = type.vecsize;
        uint32_t columns = type.columns;
        if (row_major)
            swap(vecsize, columns);

        uint32_t physical_vecsize = row_major ? physical_type->columns : physical_type->vecsize;

        const char *base_type = type.width == 16 ? "half" : "float";
        string unpack_expr = join(base_type, columns, "x", vecsize, "(");

        const char *load_swiz = "";
        const char *data_swiz = physical_vecsize > 4 ? ".data" : "";

        if (physical_vecsize != vecsize)
            load_swiz = swizzle_lut[vecsize - 1];

        for (uint32_t i = 0; i < columns; i++)
        {
            if (i > 0)
                unpack_expr += ", ";

            if (packed)
                unpack_expr += join(base_type, physical_vecsize, "(", expr_str, "[", i, "]", ")", load_swiz);
            else
                unpack_expr += join(expr_str, "[", i, "]", data_swiz, load_swiz);
        }

        unpack_expr += ")";
        return unpack_expr;
    }
    else
    {
        return join(type_to_glsl(type), "(", expr_str, ")");
    }
}

// Emits the file header info
void CompilerMSL::emit_header()
{
    // This particular line can be overridden during compilation, so make it a flag and not a pragma line.
    if (suppress_missing_prototypes)
        statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
    if (suppress_incompatible_pointer_types_discard_qualifiers)
        statement("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"");

    // Disable warning about missing braces for array<T> template to make arrays a value type
    if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0)
        statement("#pragma clang diagnostic ignored \"-Wmissing-braces\"");

    for (auto &pragma : pragma_lines)
        statement(pragma);

    if (!pragma_lines.empty() || suppress_missing_prototypes)
        statement("");

    statement("#include <metal_stdlib>");
    statement("#include <simd/simd.h>");

    for (auto &header : header_lines)
        statement(header);

    statement("");
    statement("using namespace metal;");
    statement("");

    for (auto &td : typedef_lines)
        statement(td);

    if (!typedef_lines.empty())
        statement("");
}
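// With no extra pragmas, headers or typedefs, emit_header() above produces just:
//
//     #include <metal_stdlib>
//     #include <simd/simd.h>
//
//     using namespace metal;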
void CompilerMSL::add_pragma_line(const string &line)
{
    auto rslt = pragma_lines.insert(line);
    if (rslt.second)
        force_recompile();
}

void CompilerMSL::add_typedef_line(const string &line)
{
    auto rslt = typedef_lines.insert(line);
    if (rslt.second)
        force_recompile();
}

// Template structs like spvUnsafeArray<> need to be declared *before* any resources are declared
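// (e.g. so that generated code can declare members like "spvUnsafeArray<float4, 2> attr;").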
void CompilerMSL::emit_custom_templates()
{
    static const char * const address_spaces[] = {
        "thread", "constant", "device", "threadgroup", "threadgroup_imageblock", "ray_data", "object_data"
    };

    for (const auto &spv_func : spv_function_implementations)
    {
        switch (spv_func)
        {
        case SPVFuncImplUnsafeArray:
            statement("template<typename T, size_t Num>");
            statement("struct spvUnsafeArray");
            begin_scope();
            statement("T elements[Num ? Num : 1];");
            statement("");
            statement("thread T& operator [] (size_t pos) thread");
            begin_scope();
            statement("return elements[pos];");
            end_scope();
            statement("constexpr const thread T& operator [] (size_t pos) const thread");
            begin_scope();
            statement("return elements[pos];");
            end_scope();
            statement("");
            statement("device T& operator [] (size_t pos) device");
            begin_scope();
            statement("return elements[pos];");
            end_scope();
            statement("constexpr const device T& operator [] (size_t pos) const device");
            begin_scope();
            statement("return elements[pos];");
            end_scope();
            statement("");
            statement("constexpr const constant T& operator [] (size_t pos) const constant");
            begin_scope();
            statement("return elements[pos];");
            end_scope();
            statement("");
            statement("threadgroup T& operator [] (size_t pos) threadgroup");
            begin_scope();
            statement("return elements[pos];");
            end_scope();
            statement("constexpr const threadgroup T& operator [] (size_t pos) const threadgroup");
            begin_scope();
            statement("return elements[pos];");
            end_scope();
            end_scope_decl();
            statement("");
            break;

        case SPVFuncImplStorageMatrix:
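            // spvStorageMatrix mirrors matrix<T, Cols, Rows> as a plain array of column vectors and
            // defines constructors, assignments and conversions for each address-space combination,
            // so stored matrices can round-trip to native matrix types; see the emitted code below.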
            statement("template<typename T, int Cols, int Rows=Cols>");
            statement("struct spvStorageMatrix");
            begin_scope();
            statement("vec<T, Rows> columns[Cols];");
            statement("");
            for (size_t method_idx = 0; method_idx < sizeof(address_spaces) / sizeof(address_spaces[0]); ++method_idx)
            {
                // Some address spaces require particular features.
                if (method_idx == 4) // threadgroup_imageblock
                    statement("#ifdef __HAVE_IMAGEBLOCKS__");
                else if (method_idx == 5) // ray_data
                    statement("#ifdef __HAVE_RAYTRACING__");
                else if (method_idx == 6) // object_data
                    statement("#ifdef __HAVE_MESH__");
                const string &method_as = address_spaces[method_idx];
                statement("spvStorageMatrix() ", method_as, " = default;");
                if (method_idx != 1) // constant
                {
                    statement(method_as, " spvStorageMatrix& operator=(initializer_list<vec<T, Rows>> cols) ",
                              method_as);
                    begin_scope();
                    statement("size_t i;");
                    statement("thread vec<T, Rows>* col;");
                    statement("for (i = 0, col = cols.begin(); i < Cols; ++i, ++col)");
                    statement("    columns[i] = *col;");
                    statement("return *this;");
                    end_scope();
                }
                statement("");
                for (size_t param_idx = 0; param_idx < sizeof(address_spaces) / sizeof(address_spaces[0]); ++param_idx)
                {
                    if (param_idx != method_idx)
                    {
                        if (param_idx == 4) // threadgroup_imageblock
                            statement("#ifdef __HAVE_IMAGEBLOCKS__");
                        else if (param_idx == 5) // ray_data
                            statement("#ifdef __HAVE_RAYTRACING__");
                        else if (param_idx == 6) // object_data
                            statement("#ifdef __HAVE_MESH__");
                    }
                    const string &param_as = address_spaces[param_idx];
                    statement("spvStorageMatrix(const ", param_as, " matrix<T, Cols, Rows>& m) ", method_as);
                    begin_scope();
                    statement("for (size_t i = 0; i < Cols; ++i)");
                    statement("    columns[i] = m.columns[i];");
                    end_scope();
                    statement("spvStorageMatrix(const ", param_as, " spvStorageMatrix& m) ", method_as, " = default;");
                    if (method_idx != 1) // constant
                    {
                        statement(method_as, " spvStorageMatrix& operator=(const ", param_as,
                                  " matrix<T, Cols, Rows>& m) ", method_as);
                        begin_scope();
                        statement("for (size_t i = 0; i < Cols; ++i)");
                        statement("    columns[i] = m.columns[i];");
                        statement("return *this;");
                        end_scope();
                        statement(method_as, " spvStorageMatrix& operator=(const ", param_as, " spvStorageMatrix& m) ",
                                  method_as, " = default;");
                    }
                    if (param_idx != method_idx && param_idx >= 4)
                        statement("#endif");
                    statement("");
                }
                statement("operator matrix<T, Cols, Rows>() const ", method_as);
                begin_scope();
                statement("matrix<T, Cols, Rows> m;");
                statement("for (int i = 0; i < Cols; ++i)");
                statement("    m.columns[i] = columns[i];");
                statement("return m;");
                end_scope();
                statement("");
                statement("vec<T, Rows> operator[](size_t idx) const ", method_as);
                begin_scope();
                statement("return columns[idx];");
                end_scope();
                if (method_idx != 1) // constant
                {
                    statement(method_as, " vec<T, Rows>& operator[](size_t idx) ", method_as);
                    begin_scope();
                    statement("return columns[idx];");
                    end_scope();
                }
                if (method_idx >= 4)
                    statement("#endif");
                statement("");
            }
            end_scope_decl();
            statement("");
            statement("template<typename T, int Cols, int Rows>");
            statement("matrix<T, Rows, Cols> transpose(spvStorageMatrix<T, Cols, Rows> m)");
            begin_scope();
            statement("return transpose(matrix<T, Cols, Rows>(m));");
            end_scope();
            statement("");
            statement("typedef spvStorageMatrix<half, 2, 2> spvStorage_half2x2;");
            statement("typedef spvStorageMatrix<half, 2, 3> spvStorage_half2x3;");
            statement("typedef spvStorageMatrix<half, 2, 4> spvStorage_half2x4;");
            statement("typedef spvStorageMatrix<half, 3, 2> spvStorage_half3x2;");
            statement("typedef spvStorageMatrix<half, 3, 3> spvStorage_half3x3;");
            statement("typedef spvStorageMatrix<half, 3, 4> spvStorage_half3x4;");
            statement("typedef spvStorageMatrix<half, 4, 2> spvStorage_half4x2;");
            statement("typedef spvStorageMatrix<half, 4, 3> spvStorage_half4x3;");
            statement("typedef spvStorageMatrix<half, 4, 4> spvStorage_half4x4;");
            statement("typedef spvStorageMatrix<float, 2, 2> spvStorage_float2x2;");
            statement("typedef spvStorageMatrix<float, 2, 3> spvStorage_float2x3;");
            statement("typedef spvStorageMatrix<float, 2, 4> spvStorage_float2x4;");
            statement("typedef spvStorageMatrix<float, 3, 2> spvStorage_float3x2;");
            statement("typedef spvStorageMatrix<float, 3, 3> spvStorage_float3x3;");
            statement("typedef spvStorageMatrix<float, 3, 4> spvStorage_float3x4;");
            statement("typedef spvStorageMatrix<float, 4, 2> spvStorage_float4x2;");
            statement("typedef spvStorageMatrix<float, 4, 3> spvStorage_float4x3;");
            statement("typedef spvStorageMatrix<float, 4, 4> spvStorage_float4x4;");
            statement("");
            break;

        default:
            break;
        }
    }
}

// Emits any needed custom function bodies.
// Metal helper functions must be static force-inline, i.e. static inline __attribute__((always_inline));
// otherwise they will cause problems when linked together in a single Metallib.
void CompilerMSL::emit_custom_functions()
{
    // Use when outputting overloaded functions to cover different address spaces.
    static const char *texture_addr_spaces[] = { "device", "constant", "thread" };
    static uint32_t texture_addr_space_count = sizeof(texture_addr_spaces) / sizeof(char*);

    if (spv_function_implementations.count(SPVFuncImplArrayCopyMultidim))
        spv_function_implementations.insert(SPVFuncImplArrayCopy);

    if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler))
    {
        // Unfortunately, this one needs a lot of the other functions to compile OK.
        if (!msl_options.supports_msl_version(2))
            SPIRV_CROSS_THROW(
                "spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0.");
        spv_function_implementations.insert(SPVFuncImplForwardArgs);
        spv_function_implementations.insert(SPVFuncImplTextureSwizzle);
        if (msl_options.swizzle_texture_samples)
            spv_function_implementations.insert(SPVFuncImplGatherSwizzle);
        for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane;
             i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++)
            spv_function_implementations.insert(static_cast<SPVFuncImpl>(i));
        spv_function_implementations.insert(SPVFuncImplExpandITUFullRange);
        spv_function_implementations.insert(SPVFuncImplExpandITUNarrowRange);
        spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT709);
        spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT601);
        spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020);
    }

    for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane;
         i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++)
        if (spv_function_implementations.count(static_cast<SPVFuncImpl>(i)))
            spv_function_implementations.insert(SPVFuncImplForwardArgs);

    if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) ||
        spv_function_implementations.count(SPVFuncImplGatherSwizzle) ||
        spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle))
    {
        spv_function_implementations.insert(SPVFuncImplForwardArgs);
        spv_function_implementations.insert(SPVFuncImplGetSwizzle);
    }

    for (const auto &spv_func : spv_function_implementations)
    {
        switch (spv_func)
        {
        case SPVFuncImplMod:
            statement("// Implementation of the GLSL mod() function, which is slightly different from Metal fmod()");
            statement("template<typename Tx, typename Ty>");
            statement("inline Tx mod(Tx x, Ty y)");
            begin_scope();
            statement("return x - y * floor(x / y);");
            end_scope();
            statement("");
            break;
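            // Note: GLSL mod() is floored, e.g. mod(-1.5, 2.0) == 0.5, whereas Metal's fmod()
            // truncates and would return -1.5; hence the dedicated helper above.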
        case SPVFuncImplRadians:
            statement("// Implementation of the GLSL radians() function");
            statement("template<typename T>");
            statement("inline T radians(T d)");
            begin_scope();
            statement("return d * T(0.01745329251);");
            end_scope();
            statement("");
            break;

        case SPVFuncImplDegrees:
            statement("// Implementation of the GLSL degrees() function");
            statement("template<typename T>");
            statement("inline T degrees(T r)");
            begin_scope();
            statement("return r * T(57.2957795131);");
            end_scope();
            statement("");
            break;

        case SPVFuncImplFindILsb:
            statement("// Implementation of the GLSL findLSB() function");
            statement("template<typename T>");
            statement("inline T spvFindLSB(T x)");
            begin_scope();
            statement("return select(ctz(x), T(-1), x == T(0));");
            end_scope();
            statement("");
            break;

        case SPVFuncImplFindUMsb:
            statement("// Implementation of the unsigned GLSL findMSB() function");
            statement("template<typename T>");
            statement("inline T spvFindUMSB(T x)");
            begin_scope();
            statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));");
            end_scope();
            statement("");
            break;
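            // For a 32-bit T, clz(T(0)) == 32, so the helper above computes 31 - clz(x);
            // e.g. spvFindUMSB(16u) == 4, while x == 0 yields -1 as GLSL requires.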
        case SPVFuncImplFindSMsb:
            statement("// Implementation of the signed GLSL findMSB() function");
            statement("template<typename T>");
            statement("inline T spvFindSMSB(T x)");
            begin_scope();
            statement("T v = select(x, T(-1) - x, x < T(0));");
            statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));");
            end_scope();
            statement("");
            break;

        case SPVFuncImplSSign:
            statement("// Implementation of the GLSL sign() function for integer types");
            statement("template<typename T, typename E = typename enable_if<is_integral<T>::value>::type>");
            statement("inline T sign(T x)");
            begin_scope();
            statement("return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0));");
            end_scope();
            statement("");
            break;

        case SPVFuncImplArrayCopy:
        case SPVFuncImplArrayCopyMultidim:
        {
            // Unfortunately we cannot template on the address space, so combinatorial explosion it is.
            static const char *function_name_tags[] = {
                "FromConstantToStack", "FromConstantToThreadGroup", "FromStackToStack",
                "FromStackToThreadGroup", "FromThreadGroupToStack", "FromThreadGroupToThreadGroup",
                "FromDeviceToDevice", "FromConstantToDevice", "FromStackToDevice",
                "FromThreadGroupToDevice", "FromDeviceToStack", "FromDeviceToThreadGroup",
            };

            static const char *src_address_space[] = {
                "constant", "constant", "thread const", "thread const",
                "threadgroup const", "threadgroup const", "device const", "constant",
                "thread const", "threadgroup const", "device const", "device const",
            };

            static const char *dst_address_space[] = {
                "thread", "threadgroup", "thread", "threadgroup", "thread", "threadgroup",
                "device", "device", "device", "device", "thread", "threadgroup",
            };

            for (uint32_t variant = 0; variant < 12; variant++)
            {
                bool is_multidim = spv_func == SPVFuncImplArrayCopyMultidim;
                const char* dim = is_multidim ? "[N][M]" : "[N]";
                statement("template<typename T, uint N", is_multidim ? ", uint M>" : ">");
                statement("inline void spvArrayCopy", function_name_tags[variant], "(",
                          dst_address_space[variant], " T (&dst)", dim, ", ",
                          src_address_space[variant], " T (&src)", dim, ")");
                begin_scope();
                statement("for (uint i = 0; i < N; i++)");
                begin_scope();
                if (is_multidim)
                    statement("spvArrayCopy", function_name_tags[variant], "(dst[i], src[i]);");
                else
                    statement("dst[i] = src[i];");
                end_scope();
                end_scope();
                statement("");
            }
            break;
        }

        // Support for Metal 2.1's new texture_buffer type.
        case SPVFuncImplTexelBufferCoords:
        {
            if (msl_options.texel_buffer_texture_width > 0)
            {
                string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width);
                statement("// Returns 2D texture coords corresponding to 1D texel buffer coords");
                statement(force_inline);
                statement("uint2 spvTexelBufferCoord(uint tc)");
                begin_scope();
                statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");"));
                end_scope();
                statement("");
            }
            else
            {
                statement("// Returns 2D texture coords corresponding to 1D texel buffer coords");
                statement(
                    "#define spvTexelBufferCoord(tc, tex) uint2((tc) % (tex).get_width(), (tc) / (tex).get_width())");
                statement("");
            }
            break;
        }

        // Emulate texture2D atomic operations
        case SPVFuncImplImage2DAtomicCoords:
        {
            if (msl_options.supports_msl_version(1, 2))
            {
                statement("// The required alignment of a linear texture of R32Uint format.");
                statement("constant uint spvLinearTextureAlignmentOverride [[function_constant(",
                          msl_options.r32ui_alignment_constant_id, ")]];");
                statement("constant uint spvLinearTextureAlignment = ",
                          "is_function_constant_defined(spvLinearTextureAlignmentOverride) ? ",
                          "spvLinearTextureAlignmentOverride : ", msl_options.r32ui_linear_texture_alignment, ";");
            }
            else
            {
                statement("// The required alignment of a linear texture of R32Uint format.");
                statement("constant uint spvLinearTextureAlignment = ", msl_options.r32ui_linear_texture_alignment,
                          ";");
            }
            statement("// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics");
            statement("#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + ",
                      " spvLinearTextureAlignment / 4 - 1) & ~(",
                      " spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)");
            statement("");
            break;
        }
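        // The macro above rounds the texture width up to the alignment (divided by 4 to convert
        // bytes to R32Uint texels) before computing the linear index; e.g. with a 64-byte alignment
        // (16 texels) and a width of 100, each row occupies 112 texels of the backing buffer.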
        // Fix up gradient vectors when sampling a cube texture for Apple Silicon.
        // h/t Alexey Knyazev (https://github.com/KhronosGroup/MoltenVK/issues/2068#issuecomment-1817799067) for the code.
        case SPVFuncImplGradientCube:
            statement("static inline gradientcube spvGradientCube(float3 P, float3 dPdx, float3 dPdy)");
            begin_scope();
            statement("// Major axis selection");
            statement("float3 absP = abs(P);");
            statement("bool xMajor = absP.x >= max(absP.y, absP.z);");
            statement("bool yMajor = absP.y >= absP.z;");
            statement("float3 Q = xMajor ? P.yzx : (yMajor ? P.xzy : P);");
            statement("float3 dQdx = xMajor ? dPdx.yzx : (yMajor ? dPdx.xzy : dPdx);");
            statement("float3 dQdy = xMajor ? dPdy.yzx : (yMajor ? dPdy.xzy : dPdy);");
            statement_no_indent("");
            statement("// Skip a couple of operations compared to usual projection");
            statement("float4 d = float4(dQdx.xy, dQdy.xy) - (Q.xy / Q.z).xyxy * float4(dQdx.zz, dQdy.zz);");
            statement_no_indent("");
            statement("// Final swizzle to put the intermediate values into non-ignored components");
            statement("// X major: X and Z");
            statement("// Y major: X and Y");
            statement("// Z major: Y and Z");
            statement("return gradientcube(xMajor ? d.xxy : d.xyx, xMajor ? d.zzw : d.zwz);");
            end_scope();
            statement("");
            break;

        // "fadd" intrinsic support
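        // The spvFAdd/spvFSub/spvFMul helpers below are written in terms of fma() and marked
        // [[clang::optnone]] so fast-math cannot re-associate or contract them, e.g. for
        // operations SPIR-V marks as NoContraction.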
  5203. case SPVFuncImplFAdd:
  5204. statement("template<typename T>");
  5205. statement("[[clang::optnone]] T spvFAdd(T l, T r)");
  5206. begin_scope();
  5207. statement("return fma(T(1), l, r);");
  5208. end_scope();
  5209. statement("");
  5210. break;
  5211. // "fsub" intrinsic support
  5212. case SPVFuncImplFSub:
  5213. statement("template<typename T>");
  5214. statement("[[clang::optnone]] T spvFSub(T l, T r)");
  5215. begin_scope();
  5216. statement("return fma(T(-1), r, l);");
  5217. end_scope();
  5218. statement("");
  5219. break;
  5220. // "fmul' intrinsic support
  5221. case SPVFuncImplFMul:
  5222. statement("template<typename T>");
  5223. statement("[[clang::optnone]] T spvFMul(T l, T r)");
  5224. begin_scope();
  5225. statement("return fma(l, r, T(0));");
  5226. end_scope();
  5227. statement("");
  5228. statement("template<typename T, int Cols, int Rows>");
  5229. statement("[[clang::optnone]] vec<T, Cols> spvFMulVectorMatrix(vec<T, Rows> v, matrix<T, Cols, Rows> m)");
  5230. begin_scope();
  5231. statement("vec<T, Cols> res = vec<T, Cols>(0);");
  5232. statement("for (uint i = Rows; i > 0; --i)");
  5233. begin_scope();
  5234. statement("vec<T, Cols> tmp(0);");
  5235. statement("for (uint j = 0; j < Cols; ++j)");
  5236. begin_scope();
  5237. statement("tmp[j] = m[j][i - 1];");
  5238. end_scope();
  5239. statement("res = fma(tmp, vec<T, Cols>(v[i - 1]), res);");
  5240. end_scope();
  5241. statement("return res;");
  5242. end_scope();
  5243. statement("");
  5244. statement("template<typename T, int Cols, int Rows>");
  5245. statement("[[clang::optnone]] vec<T, Rows> spvFMulMatrixVector(matrix<T, Cols, Rows> m, vec<T, Cols> v)");
  5246. begin_scope();
  5247. statement("vec<T, Rows> res = vec<T, Rows>(0);");
  5248. statement("for (uint i = Cols; i > 0; --i)");
  5249. begin_scope();
  5250. statement("res = fma(m[i - 1], vec<T, Rows>(v[i - 1]), res);");
  5251. end_scope();
  5252. statement("return res;");
  5253. end_scope();
  5254. statement("");
  5255. statement("template<typename T, int LCols, int LRows, int RCols, int RRows>");
  5256. statement("[[clang::optnone]] matrix<T, RCols, LRows> spvFMulMatrixMatrix(matrix<T, LCols, LRows> l, matrix<T, RCols, RRows> r)");
  5257. begin_scope();
  5258. statement("matrix<T, RCols, LRows> res;");
  5259. statement("for (uint i = 0; i < RCols; i++)");
  5260. begin_scope();
  5261. statement("vec<T, RCols> tmp(0);");
  5262. statement("for (uint j = 0; j < LCols; j++)");
  5263. begin_scope();
  5264. statement("tmp = fma(vec<T, RCols>(r[i][j]), l[j], tmp);");
  5265. end_scope();
  5266. statement("res[i] = tmp;");
  5267. end_scope();
  5268. statement("return res;");
  5269. end_scope();
  5270. statement("");
  5271. break;
  5272. case SPVFuncImplQuantizeToF16:
  5273. // Ensure fast-math is disabled to match Vulkan results.
  5274. // SpvHalfTypeSelector is used to match the half* template type to the float* template type.
  5275. // Depending on GPU, MSL does not always flush converted subnormal halfs to zero,
  5276. // as required by OpQuantizeToF16, so check for subnormals and flush them to zero.
  5277. statement("template <typename F> struct SpvHalfTypeSelector;");
  5278. statement("template <> struct SpvHalfTypeSelector<float> { public: using H = half; };");
  5279. statement("template<uint N> struct SpvHalfTypeSelector<vec<float, N>> { using H = vec<half, N>; };");
  5280. statement("template<typename F, typename H = typename SpvHalfTypeSelector<F>::H>");
  5281. statement("[[clang::optnone]] F spvQuantizeToF16(F fval)");
  5282. begin_scope();
  5283. statement("H hval = H(fval);");
  5284. statement("hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval));");
  5285. statement("return F(hval);");
  5286. end_scope();
  5287. statement("");
  5288. break;
  5289. // Emulate texturecube_array with texture2d_array for iOS where this type is not available
  5290. case SPVFuncImplCubemapTo2DArrayFace:
  5291. statement(force_inline);
  5292. statement("float3 spvCubemapTo2DArrayFace(float3 P)");
  5293. begin_scope();
  5294. statement("float3 Coords = abs(P.xyz);");
  5295. statement("float CubeFace = 0;");
  5296. statement("float ProjectionAxis = 0;");
  5297. statement("float u = 0;");
  5298. statement("float v = 0;");
  5299. statement("if (Coords.x >= Coords.y && Coords.x >= Coords.z)");
  5300. begin_scope();
  5301. statement("CubeFace = P.x >= 0 ? 0 : 1;");
  5302. statement("ProjectionAxis = Coords.x;");
  5303. statement("u = P.x >= 0 ? -P.z : P.z;");
  5304. statement("v = -P.y;");
  5305. end_scope();
  5306. statement("else if (Coords.y >= Coords.x && Coords.y >= Coords.z)");
  5307. begin_scope();
  5308. statement("CubeFace = P.y >= 0 ? 2 : 3;");
  5309. statement("ProjectionAxis = Coords.y;");
  5310. statement("u = P.x;");
  5311. statement("v = P.y >= 0 ? P.z : -P.z;");
  5312. end_scope();
  5313. statement("else");
  5314. begin_scope();
  5315. statement("CubeFace = P.z >= 0 ? 4 : 5;");
  5316. statement("ProjectionAxis = Coords.z;");
  5317. statement("u = P.z >= 0 ? P.x : -P.x;");
  5318. statement("v = -P.y;");
  5319. end_scope();
  5320. statement("u = 0.5 * (u/ProjectionAxis + 1);");
  5321. statement("v = 0.5 * (v/ProjectionAxis + 1);");
  5322. statement("return float3(u, v, CubeFace);");
  5323. end_scope();
  5324. statement("");
  5325. break;
  5326. case SPVFuncImplInverse4x4:
  5327. statement("// Returns the determinant of a 2x2 matrix.");
  5328. statement(force_inline);
  5329. statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
  5330. begin_scope();
  5331. statement("return a1 * b2 - b1 * a2;");
  5332. end_scope();
  5333. statement("");
  5334. statement("// Returns the determinant of a 3x3 matrix.");
  5335. statement(force_inline);
  5336. statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, "
  5337. "float c2, float c3)");
  5338. begin_scope();
  5339. statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, "
  5340. "b2, b3);");
  5341. end_scope();
  5342. statement("");
  5343. statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
  5344. statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
  5345. statement(force_inline);
  5346. statement("float4x4 spvInverse4x4(float4x4 m)");
  5347. begin_scope();
  5348. statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)");
  5349. statement_no_indent("");
  5350. statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
  5351. statement("adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
  5352. "m[3][3]);");
  5353. statement("adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
  5354. "m[3][3]);");
  5355. statement("adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], "
  5356. "m[3][3]);");
  5357. statement("adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], "
  5358. "m[2][3]);");
  5359. statement_no_indent("");
  5360. statement("adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
  5361. "m[3][3]);");
  5362. statement("adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
  5363. "m[3][3]);");
  5364. statement("adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], "
  5365. "m[3][3]);");
  5366. statement("adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], "
  5367. "m[2][3]);");
  5368. statement_no_indent("");
  5369. statement("adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
  5370. "m[3][3]);");
  5371. statement("adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
  5372. "m[3][3]);");
  5373. statement("adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], "
  5374. "m[3][3]);");
  5375. statement("adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], "
  5376. "m[2][3]);");
  5377. statement_no_indent("");
  5378. statement("adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
  5379. "m[3][2]);");
  5380. statement("adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
  5381. "m[3][2]);");
  5382. statement("adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], "
  5383. "m[3][2]);");
  5384. statement("adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], "
  5385. "m[2][2]);");
  5386. statement_no_indent("");
  5387. statement("// Calculate the determinant as a combination of the cofactors of the first row.");
  5388. statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] "
  5389. "* m[3][0]);");
  5390. statement_no_indent("");
  5391. statement("// Divide the classical adjoint matrix by the determinant.");
  5392. statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
  5393. statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
  5394. end_scope();
  5395. statement("");
  5396. break;
  5397. case SPVFuncImplInverse3x3:
  5398. if (spv_function_implementations.count(SPVFuncImplInverse4x4) == 0)
  5399. {
  5400. statement("// Returns the determinant of a 2x2 matrix.");
  5401. statement(force_inline);
  5402. statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
  5403. begin_scope();
  5404. statement("return a1 * b2 - b1 * a2;");
  5405. end_scope();
  5406. statement("");
  5407. }
  5408. statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
  5409. statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
  5410. statement(force_inline);
  5411. statement("float3x3 spvInverse3x3(float3x3 m)");
  5412. begin_scope();
  5413. statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)");
  5414. statement_no_indent("");
  5415. statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
  5416. statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);");
  5417. statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);");
  5418. statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);");
  5419. statement_no_indent("");
  5420. statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);");
  5421. statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);");
  5422. statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);");
  5423. statement_no_indent("");
  5424. statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);");
  5425. statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);");
  5426. statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);");
  5427. statement_no_indent("");
  5428. statement("// Calculate the determinant as a combination of the cofactors of the first row.");
  5429. statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);");
  5430. statement_no_indent("");
  5431. statement("// Divide the classical adjoint matrix by the determinant.");
  5432. statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
  5433. statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
  5434. end_scope();
  5435. statement("");
  5436. break;
case SPVFuncImplInverse2x2:
	statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
	statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
	statement(force_inline);
	statement("float2x2 spvInverse2x2(float2x2 m)");
	begin_scope();
	statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)");
	statement_no_indent("");
	statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
	statement("adj[0][0] = m[1][1];");
	statement("adj[0][1] = -m[0][1];");
	statement_no_indent("");
	statement("adj[1][0] = -m[1][0];");
	statement("adj[1][1] = m[0][0];");
	statement_no_indent("");
	statement("// Calculate the determinant as a combination of the cofactors of the first row.");
	statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);");
	statement_no_indent("");
	statement("// Divide the classical adjoint matrix by the determinant.");
	statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
	statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
	end_scope();
	statement("");
	break;
case SPVFuncImplForwardArgs:
	statement("template<typename T> struct spvRemoveReference { typedef T type; };");
	statement("template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };");
	statement("template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };");
	statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
	          "spvRemoveReference<T>::type& x)");
	begin_scope();
	statement("return static_cast<thread T&&>(x);");
	end_scope();
	statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
	          "spvRemoveReference<T>::type&& x)");
	begin_scope();
	statement("return static_cast<thread T&&>(x);");
	end_scope();
	statement("");
	break;
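	// Illustrative note (not part of the emitted source): spvForward mirrors
	// std::forward with explicit `thread` address-space qualifiers, since MSL has no
	// <utility>. A generated call site such as
	//   t.gather(s, spvForward<Ts>(params)..., component::x);
	// preserves the value category (lvalue vs. rvalue) of each forwarded argument.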
case SPVFuncImplGetSwizzle:
	statement("enum class spvSwizzle : uint");
	begin_scope();
	statement("none = 0,");
	statement("zero,");
	statement("one,");
	statement("red,");
	statement("green,");
	statement("blue,");
	statement("alpha");
	end_scope_decl();
	statement("");
	statement("template<typename T>");
	statement("inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)");
	begin_scope();
	statement("switch (s)");
	begin_scope();
	statement("case spvSwizzle::none:");
	statement("    return c;");
	statement("case spvSwizzle::zero:");
	statement("    return 0;");
	statement("case spvSwizzle::one:");
	statement("    return 1;");
	statement("case spvSwizzle::red:");
	statement("    return x.r;");
	statement("case spvSwizzle::green:");
	statement("    return x.g;");
	statement("case spvSwizzle::blue:");
	statement("    return x.b;");
	statement("case spvSwizzle::alpha:");
	statement("    return x.a;");
	end_scope();
	end_scope();
	statement("");
	break;
case SPVFuncImplTextureSwizzle:
	statement("// Wrapper function that swizzles texture samples and fetches.");
	statement("template<typename T>");
	statement("inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)");
	begin_scope();
	statement("if (!s)");
	statement("    return x;");
	statement("return vec<T, 4>(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), "
	          "spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) "
	          "& 0xFF)), "
	          "spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF)));");
	end_scope();
	statement("");
	statement("template<typename T>");
	statement("inline T spvTextureSwizzle(T x, uint s)");
	begin_scope();
	statement("return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;");
	end_scope();
	statement("");
	break;
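	// Illustrative note (not part of the emitted source): the constant `s` packs one
	// spvSwizzle per component, 8 bits each, with r in the low byte. For example, the
	// identity swizzle (red, green, blue, alpha) = (3, 4, 5, 6) packs to
	//   s = (6u << 24) | (5u << 16) | (4u << 8) | 3u = 0x06050403,
	// and spvTextureSwizzle(x, 0x06050403) returns x unchanged, while s == 0 means
	// "no swizzling" and takes the early-out.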
case SPVFuncImplGatherSwizzle:
	statement("// Wrapper function that swizzles texture gathers.");
	statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
	          "typename... Ts>");
	statement("inline vec<T, 4> spvGatherSwizzle(const thread Tex<T>& t, sampler s, "
	          "uint sw, component c, Ts... params) METAL_CONST_ARG(c)");
	begin_scope();
	statement("if (sw)");
	begin_scope();
	statement("switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF))");
	begin_scope();
	statement("case spvSwizzle::none:");
	statement("    break;");
	statement("case spvSwizzle::zero:");
	statement("    return vec<T, 4>(0, 0, 0, 0);");
	statement("case spvSwizzle::one:");
	statement("    return vec<T, 4>(1, 1, 1, 1);");
	statement("case spvSwizzle::red:");
	statement("    return t.gather(s, spvForward<Ts>(params)..., component::x);");
	statement("case spvSwizzle::green:");
	statement("    return t.gather(s, spvForward<Ts>(params)..., component::y);");
	statement("case spvSwizzle::blue:");
	statement("    return t.gather(s, spvForward<Ts>(params)..., component::z);");
	statement("case spvSwizzle::alpha:");
	statement("    return t.gather(s, spvForward<Ts>(params)..., component::w);");
	end_scope();
	end_scope();
	// texture::gather insists on its component parameter being a constant
	// expression, so we need this silly workaround just to compile the shader.
	statement("switch (c)");
	begin_scope();
	statement("case component::x:");
	statement("    return t.gather(s, spvForward<Ts>(params)..., component::x);");
	statement("case component::y:");
	statement("    return t.gather(s, spvForward<Ts>(params)..., component::y);");
	statement("case component::z:");
	statement("    return t.gather(s, spvForward<Ts>(params)..., component::z);");
	statement("case component::w:");
	statement("    return t.gather(s, spvForward<Ts>(params)..., component::w);");
	end_scope();
	end_scope();
	statement("");
	break;
case SPVFuncImplGatherCompareSwizzle:
	statement("// Wrapper function that swizzles depth texture gathers.");
	statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
	          "typename... Ts>");
	statement("inline vec<T, 4> spvGatherCompareSwizzle(const thread Tex<T>& t, sampler "
	          "s, uint sw, Ts... params)");
	begin_scope();
	statement("if (sw)");
	begin_scope();
	statement("switch (spvSwizzle(sw & 0xFF))");
	begin_scope();
	statement("case spvSwizzle::none:");
	statement("case spvSwizzle::red:");
	statement("    break;");
	statement("case spvSwizzle::zero:");
	statement("case spvSwizzle::green:");
	statement("case spvSwizzle::blue:");
	statement("case spvSwizzle::alpha:");
	statement("    return vec<T, 4>(0, 0, 0, 0);");
	statement("case spvSwizzle::one:");
	statement("    return vec<T, 4>(1, 1, 1, 1);");
	end_scope();
	end_scope();
	statement("return t.gather_compare(s, spvForward<Ts>(params)...);");
	end_scope();
	statement("");
	break;
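	// Illustrative note (not part of the emitted source): a depth texture exposes
	// only a single component, so just the none/red selectors fall through to the
	// real gather_compare above; the remaining selectors have no backing component
	// and resolve to the constant 0 or 1 vectors instead.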
case SPVFuncImplGatherConstOffsets:
	// Because we are passing a texture reference, we have to output an overloaded version of this function for each address space.
	for (uint32_t i = 0; i < texture_addr_space_count; i++)
	{
		statement("// Wrapper function that processes a ", texture_addr_spaces[i], " texture gather with a constant offset array.");
		statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
		          "typename Toff, typename... Tp>");
		statement("inline vec<T, 4> spvGatherConstOffsets(const ", texture_addr_spaces[i], " Tex<T>& t, sampler s, "
		          "Toff coffsets, component c, Tp... params) METAL_CONST_ARG(c)");
		begin_scope();
		statement("vec<T, 4> rslts[4];");
		statement("for (uint i = 0; i < 4; i++)");
		begin_scope();
		statement("switch (c)");
		begin_scope();
		// Work around texture::gather() requiring its component parameter to be a constant expression.
		statement("case component::x:");
		statement("    rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::x);");
		statement("    break;");
		statement("case component::y:");
		statement("    rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::y);");
		statement("    break;");
		statement("case component::z:");
		statement("    rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::z);");
		statement("    break;");
		statement("case component::w:");
		statement("    rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::w);");
		statement("    break;");
		end_scope();
		end_scope();
		// Pull all values from the i0j0 component of each gather footprint.
		statement("return vec<T, 4>(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);");
		end_scope();
		statement("");
	}
	break;
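	// Illustrative note (not part of the emitted source): in the gather footprint the
	// result components map to texels (i0,j1), (i1,j1), (i1,j0), (i0,j0), so .w is
	// the un-offset corner. Each of the four gathers above applies its own constant
	// offset, and keeping .w from each reproduces the "one offset per result
	// component" semantics of OpImageGather with ConstOffsets.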
case SPVFuncImplGatherCompareConstOffsets:
	// Because we are passing a texture reference, we have to output an overloaded version of this function for each address space.
	for (uint32_t i = 0; i < texture_addr_space_count; i++)
	{
		statement("// Wrapper function that processes a ", texture_addr_spaces[i], " texture gather with a constant offset array.");
		statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
		          "typename Toff, typename... Tp>");
		statement("inline vec<T, 4> spvGatherCompareConstOffsets(const ", texture_addr_spaces[i], " Tex<T>& t, sampler s, "
		          "Toff coffsets, Tp... params)");
		begin_scope();
		statement("vec<T, 4> rslts[4];");
		statement("for (uint i = 0; i < 4; i++)");
		begin_scope();
		statement("rslts[i] = t.gather_compare(s, spvForward<Tp>(params)..., coffsets[i]);");
		end_scope();
		// Pull all values from the i0j0 component of each gather footprint.
		statement("return vec<T, 4>(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);");
		end_scope();
		statement("");
	}
	break;
case SPVFuncImplSubgroupBroadcast:
	// Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting
	// them as integers.
	statement("template<typename T>");
	statement("inline T spvSubgroupBroadcast(T value, ushort lane)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return quad_broadcast(value, lane);");
	else
		statement("return simd_broadcast(value, lane);");
	end_scope();
	statement("");
	statement("template<>");
	statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return !!quad_broadcast((ushort)value, lane);");
	else
		statement("return !!simd_broadcast((ushort)value, lane);");
	end_scope();
	statement("");
	statement("template<uint N>");
	statement("inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
	else
		statement("return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane);");
	end_scope();
	statement("");
	break;
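	// Illustrative note (not part of the emitted source): Metal's broadcast
	// intrinsics accept arithmetic types only, so the bool overloads round-trip
	// through ushort; `!!` folds the broadcast integer back to a canonical bool, e.g.
	//   bool b = spvSubgroupBroadcast(cond, 3); // every lane receives lane 3's cond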
case SPVFuncImplSubgroupBroadcastFirst:
	statement("template<typename T>");
	statement("inline T spvSubgroupBroadcastFirst(T value)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return quad_broadcast_first(value);");
	else
		statement("return simd_broadcast_first(value);");
	end_scope();
	statement("");
	statement("template<>");
	statement("inline bool spvSubgroupBroadcastFirst(bool value)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return !!quad_broadcast_first((ushort)value);");
	else
		statement("return !!simd_broadcast_first((ushort)value);");
	end_scope();
	statement("");
	statement("template<uint N>");
	statement("inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return (vec<bool, N>)quad_broadcast_first((vec<ushort, N>)value);");
	else
		statement("return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value);");
	end_scope();
	statement("");
	break;
case SPVFuncImplSubgroupBallot:
	statement("inline uint4 spvSubgroupBallot(bool value)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
	{
		statement("return uint4((quad_vote::vote_t)quad_ballot(value), 0, 0, 0);");
	}
	else if (msl_options.is_ios())
	{
		// The current simd_vote on iOS uses a 32-bit integer-like object.
		statement("return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0);");
	}
	else
	{
		statement("simd_vote vote = simd_ballot(value);");
		statement("// simd_ballot() returns a 64-bit integer-like object, but");
		statement("// SPIR-V callers expect a uint4. We must convert.");
		statement("// FIXME: This won't include higher bits if Apple ever supports");
		statement("// 128 lanes in an SIMD-group.");
		statement("return uint4(as_type<uint2>((simd_vote::vote_t)vote), 0, 0);");
	}
	end_scope();
	statement("");
	break;
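	// Illustrative note (not part of the emitted source, and assuming Metal's
	// little-endian lane packing): with a 64-lane SIMD-group where only lanes 0 and
	// 33 vote true, vote_t holds 0x0000000200000001 and as_type<uint2> splits it
	// into uint4(0x1, 0x2, 0, 0), low word in .x; the upper two words stay zero.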
case SPVFuncImplSubgroupBallotBitExtract:
	statement("inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)");
	begin_scope();
	statement("return !!extract_bits(ballot[bit / 32], bit % 32, 1);");
	end_scope();
	statement("");
	break;
case SPVFuncImplSubgroupBallotFindLSB:
	statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)");
	begin_scope();
	if (msl_options.is_ios())
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));");
	}
	else
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
		          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
	}
	statement("ballot &= mask;");
	statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + "
	          "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);");
	end_scope();
	statement("");
	break;
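	// Illustrative note (not part of the emitted source): the nested selects walk
	// the words from low to high. For ballot = uint4(0, 0x8, 0, 0), ballot.x == 0,
	// so the result is 32 + ctz(0x8) = 35; a fully zero (masked-out) ballot yields
	// uint(-1).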
case SPVFuncImplSubgroupBallotFindMSB:
	statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)");
	begin_scope();
	if (msl_options.is_ios())
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));");
	}
	else
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
		          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
	}
	statement("ballot &= mask;");
	statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - "
	          "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), "
	          "ballot.z == 0), ballot.w == 0);");
	end_scope();
	statement("");
	break;
case SPVFuncImplSubgroupBallotBitCount:
	statement("inline uint spvPopCount4(uint4 ballot)");
	begin_scope();
	statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);");
	end_scope();
	statement("");
	statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)");
	begin_scope();
	if (msl_options.is_ios())
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));");
	}
	else
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), "
		          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));");
	}
	statement("return spvPopCount4(ballot & mask);");
	end_scope();
	statement("");
	statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
	begin_scope();
	if (msl_options.is_ios())
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));");
	}
	else
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), "
		          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), "
		          "uint2(0));");
	}
	statement("return spvPopCount4(ballot & mask);");
	end_scope();
	statement("");
	statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)");
	begin_scope();
	if (msl_options.is_ios())
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint3(0));");
	}
	else
	{
		statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), "
		          "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));");
	}
	statement("return spvPopCount4(ballot & mask);");
	end_scope();
	statement("");
	break;
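	// Illustrative note (not part of the emitted source): the three masks above
	// differ only in width: gl_SubgroupSize bits for the plain count, invocation
	// ID + 1 bits for the inclusive count, and invocation ID bits for the exclusive
	// count. E.g. with gl_SubgroupInvocationID = 34, the inclusive mask is
	// uint4(0xFFFFFFFF, 0x7, 0, 0): 32 low bits plus (35 - 32) = 3 more.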
case SPVFuncImplSubgroupAllEqual:
	// Metal doesn't provide a function to evaluate this directly. But, we can
	// implement this by comparing every thread's value to one thread's value
	// (in this case, the value of the first active thread). Then, by the transitive
	// property of equality, if all comparisons return true, then they are all equal.
	statement("template<typename T>");
	statement("inline bool spvSubgroupAllEqual(T value)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return quad_all(all(value == quad_broadcast_first(value)));");
	else
		statement("return simd_all(all(value == simd_broadcast_first(value)));");
	end_scope();
	statement("");
	statement("template<>");
	statement("inline bool spvSubgroupAllEqual(bool value)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return quad_all(value) || !quad_any(value);");
	else
		statement("return simd_all(value) || !simd_any(value);");
	end_scope();
	statement("");
	statement("template<uint N>");
	statement("inline bool spvSubgroupAllEqual(vec<bool, N> value)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return quad_all(all(value == (vec<bool, N>)quad_broadcast_first((vec<ushort, N>)value)));");
	else
		statement("return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value)));");
	end_scope();
	statement("");
	break;
case SPVFuncImplSubgroupShuffle:
	statement("template<typename T>");
	statement("inline T spvSubgroupShuffle(T value, ushort lane)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return quad_shuffle(value, lane);");
	else
		statement("return simd_shuffle(value, lane);");
	end_scope();
	statement("");
	statement("template<>");
	statement("inline bool spvSubgroupShuffle(bool value, ushort lane)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return !!quad_shuffle((ushort)value, lane);");
	else
		statement("return !!simd_shuffle((ushort)value, lane);");
	end_scope();
	statement("");
	statement("template<uint N>");
	statement("inline vec<bool, N> spvSubgroupShuffle(vec<bool, N> value, ushort lane)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return (vec<bool, N>)quad_shuffle((vec<ushort, N>)value, lane);");
	else
		statement("return (vec<bool, N>)simd_shuffle((vec<ushort, N>)value, lane);");
	end_scope();
	statement("");
	break;
case SPVFuncImplSubgroupShuffleXor:
	statement("template<typename T>");
	statement("inline T spvSubgroupShuffleXor(T value, ushort mask)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return quad_shuffle_xor(value, mask);");
	else
		statement("return simd_shuffle_xor(value, mask);");
	end_scope();
	statement("");
	statement("template<>");
	statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return !!quad_shuffle_xor((ushort)value, mask);");
	else
		statement("return !!simd_shuffle_xor((ushort)value, mask);");
	end_scope();
	statement("");
	statement("template<uint N>");
	statement("inline vec<bool, N> spvSubgroupShuffleXor(vec<bool, N> value, ushort mask)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, mask);");
	else
		statement("return (vec<bool, N>)simd_shuffle_xor((vec<ushort, N>)value, mask);");
	end_scope();
	statement("");
	break;
case SPVFuncImplSubgroupShuffleUp:
	statement("template<typename T>");
	statement("inline T spvSubgroupShuffleUp(T value, ushort delta)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return quad_shuffle_up(value, delta);");
	else
		statement("return simd_shuffle_up(value, delta);");
	end_scope();
	statement("");
	statement("template<>");
	statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return !!quad_shuffle_up((ushort)value, delta);");
	else
		statement("return !!simd_shuffle_up((ushort)value, delta);");
	end_scope();
	statement("");
	statement("template<uint N>");
	statement("inline vec<bool, N> spvSubgroupShuffleUp(vec<bool, N> value, ushort delta)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return (vec<bool, N>)quad_shuffle_up((vec<ushort, N>)value, delta);");
	else
		statement("return (vec<bool, N>)simd_shuffle_up((vec<ushort, N>)value, delta);");
	end_scope();
	statement("");
	break;
case SPVFuncImplSubgroupShuffleDown:
	statement("template<typename T>");
	statement("inline T spvSubgroupShuffleDown(T value, ushort delta)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return quad_shuffle_down(value, delta);");
	else
		statement("return simd_shuffle_down(value, delta);");
	end_scope();
	statement("");
	statement("template<>");
	statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return !!quad_shuffle_down((ushort)value, delta);");
	else
		statement("return !!simd_shuffle_down((ushort)value, delta);");
	end_scope();
	statement("");
	statement("template<uint N>");
	statement("inline vec<bool, N> spvSubgroupShuffleDown(vec<bool, N> value, ushort delta)");
	begin_scope();
	if (msl_options.use_quadgroup_operation())
		statement("return (vec<bool, N>)quad_shuffle_down((vec<ushort, N>)value, delta);");
	else
		statement("return (vec<bool, N>)simd_shuffle_down((vec<ushort, N>)value, delta);");
	end_scope();
	statement("");
	break;
case SPVFuncImplQuadBroadcast:
	statement("template<typename T>");
	statement("inline T spvQuadBroadcast(T value, uint lane)");
	begin_scope();
	statement("return quad_broadcast(value, lane);");
	end_scope();
	statement("");
	statement("template<>");
	statement("inline bool spvQuadBroadcast(bool value, uint lane)");
	begin_scope();
	statement("return !!quad_broadcast((ushort)value, lane);");
	end_scope();
	statement("");
	statement("template<uint N>");
	statement("inline vec<bool, N> spvQuadBroadcast(vec<bool, N> value, uint lane)");
	begin_scope();
	statement("return (vec<bool, N>)quad_broadcast((vec<ushort, N>)value, lane);");
	end_scope();
	statement("");
	break;
case SPVFuncImplQuadSwap:
	// We can implement this easily based on the following table giving
	// the target lane ID from the direction and current lane ID:
	//
	//          Direction
	//          | 0 | 1 | 2 |
	//       ---+---+---+---+
	//  L  0  |  1   2   3
	//  a  1  |  0   3   2
	//  n  2  |  3   0   1
	//  e  3  |  2   1   0
	//
	// Notice that target = source ^ (direction + 1).
	statement("template<typename T>");
	statement("inline T spvQuadSwap(T value, uint dir)");
	begin_scope();
	statement("return quad_shuffle_xor(value, dir + 1);");
	end_scope();
	statement("");
	statement("template<>");
	statement("inline bool spvQuadSwap(bool value, uint dir)");
	begin_scope();
	statement("return !!quad_shuffle_xor((ushort)value, dir + 1);");
	end_scope();
	statement("");
	statement("template<uint N>");
	statement("inline vec<bool, N> spvQuadSwap(vec<bool, N> value, uint dir)");
	begin_scope();
	statement("return (vec<bool, N>)quad_shuffle_xor((vec<ushort, N>)value, dir + 1);");
	end_scope();
	statement("");
	break;
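	// Illustrative note (not part of the emitted source): checking the XOR identity
	// against the table above: direction 0 (horizontal) maps lane 2 to 2 ^ 1 = 3,
	// direction 1 (vertical) maps lane 2 to 2 ^ 2 = 0, and direction 2 (diagonal)
	// maps lane 2 to 2 ^ 3 = 1, matching row "2".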
case SPVFuncImplReflectScalar:
	// Metal does not support scalar versions of these functions.
	// Ensure fast-math is disabled to match Vulkan results.
	statement("template<typename T>");
	statement("[[clang::optnone]] T spvReflect(T i, T n)");
	begin_scope();
	statement("return i - T(2) * i * n * n;");
	end_scope();
	statement("");
	break;
case SPVFuncImplRefractScalar:
	// Metal does not support scalar versions of these functions.
	statement("template<typename T>");
	statement("inline T spvRefract(T i, T n, T eta)");
	begin_scope();
	statement("T NoI = n * i;");
	statement("T NoI2 = NoI * NoI;");
	statement("T k = T(1) - eta * eta * (T(1) - NoI2);");
	statement("if (k < T(0))");
	begin_scope();
	statement("return T(0);");
	end_scope();
	statement("else");
	begin_scope();
	statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
	end_scope();
	end_scope();
	statement("");
	break;
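	// Illustrative note (not part of the emitted source): this is the GLSL refract()
	// equation reduced to scalars: k = 1 - eta^2 * (1 - (N.I)^2); k < 0 signals
	// total internal reflection (return 0), otherwise the result is
	// eta*I - (eta*(N.I) + sqrt(k))*N.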
case SPVFuncImplFaceForwardScalar:
	// Metal does not support scalar versions of these functions.
	statement("template<typename T>");
	statement("inline T spvFaceForward(T n, T i, T nref)");
	begin_scope();
	statement("return i * nref < T(0) ? n : -n;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructNearest2Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructNearest(texture2d<T> plane0, texture2d<T> plane1, sampler "
	          "samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("ycbcr.br = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).rg;");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
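	// Illustrative note (not part of the emitted source): in two-plane Y'CbCr
	// formats plane0 holds luma and plane1 holds interleaved CbCr, so plane1's .rg
	// lands in ycbcr.br (Cb -> b, Cr -> r). The result is ordered so that
	// ycbcr.gbr = (Y', Cb, Cr), ready for the conversion matrices emitted further
	// below.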
case SPVFuncImplChromaReconstructNearest3Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructNearest(texture2d<T> plane0, texture2d<T> plane1, "
	          "texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("ycbcr.b = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("ycbcr.r = plane2.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear422CositedEven2Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear422CositedEven(texture2d<T> plane0, texture2d<T> "
	          "plane1, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("if (fract(coord.x * plane1.get_width()) != 0.0)");
	begin_scope();
	statement("ycbcr.br = vec<T, 2>(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).rg);");
	end_scope();
	statement("else");
	begin_scope();
	statement("ycbcr.br = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).rg;");
	end_scope();
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear422CositedEven3Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear422CositedEven(texture2d<T> plane0, texture2d<T> "
	          "plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("if (fract(coord.x * plane1.get_width()) != 0.0)");
	begin_scope();
	statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).r);");
	statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), 0.5).r);");
	end_scope();
	statement("else");
	begin_scope();
	statement("ycbcr.b = plane1.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("ycbcr.r = plane2.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	end_scope();
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear422Midpoint2Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear422Midpoint(texture2d<T> plane0, texture2d<T> "
	          "plane1, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);");
	statement("ycbcr.br = vec<T, 2>(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).rg);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear422Midpoint3Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear422Midpoint(texture2d<T> plane0, texture2d<T> "
	          "plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);");
	statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).r);");
	statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., offs), 0.25).r);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d<T> plane0, "
	          "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);");
	statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
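	// Illustrative note (not part of the emitted source): `ab` holds the per-axis
	// bilinear weights, derived from the parity of the nearest luma texel: with
	// cosited-even chroma, fract(round(coord * dims) * 0.5) is 0.0 or 0.5, and the
	// half-texel shift in the midpoint variants below moves those weights to 0.75
	// or 0.25. The remaining 420 cases differ only in which axes receive the shift.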
case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d<T> plane0, "
	          "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);");
	statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
	statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYCositedEven(texture2d<T> plane0, "
	          "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
	          "0)) * 0.5);");
	statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYCositedEven(texture2d<T> plane0, "
	          "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
	          "0)) * 0.5);");
	statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
	statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d<T> plane0, "
	          "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, "
	          "0.5)) * 0.5);");
	statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d<T> plane0, "
	          "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, "
	          "0.5)) * 0.5);");
	statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
	statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYMidpoint(texture2d<T> plane0, "
	          "texture2d<T> plane1, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
	          "0.5)) * 0.5);");
	statement("ycbcr.br = vec<T, 2>(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).rg);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane:
	statement("template<typename T, typename... LodOptions>");
	statement("inline vec<T, 4> spvChromaReconstructLinear420XMidpointYMidpoint(texture2d<T> plane0, "
	          "texture2d<T> plane1, texture2d<T> plane2, sampler samp, float2 coord, LodOptions... options)");
	begin_scope();
	statement("vec<T, 4> ycbcr = vec<T, 4>(0, 0, 0, 1);");
	statement("ycbcr.g = plane0.sample(samp, coord, spvForward<LodOptions>(options)...).r;");
	statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, "
	          "0.5)) * 0.5);");
	statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane1.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
	statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)...), "
	          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 0)), ab.x), "
	          "mix(plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(0, 1)), "
	          "plane2.sample(samp, coord, spvForward<LodOptions>(options)..., int2(1, 1)), ab.x), ab.y).r);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplExpandITUFullRange:
	statement("template<typename T>");
	statement("inline vec<T, 4> spvExpandITUFullRange(vec<T, 4> ycbcr, int n)");
	begin_scope();
	statement("ycbcr.br -= exp2(T(n-1))/(exp2(T(n))-1);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
case SPVFuncImplExpandITUNarrowRange:
	statement("template<typename T>");
	statement("inline vec<T, 4> spvExpandITUNarrowRange(vec<T, 4> ycbcr, int n)");
	begin_scope();
	statement("ycbcr.g = (ycbcr.g * (exp2(T(n)) - 1) - ldexp(T(16), n - 8))/ldexp(T(219), n - 8);");
	statement("ycbcr.br = (ycbcr.br * (exp2(T(n)) - 1) - ldexp(T(128), n - 8))/ldexp(T(224), n - 8);");
	statement("return ycbcr;");
	end_scope();
	statement("");
	break;
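	// Illustrative note (not part of the emitted source): for n = 8 these reduce to
	// the familiar ITU constants. Full range subtracts exp2(7)/(exp2(8)-1) = 128/255
	// from CbCr; narrow range maps Y' as (y*255 - 16)/219 and CbCr as
	// (c*255 - 128)/224, since ldexp(16.0, 0) = 16, ldexp(219.0, 0) = 219, and so on.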
case SPVFuncImplConvertYCbCrBT709:
	statement("// cf. Khronos Data Format Specification, section 15.1.1");
	statement("constant float3x3 spvBT709Factors = {{1, 1, 1}, {0, -0.13397432/0.7152, 1.8556}, {1.5748, "
	          "-0.33480248/0.7152, 0}};");
	statement("");
	statement("template<typename T>");
	statement("inline vec<T, 4> spvConvertYCbCrBT709(vec<T, 4> ycbcr)");
	begin_scope();
	statement("vec<T, 4> rgba;");
	statement("rgba.rgb = vec<T, 3>(spvBT709Factors * ycbcr.gbr);");
	statement("rgba.a = ycbcr.a;");
	statement("return rgba;");
	end_scope();
	statement("");
	break;
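	// Illustrative note (not part of the emitted source): the column-major matrix
	// encodes the inverse of the BT.709 luma weights Kr = 0.2126, Kb = 0.0722,
	// Kg = 0.7152: R' = Y + (2 - 2*Kr)*Cr, B' = Y + (2 - 2*Kb)*Cb, and
	// G' = Y - Kb*(2 - 2*Kb)/Kg*Cb - Kr*(2 - 2*Kr)/Kg*Cr, which reproduces 1.5748,
	// 1.8556, -0.13397432/0.7152, and -0.33480248/0.7152 above. The BT.601 and
	// BT.2020 cases below follow the same pattern with their own Kr/Kb.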
case SPVFuncImplConvertYCbCrBT601:
	statement("// cf. Khronos Data Format Specification, section 15.1.2");
	statement("constant float3x3 spvBT601Factors = {{1, 1, 1}, {0, -0.202008/0.587, 1.772}, {1.402, "
	          "-0.419198/0.587, 0}};");
	statement("");
	statement("template<typename T>");
	statement("inline vec<T, 4> spvConvertYCbCrBT601(vec<T, 4> ycbcr)");
	begin_scope();
	statement("vec<T, 4> rgba;");
	statement("rgba.rgb = vec<T, 3>(spvBT601Factors * ycbcr.gbr);");
	statement("rgba.a = ycbcr.a;");
	statement("return rgba;");
	end_scope();
	statement("");
	break;
case SPVFuncImplConvertYCbCrBT2020:
	statement("// cf. Khronos Data Format Specification, section 15.1.3");
	statement("constant float3x3 spvBT2020Factors = {{1, 1, 1}, {0, -0.11156702/0.6780, 1.8814}, {1.4746, "
	          "-0.38737742/0.6780, 0}};");
	statement("");
	statement("template<typename T>");
	statement("inline vec<T, 4> spvConvertYCbCrBT2020(vec<T, 4> ycbcr)");
	begin_scope();
	statement("vec<T, 4> rgba;");
	statement("rgba.rgb = vec<T, 3>(spvBT2020Factors * ycbcr.gbr);");
	statement("rgba.a = ycbcr.a;");
	statement("return rgba;");
	end_scope();
	statement("");
	break;
case SPVFuncImplDynamicImageSampler:
	statement("enum class spvFormatResolution");
	begin_scope();
	statement("_444 = 0,");
	statement("_422,");
	statement("_420");
	end_scope_decl();
	statement("");
	statement("enum class spvChromaFilter");
	begin_scope();
	statement("nearest = 0,");
	statement("linear");
	end_scope_decl();
	statement("");
	statement("enum class spvXChromaLocation");
	begin_scope();
	statement("cosited_even = 0,");
	statement("midpoint");
	end_scope_decl();
	statement("");
	statement("enum class spvYChromaLocation");
	begin_scope();
	statement("cosited_even = 0,");
	statement("midpoint");
	end_scope_decl();
	statement("");
	statement("enum class spvYCbCrModelConversion");
	begin_scope();
	statement("rgb_identity = 0,");
	statement("ycbcr_identity,");
	statement("ycbcr_bt_709,");
	statement("ycbcr_bt_601,");
	statement("ycbcr_bt_2020");
	end_scope_decl();
	statement("");
	statement("enum class spvYCbCrRange");
	begin_scope();
	statement("itu_full = 0,");
	statement("itu_narrow");
	end_scope_decl();
	statement("");
	statement("struct spvComponentBits");
	begin_scope();
	statement("constexpr explicit spvComponentBits(int v) thread : value(v) {}");
	statement("uchar value : 6;");
	end_scope_decl();
	statement("// A class corresponding to metal::sampler which holds sampler");
	statement("// Y'CbCr conversion info.");
	statement("struct spvYCbCrSampler");
	begin_scope();
	statement("constexpr spvYCbCrSampler() thread : val(build()) {}");
	statement("template<typename... Ts>");
	statement("constexpr spvYCbCrSampler(Ts... t) thread : val(build(t...)) {}");
	statement("constexpr spvYCbCrSampler(const thread spvYCbCrSampler& s) thread = default;");
	statement("");
	statement("spvFormatResolution get_resolution() const thread");
	begin_scope();
	statement("return spvFormatResolution((val & resolution_mask) >> resolution_base);");
	end_scope();
	statement("spvChromaFilter get_chroma_filter() const thread");
	begin_scope();
	statement("return spvChromaFilter((val & chroma_filter_mask) >> chroma_filter_base);");
	end_scope();
	statement("spvXChromaLocation get_x_chroma_offset() const thread");
	begin_scope();
	statement("return spvXChromaLocation((val & x_chroma_off_mask) >> x_chroma_off_base);");
	end_scope();
	statement("spvYChromaLocation get_y_chroma_offset() const thread");
	begin_scope();
	statement("return spvYChromaLocation((val & y_chroma_off_mask) >> y_chroma_off_base);");
	end_scope();
	statement("spvYCbCrModelConversion get_ycbcr_model() const thread");
	begin_scope();
	statement("return spvYCbCrModelConversion((val & ycbcr_model_mask) >> ycbcr_model_base);");
	end_scope();
	statement("spvYCbCrRange get_ycbcr_range() const thread");
	begin_scope();
	statement("return spvYCbCrRange((val & ycbcr_range_mask) >> ycbcr_range_base);");
	end_scope();
	statement("int get_bpc() const thread { return (val & bpc_mask) >> bpc_base; }");
	statement("");
	statement("private:");
	statement("ushort val;");
	statement("");
	statement("constexpr static constant ushort resolution_bits = 2;");
	statement("constexpr static constant ushort chroma_filter_bits = 2;");
	statement("constexpr static constant ushort x_chroma_off_bit = 1;");
	statement("constexpr static constant ushort y_chroma_off_bit = 1;");
	statement("constexpr static constant ushort ycbcr_model_bits = 3;");
	statement("constexpr static constant ushort ycbcr_range_bit = 1;");
	statement("constexpr static constant ushort bpc_bits = 6;");
	statement("");
	statement("constexpr static constant ushort resolution_base = 0;");
	statement("constexpr static constant ushort chroma_filter_base = 2;");
	statement("constexpr static constant ushort x_chroma_off_base = 4;");
	statement("constexpr static constant ushort y_chroma_off_base = 5;");
	statement("constexpr static constant ushort ycbcr_model_base = 6;");
	statement("constexpr static constant ushort ycbcr_range_base = 9;");
	statement("constexpr static constant ushort bpc_base = 10;");
	statement("");
	statement(
	    "constexpr static constant ushort resolution_mask = ((1 << resolution_bits) - 1) << resolution_base;");
	statement("constexpr static constant ushort chroma_filter_mask = ((1 << chroma_filter_bits) - 1) << "
	          "chroma_filter_base;");
	statement("constexpr static constant ushort x_chroma_off_mask = ((1 << x_chroma_off_bit) - 1) << "
	          "x_chroma_off_base;");
	statement("constexpr static constant ushort y_chroma_off_mask = ((1 << y_chroma_off_bit) - 1) << "
	          "y_chroma_off_base;");
	statement("constexpr static constant ushort ycbcr_model_mask = ((1 << ycbcr_model_bits) - 1) << "
	          "ycbcr_model_base;");
	statement("constexpr static constant ushort ycbcr_range_mask = ((1 << ycbcr_range_bit) - 1) << "
	          "ycbcr_range_base;");
	statement("constexpr static constant ushort bpc_mask = ((1 << bpc_bits) - 1) << bpc_base;");
	statement("");
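	// Illustrative note (not part of the emitted source): the fields pack into the
	// 16-bit `val` as resolution[1:0], chroma_filter[3:2], x_off[4], y_off[5],
	// model[8:6], range[9], bpc[15:10]. For example, a hypothetical 8-bit,
	// narrow-range, BT.709, linear-filtered 420 sampler packs to
	//   (2 << 0) | (1 << 2) | (2 << 6) | (1 << 9) | (8 << 10) = 0x2286.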
  6500. statement("static constexpr ushort build()");
  6501. begin_scope();
  6502. statement("return 0;");
  6503. end_scope();
  6504. statement("");
  6505. statement("template<typename... Ts>");
  6506. statement("static constexpr ushort build(spvFormatResolution res, Ts... t)");
  6507. begin_scope();
  6508. statement("return (ushort(res) << resolution_base) | (build(t...) & ~resolution_mask);");
  6509. end_scope();
  6510. statement("");
  6511. statement("template<typename... Ts>");
  6512. statement("static constexpr ushort build(spvChromaFilter filt, Ts... t)");
  6513. begin_scope();
  6514. statement("return (ushort(filt) << chroma_filter_base) | (build(t...) & ~chroma_filter_mask);");
  6515. end_scope();
  6516. statement("");
  6517. statement("template<typename... Ts>");
  6518. statement("static constexpr ushort build(spvXChromaLocation loc, Ts... t)");
  6519. begin_scope();
  6520. statement("return (ushort(loc) << x_chroma_off_base) | (build(t...) & ~x_chroma_off_mask);");
  6521. end_scope();
  6522. statement("");
  6523. statement("template<typename... Ts>");
  6524. statement("static constexpr ushort build(spvYChromaLocation loc, Ts... t)");
  6525. begin_scope();
  6526. statement("return (ushort(loc) << y_chroma_off_base) | (build(t...) & ~y_chroma_off_mask);");
  6527. end_scope();
  6528. statement("");
  6529. statement("template<typename... Ts>");
  6530. statement("static constexpr ushort build(spvYCbCrModelConversion model, Ts... t)");
  6531. begin_scope();
  6532. statement("return (ushort(model) << ycbcr_model_base) | (build(t...) & ~ycbcr_model_mask);");
  6533. end_scope();
  6534. statement("");
  6535. statement("template<typename... Ts>");
  6536. statement("static constexpr ushort build(spvYCbCrRange range, Ts... t)");
  6537. begin_scope();
  6538. statement("return (ushort(range) << ycbcr_range_base) | (build(t...) & ~ycbcr_range_mask);");
  6539. end_scope();
  6540. statement("");
  6541. statement("template<typename... Ts>");
  6542. statement("static constexpr ushort build(spvComponentBits bpc, Ts... t)");
  6543. begin_scope();
  6544. statement("return (ushort(bpc.value) << bpc_base) | (build(t...) & ~bpc_mask);");
  6545. end_scope();
  6546. end_scope_decl();
  6547. statement("");
  6548. statement("// A class which can hold up to three textures and a sampler, including");
  6549. statement("// Y'CbCr conversion info, used to pass combined image-samplers");
  6550. statement("// dynamically to functions.");
			statement("template<typename T>");
			statement("struct spvDynamicImageSampler");
			begin_scope();
			statement("texture2d<T> plane0;");
			statement("texture2d<T> plane1;");
			statement("texture2d<T> plane2;");
			statement("sampler samp;");
			statement("spvYCbCrSampler ycbcr_samp;");
			statement("uint swizzle = 0;");
			statement("");
			if (msl_options.swizzle_texture_samples)
			{
				statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp, uint sw) thread :");
				statement(" plane0(tex), samp(samp), swizzle(sw) {}");
			}
			else
			{
				statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp) thread :");
				statement(" plane0(tex), samp(samp) {}");
			}
			statement("constexpr spvDynamicImageSampler(texture2d<T> tex, sampler samp, spvYCbCrSampler ycbcr_samp, "
			          "uint sw) thread :");
			statement(" plane0(tex), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}");
			statement("constexpr spvDynamicImageSampler(texture2d<T> plane0, texture2d<T> plane1,");
			statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :");
			statement(" plane0(plane0), plane1(plane1), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}");
			statement(
			    "constexpr spvDynamicImageSampler(texture2d<T> plane0, texture2d<T> plane1, texture2d<T> plane2,");
			statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :");
			statement(" plane0(plane0), plane1(plane1), plane2(plane2), samp(samp), ycbcr_samp(ycbcr_samp), "
			          "swizzle(sw) {}");
			statement("");

			// XXX This is really hard to follow... I've left comments to make it a bit easier.
			statement("template<typename... LodOptions>");
			statement("vec<T, 4> do_sample(float2 coord, LodOptions... options) const thread");
			begin_scope();
			statement("if (!is_null_texture(plane1))");
			begin_scope();
			statement("if (ycbcr_samp.get_resolution() == spvFormatResolution::_444 ||");
			statement(" ycbcr_samp.get_chroma_filter() == spvChromaFilter::nearest)");
			begin_scope();
			statement("if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			statement(
			    "return spvChromaReconstructNearest(plane0, plane1, samp, coord, spvForward<LodOptions>(options)...);");
			end_scope(); // if (resolution == 444 || chroma_filter == nearest)
			statement("switch (ycbcr_samp.get_resolution())");
			begin_scope();
			statement("case spvFormatResolution::_444: break;");
			statement("case spvFormatResolution::_422:");
			begin_scope();
			statement("switch (ycbcr_samp.get_x_chroma_offset())");
			begin_scope();
			statement("case spvXChromaLocation::cosited_even:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear422CositedEven(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear422CositedEven(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			statement("case spvXChromaLocation::midpoint:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear422Midpoint(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear422Midpoint(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			end_scope(); // switch (x_chroma_offset)
			end_scope(); // case 422:
			statement("case spvFormatResolution::_420:");
			begin_scope();
			statement("switch (ycbcr_samp.get_x_chroma_offset())");
			begin_scope();
			statement("case spvXChromaLocation::cosited_even:");
			begin_scope();
			statement("switch (ycbcr_samp.get_y_chroma_offset())");
			begin_scope();
			statement("case spvYChromaLocation::cosited_even:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			statement("case spvYChromaLocation::midpoint:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			end_scope(); // switch (y_chroma_offset)
			end_scope(); // case x::cosited_even:
			statement("case spvXChromaLocation::midpoint:");
			begin_scope();
			statement("switch (ycbcr_samp.get_y_chroma_offset())");
			begin_scope();
			statement("case spvYChromaLocation::cosited_even:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear420XMidpointYCositedEven(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear420XMidpointYCositedEven(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			statement("case spvYChromaLocation::midpoint:");
			statement(" if (!is_null_texture(plane2))");
			statement(" return spvChromaReconstructLinear420XMidpointYMidpoint(");
			statement(" plane0, plane1, plane2, samp,");
			statement(" coord, spvForward<LodOptions>(options)...);");
			statement(" return spvChromaReconstructLinear420XMidpointYMidpoint(");
			statement(" plane0, plane1, samp, coord,");
			statement(" spvForward<LodOptions>(options)...);");
			end_scope(); // switch (y_chroma_offset)
			end_scope(); // case x::midpoint
			end_scope(); // switch (x_chroma_offset)
			end_scope(); // case 420:
			end_scope(); // switch (resolution)
			end_scope(); // if (multiplanar)
			statement("return plane0.sample(samp, coord, spvForward<LodOptions>(options)...);");
			end_scope(); // do_sample()
			statement("template <typename... LodOptions>");
			statement("vec<T, 4> sample(float2 coord, LodOptions... options) const thread");
			begin_scope();
			statement(
			    "vec<T, 4> s = spvTextureSwizzle(do_sample(coord, spvForward<LodOptions>(options)...), swizzle);");
			statement("if (ycbcr_samp.get_ycbcr_model() == spvYCbCrModelConversion::rgb_identity)");
			statement(" return s;");
			statement("");
			statement("switch (ycbcr_samp.get_ycbcr_range())");
			begin_scope();
			statement("case spvYCbCrRange::itu_full:");
			statement(" s = spvExpandITUFullRange(s, ycbcr_samp.get_bpc());");
			statement(" break;");
			statement("case spvYCbCrRange::itu_narrow:");
			statement(" s = spvExpandITUNarrowRange(s, ycbcr_samp.get_bpc());");
			statement(" break;");
			end_scope();
			statement("");
			statement("switch (ycbcr_samp.get_ycbcr_model())");
			begin_scope();
			statement("case spvYCbCrModelConversion::rgb_identity:"); // Silence Clang warning
			statement("case spvYCbCrModelConversion::ycbcr_identity:");
			statement(" return s;");
			statement("case spvYCbCrModelConversion::ycbcr_bt_709:");
			statement(" return spvConvertYCbCrBT709(s);");
			statement("case spvYCbCrModelConversion::ycbcr_bt_601:");
			statement(" return spvConvertYCbCrBT601(s);");
			statement("case spvYCbCrModelConversion::ycbcr_bt_2020:");
			statement(" return spvConvertYCbCrBT2020(s);");
			end_scope();
			end_scope();
			statement("");

			// Sampler Y'CbCr conversion forbids offsets.
			statement("vec<T, 4> sample(float2 coord, int2 offset) const thread");
			begin_scope();
			if (msl_options.swizzle_texture_samples)
				statement("return spvTextureSwizzle(plane0.sample(samp, coord, offset), swizzle);");
			else
				statement("return plane0.sample(samp, coord, offset);");
			end_scope();
			statement("template<typename lod_options>");
			statement("vec<T, 4> sample(float2 coord, lod_options options, int2 offset) const thread");
			begin_scope();
			if (msl_options.swizzle_texture_samples)
				statement("return spvTextureSwizzle(plane0.sample(samp, coord, options, offset), swizzle);");
			else
				statement("return plane0.sample(samp, coord, options, offset);");
			end_scope();
			statement("#if __HAVE_MIN_LOD_CLAMP__");
			statement("vec<T, 4> sample(float2 coord, bias b, min_lod_clamp min_lod, int2 offset) const thread");
			begin_scope();
			statement("return plane0.sample(samp, coord, b, min_lod, offset);");
			end_scope();
			statement(
			    "vec<T, 4> sample(float2 coord, gradient2d grad, min_lod_clamp min_lod, int2 offset) const thread");
			begin_scope();
			statement("return plane0.sample(samp, coord, grad, min_lod, offset);");
			end_scope();
			statement("#endif");
			statement("");

			// Y'CbCr conversion forbids all operations but sampling.
			statement("vec<T, 4> read(uint2 coord, uint lod = 0) const thread");
			begin_scope();
			statement("return plane0.read(coord, lod);");
			end_scope();
			statement("");
			statement("vec<T, 4> gather(float2 coord, int2 offset = int2(0), component c = component::x) const thread");
			begin_scope();
			if (msl_options.swizzle_texture_samples)
				statement("return spvGatherSwizzle(plane0, samp, swizzle, c, coord, offset);");
			else
				statement("return plane0.gather(samp, coord, offset, c);");
			end_scope();
			end_scope_decl();
			statement("");
			break;

		case SPVFuncImplRayQueryIntersectionParams:
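			// Translates SPIR-V ray flags into an MSL intersection_params object at
			// runtime; flags with no MSL equivalent (noted below) are dropped.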
			statement("intersection_params spvMakeIntersectionParams(uint flags)");
			begin_scope();
			statement("intersection_params ip;");
			statement("if ((flags & ", RayFlagsOpaqueKHRMask, ") != 0)");
			statement(" ip.force_opacity(forced_opacity::opaque);");
			statement("if ((flags & ", RayFlagsNoOpaqueKHRMask, ") != 0)");
			statement(" ip.force_opacity(forced_opacity::non_opaque);");
			statement("if ((flags & ", RayFlagsTerminateOnFirstHitKHRMask, ") != 0)");
			statement(" ip.accept_any_intersection(true);");
			// RayFlagsSkipClosestHitShaderKHRMask is not available in MSL
			statement("if ((flags & ", RayFlagsCullBackFacingTrianglesKHRMask, ") != 0)");
			statement(" ip.set_triangle_cull_mode(triangle_cull_mode::back);");
			statement("if ((flags & ", RayFlagsCullFrontFacingTrianglesKHRMask, ") != 0)");
			statement(" ip.set_triangle_cull_mode(triangle_cull_mode::front);");
			statement("if ((flags & ", RayFlagsCullOpaqueKHRMask, ") != 0)");
			statement(" ip.set_opacity_cull_mode(opacity_cull_mode::opaque);");
			statement("if ((flags & ", RayFlagsCullNoOpaqueKHRMask, ") != 0)");
			statement(" ip.set_opacity_cull_mode(opacity_cull_mode::non_opaque);");
			statement("if ((flags & ", RayFlagsSkipTrianglesKHRMask, ") != 0)");
			statement(" ip.set_geometry_cull_mode(geometry_cull_mode::triangle);");
			statement("if ((flags & ", RayFlagsSkipAABBsKHRMask, ") != 0)");
			statement(" ip.set_geometry_cull_mode(geometry_cull_mode::bounding_box);");
			statement("return ip;");
			end_scope();
			statement("");
			break;

		case SPVFuncImplVariableDescriptor:
			statement("template<typename T>");
			statement("struct spvDescriptor");
			begin_scope();
			statement("T value;");
			end_scope_decl();
			statement("");
			break;

		case SPVFuncImplVariableSizedDescriptor:
			statement("template<typename T>");
			statement("struct spvBufferDescriptor");
			begin_scope();
			statement("T value;");
			statement("int length;");
			statement("const device T& operator -> () const device");
			begin_scope();
			statement("return value;");
			end_scope();
			statement("const device T& operator * () const device");
			begin_scope();
			statement("return value;");
			end_scope();
			end_scope_decl();
			statement("");
			break;

		case SPVFuncImplVariableDescriptorArray:
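			// spvDescriptorArray gives plain operator[] access over a runtime array of
			// argument-buffer descriptors by unwrapping spvDescriptor<T>::value; the
			// rich-descriptor specialization below also exposes the buffer length.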
			if (spv_function_implementations.count(SPVFuncImplVariableDescriptor) != 0)
			{
				statement("template<typename T>");
				statement("struct spvDescriptorArray");
				begin_scope();
				statement("spvDescriptorArray(const device spvDescriptor<T>* ptr) : ptr(&ptr->value)");
				begin_scope();
				end_scope();
				statement("const device T& operator [] (size_t i) const");
				begin_scope();
				statement("return ptr[i];");
				end_scope();
				statement("const device T* ptr;");
				end_scope_decl();
				statement("");
			}
			else
			{
				statement("template<typename T>");
				statement("struct spvDescriptorArray;");
				statement("");
			}

			if (msl_options.runtime_array_rich_descriptor &&
			    spv_function_implementations.count(SPVFuncImplVariableSizedDescriptor) != 0)
			{
				statement("template<typename T>");
				statement("struct spvDescriptorArray<device T*>");
				begin_scope();
				statement("spvDescriptorArray(const device spvBufferDescriptor<device T*>* ptr) : ptr(ptr)");
				begin_scope();
				end_scope();
				statement("const device T* operator [] (size_t i) const");
				begin_scope();
				statement("return ptr[i].value;");
				end_scope();
				statement("const int length(int i) const");
				begin_scope();
				statement("return ptr[i].length;");
				end_scope();
				statement("const device spvBufferDescriptor<device T*>* ptr;");
				end_scope_decl();
				statement("");
			}
			break;

		case SPVFuncImplPaddedStd140:
			// .data is used in access chain.
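			// Sketch of the intent (illustrative): a std140 float[4] is declared as
			// spvPaddedStd140<float> arr[4], padding each element out to 16 bytes while
			// loads keep reading a plain T through arr[i].data.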
			statement("template <typename T>");
			statement("struct spvPaddedStd140 { alignas(16) T data; };");
			statement("template <typename T, int n>");
			statement("using spvPaddedStd140Matrix = spvPaddedStd140<T>[n];");
			statement("");
			break;

		case SPVFuncImplReduceAdd:
			// Metal doesn't support __builtin_reduce_add or simd_reduce_add, so we need this.
			// Metal also doesn't support the other vector builtins, which would have been useful to make this a single template.
			statement("template <typename T>");
			statement("T reduce_add(vec<T, 2> v) { return v.x + v.y; }");
			statement("template <typename T>");
			statement("T reduce_add(vec<T, 3> v) { return v.x + v.y + v.z; }");
			statement("template <typename T>");
			statement("T reduce_add(vec<T, 4> v) { return v.x + v.y + v.z + v.w; }");
			statement("");
			break;

		case SPVFuncImplImageFence:
			statement("template <typename ImageT>");
			statement("void spvImageFence(ImageT img) { img.fence(); }");
			statement("");
			break;

		case SPVFuncImplTextureCast:
			statement("template <typename T, typename U>");
			statement("T spvTextureCast(U img)");
			begin_scope();
			// MSL complains if you try to cast the texture itself, but casting the reference type is ... ok? *shrug*
			// Gotta do what you gotta do, I suppose.
			statement("return reinterpret_cast<thread const T &>(img);");
			end_scope();
			statement("");
			break;

		default:
			break;
		}
	}
}

static string inject_top_level_storage_qualifier(const string &expr, const string &qualifier)
{
	// Easier to do this through text munging since the qualifier does not exist in the type system at all,
	// and plumbing in all that information is not very helpful.
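	// For instance (illustrative): with qualifier "constant",
	//   "device float4* v"  ->  "device float4* constant v"
	// while an expression without any '*' or '&' simply has the qualifier prepended.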
	size_t last_reference = expr.find_last_of('&');
	size_t last_pointer = expr.find_last_of('*');
	size_t last_significant = string::npos;

	if (last_reference == string::npos)
		last_significant = last_pointer;
	else if (last_pointer == string::npos)
		last_significant = last_reference;
	else
		last_significant = max<size_t>(last_reference, last_pointer);

	if (last_significant == string::npos)
		return join(qualifier, " ", expr);
	else
	{
		return join(expr.substr(0, last_significant + 1), " ",
		            qualifier, expr.substr(last_significant + 1, string::npos));
	}
}

void CompilerMSL::declare_constant_arrays()
{
	bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1;

	// MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to
	// global constants, so that we can use the constants as variable expressions.
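	// E.g. (illustrative, hypothetical name): an array constant used inside a function
	// is hoisted to global scope as
	//   constant float _42[2] = { 1.0, 2.0 };
	// and referenced by name wherever the SPIR-V constant is consumed.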
	bool emitted = false;

	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) {
		if (c.specialization)
			return;

		auto &type = this->get<SPIRType>(c.constant_type);
		// Constant arrays of non-primitive types (e.g. matrices) won't link properly into Metal libraries.
		// FIXME: However, hoisting constants to main() means we need to pass down constant arrays to leaf functions if they are used there.
		// If there are multiple functions in the module, drop this case to avoid breaking use cases which do not need to
		// link into Metal libraries. This is hacky.
		if (is_array(type) && (!fully_inlined || is_scalar(type) || is_vector(type)))
		{
			add_resource_name(c.self);
			auto name = to_name(c.self);
			statement(inject_top_level_storage_qualifier(variable_decl(type, name), "constant"),
			          " = ", constant_expression(c), ";");
			emitted = true;
		}
	});

	if (emitted)
		statement("");
}

// Constant arrays of non-primitive types (e.g. matrices) won't link properly into Metal libraries
void CompilerMSL::declare_complex_constant_arrays()
{
	// If we do not have a fully inlined module, we did not opt in to
	// declaring constant arrays of complex types. See CompilerMSL::declare_constant_arrays().
	bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1;
	if (!fully_inlined)
		return;

	// MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to
	// global constants, so that we can use the constants as variable expressions.
	bool emitted = false;

	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) {
		if (c.specialization)
			return;

		auto &type = this->get<SPIRType>(c.constant_type);
		if (is_array(type) && !(is_scalar(type) || is_vector(type)))
		{
			add_resource_name(c.self);
			auto name = to_name(c.self);
			statement("", variable_decl(type, name), " = ", constant_expression(c), ";");
			emitted = true;
		}
	});

	if (emitted)
		statement("");
}

void CompilerMSL::emit_resources()
{
	declare_constant_arrays();

	// Emit the special [[stage_in]] and [[stage_out]] interface blocks which we created.
	emit_interface_block(stage_out_var_id);
	emit_interface_block(patch_stage_out_var_id);
	emit_interface_block(stage_in_var_id);
	emit_interface_block(patch_stage_in_var_id);
}

// Emit declarations for the specialization Metal function constants
void CompilerMSL::emit_specialization_constants_and_structs()
{
	SpecializationConstant wg_x, wg_y, wg_z;
	ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
	bool emitted = false;

	unordered_set<uint32_t> declared_structs;
	unordered_set<uint32_t> aligned_structs;

	// First, we need to deal with scalar block layout.
	// It is possible that a struct may have to be placed at an alignment which does not match the innate alignment of the struct itself.
	// If such a case exists for a struct, we must force all elements of the struct to become packed_ types.
	// This makes the struct alignment as small as physically possible.
	// When we actually align the struct later, we can insert padding as necessary to make the packed members behave like normally aligned types.
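	// Illustrative example: a float3 member which scalar layout places at a 4-byte
	// boundary is demoted to packed_float3; align_struct() later in this function then
	// adds explicit padding so the packed member still behaves like an aligned float3.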
	ir.for_each_typed_id<SPIRType>([&](uint32_t type_id, const SPIRType &type) {
		if (type.basetype == SPIRType::Struct &&
		    has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked))
			mark_scalar_layout_structs(type);
	});

	bool builtin_block_type_is_required = false;
	// Very special case. If gl_PerVertex is initialized as an array (tessellation)
	// we have to potentially emit the gl_PerVertex struct type so that we can emit a constant LUT.
	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) {
		auto &type = this->get<SPIRType>(c.constant_type);
		if (is_array(type) && has_decoration(type.self, DecorationBlock) && is_builtin_type(type))
			builtin_block_type_is_required = true;
	});

	// Very particular use of the soft loop lock.
	// align_struct may need to create custom types on the fly, but we don't care about
	// these types for purpose of iterating over them in ir.ids_for_type and friends.
	auto loop_lock = ir.create_loop_soft_lock();

	// Physical storage buffer pointers can have cyclical references,
	// so emit forward declarations of them before other structs.
	// Ignore type_id because we want the underlying struct type from the pointer.
	ir.for_each_typed_id<SPIRType>([&](uint32_t /* type_id */, const SPIRType &type) {
		if (type.basetype == SPIRType::Struct &&
		    type.pointer && type.storage == StorageClassPhysicalStorageBuffer &&
		    declared_structs.count(type.self) == 0)
		{
			statement("struct ", to_name(type.self), ";");
			declared_structs.insert(type.self);
			emitted = true;
		}
	});

	if (emitted)
		statement("");
	emitted = false;
	declared_structs.clear();

	// It is possible to have multiple spec constants that use the same spec constant ID.
	// The most common cause of this is defining spec constants in GLSL while also declaring
	// the workgroup size to use those spec constants. But, Metal forbids declaring more than
	// one variable with the same function constant ID.
	// In this case, we must only declare one variable with the [[function_constant(id)]]
	// attribute, and use its initializer to initialize all the spec constants with
	// that ID.
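	// The emitted MSL then takes roughly this shape (illustrative, hypothetical names):
	//   constant int foo_tmp [[function_constant(5)]];
	//   constant int foo = is_function_constant_defined(foo_tmp) ? foo_tmp : 1;
	//   constant int bar = is_function_constant_defined(foo_tmp) ? foo_tmp : 1;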
	std::unordered_map<uint32_t, ConstantID> unique_func_constants;

	for (auto &id_ : ir.ids_for_constant_undef_or_type)
	{
		auto &id = ir.ids[id_];

		if (id.get_type() == TypeConstant)
		{
			auto &c = id.get<SPIRConstant>();

			if (c.self == workgroup_size_id)
			{
				// TODO: This can be expressed as a [[threads_per_threadgroup]] input semantic, but we need to know
				// the work group size at compile time in SPIR-V, and [[threads_per_threadgroup]] would need to be passed around as a global.
				// The work group size may be a specialization constant.
				statement("constant uint3 ", builtin_to_glsl(BuiltInWorkgroupSize, StorageClassWorkgroup),
				          " [[maybe_unused]] = ", constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
				emitted = true;
			}
			else if (c.specialization)
			{
				auto &type = get<SPIRType>(c.constant_type);
				string sc_type_name = type_to_glsl(type);
				add_resource_name(c.self);
				string sc_name = to_name(c.self);

				// Function constants are only supported in MSL 1.2 and later.
				// If we don't support it just declare the "default" directly.
				// This "default" value can be overridden to the true specialization constant by the API user.
				// Specialization constants which are used as array length expressions cannot be function constants in MSL,
				// so just fall back to macros.
				if (msl_options.supports_msl_version(1, 2) && has_decoration(c.self, DecorationSpecId) &&
				    !c.is_used_as_array_length)
				{
					// Only scalar, non-composite values can be function constants.
					uint32_t constant_id = get_decoration(c.self, DecorationSpecId);
					if (!unique_func_constants.count(constant_id))
						unique_func_constants.insert(make_pair(constant_id, c.self));
					SPIRType::BaseType sc_tmp_type = expression_type(unique_func_constants[constant_id]).basetype;
					string sc_tmp_name = to_name(unique_func_constants[constant_id]) + "_tmp";
					if (unique_func_constants[constant_id] == c.self)
						statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id,
						          ")]];");
					statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name,
					          ") ? ", bitcast_expression(type, sc_tmp_type, sc_tmp_name), " : ", constant_expression(c),
					          ";");
				}
				else if (has_decoration(c.self, DecorationSpecId))
				{
					// Fallback to macro overrides.
					c.specialization_constant_macro_name =
					    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));

					statement("#ifndef ", c.specialization_constant_macro_name);
					statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c));
					statement("#endif");
					statement("constant ", sc_type_name, " ", sc_name, " = ", c.specialization_constant_macro_name,
					          ";");
				}
				else
				{
					// Composite specialization constants must be built from other specialization constants.
					statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(c), ";");
				}
				emitted = true;
			}
		}
		else if (id.get_type() == TypeConstantOp)
		{
			auto &c = id.get<SPIRConstantOp>();
			auto &type = get<SPIRType>(c.basetype);
			add_resource_name(c.self);
			auto name = to_name(c.self);
			statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";");
			emitted = true;
		}
		else if (id.get_type() == TypeType)
		{
			// Output non-builtin interface structs. These include local function structs
			// and structs nested within uniform and read-write buffers.
			auto &type = id.get<SPIRType>();
			TypeID type_id = type.self;

			bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && !type.pointer;
			bool is_block =
			    has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);

			bool is_builtin_block = is_block && is_builtin_type(type);
			bool is_declarable_struct = is_struct && (!is_builtin_block || builtin_block_type_is_required);

			// We'll declare this later.
			if (stage_out_var_id && get_stage_out_struct_type().self == type_id)
				is_declarable_struct = false;
			if (patch_stage_out_var_id && get_patch_stage_out_struct_type().self == type_id)
				is_declarable_struct = false;
			if (stage_in_var_id && get_stage_in_struct_type().self == type_id)
				is_declarable_struct = false;
			if (patch_stage_in_var_id && get_patch_stage_in_struct_type().self == type_id)
				is_declarable_struct = false;

			// Special case. Declare builtin struct anyways if we need to emit a threadgroup version of it.
			if (stage_out_masked_builtin_type_id == type_id)
				is_declarable_struct = true;

			// Align and emit declarable structs...but avoid declaring each more than once.
			if (is_declarable_struct && declared_structs.count(type_id) == 0)
			{
				if (emitted)
					statement("");
				emitted = false;

				declared_structs.insert(type_id);

				if (has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked))
					align_struct(type, aligned_structs);

				// Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc.
				emit_struct(get<SPIRType>(type_id));
			}
		}
		else if (id.get_type() == TypeUndef)
		{
			auto &undef = id.get<SPIRUndef>();
			auto &type = get<SPIRType>(undef.basetype);
			// OpUndef can be void for some reason ...
			if (type.basetype == SPIRType::Void)
				return;

			// Undefined global memory is not allowed in MSL.
			// Declare constant and init to zeros. Use {}, as global constructors can break Metal.
			statement(
			    inject_top_level_storage_qualifier(variable_decl(type, to_name(undef.self), undef.self), "constant"),
			    " = {};");
			emitted = true;
		}
	}

	if (emitted)
		statement("");
}

void CompilerMSL::emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
{
	bool forward = should_forward(op0) && should_forward(op1);
	emit_op(result_type, result_id, join(to_ptr_expression(op0), " ", op, " ", to_ptr_expression(op1)), forward);
	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
}

string CompilerMSL::to_ptr_expression(uint32_t id, bool register_expression_read)
{
	auto *e = maybe_get<SPIRExpression>(id);
	auto expr = enclose_expression(e && e->need_transpose ? e->expression : to_expression(id, register_expression_read));
	if (!should_dereference(id))
		expr = address_of_expression(expr);
	return expr;
}
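
// SPIR-V's unordered comparisons (FUnordLessThan and friends) are true when either
// operand is NaN. MSL has no dedicated operator for these, so the expression below is
// emitted in the shape "(isunordered(a, b) || a < b)" (illustrative form).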
void CompilerMSL::emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                       const char *op)
{
	bool forward = should_forward(op0) && should_forward(op1);
	emit_op(result_type, result_id,
	        join("(isunordered(", to_enclosed_unpacked_expression(op0), ", ", to_enclosed_unpacked_expression(op1),
	             ") || ", to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1),
	             ")"),
	        forward);

	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
}

bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id, uint32_t ptr)
{
	auto &ptr_type = expression_type(ptr);
	auto &result_type = get<SPIRType>(result_type_id);
	if (ptr_type.storage != StorageClassInput && ptr_type.storage != StorageClassOutput)
		return false;
	if (ptr_type.storage == StorageClassOutput && is_tese_shader())
		return false;

	if (has_decoration(ptr, DecorationPatch))
		return false;

	bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable;
	bool flattened_io = variable_storage_requires_stage_io(ptr_type.storage);
	bool flat_data_type = flattened_io &&
	                      (is_matrix(result_type) || is_array(result_type) || result_type.basetype == SPIRType::Struct);

	// Edge case, even with multi-patch workgroups, we still need to unroll load
	// if we're loading control points directly.
	if (ptr_is_io_variable && is_array(result_type))
		flat_data_type = true;

	if (!flat_data_type)
		return false;
	// Now, we must unflatten a composite type and take care of interleaving array access with gl_in/gl_out.
	// Lots of painful code duplication since we *really* should not unroll these kinds of loads in entry point fixup
	// unless we're forced to do this when the code is emitting suboptimal OpLoads.
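	// Sketch of the unrolled shape (illustrative; "m" stands in for the flattened
	// interface member): loading a three-control-point float3 array emits roughly
	//   spvUnsafeArray<float3, 3>({ gl_in[0].m.xyz, gl_in[1].m.xyz, gl_in[2].m.xyz })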
	string expr;

	uint32_t interface_index = get_extended_decoration(ptr, SPIRVCrossDecorationInterfaceMemberIndex);
	auto *var = maybe_get_backing_variable(ptr);
	auto &expr_type = get_pointee_type(ptr_type.self);
	const auto &iface_type = expression_type(stage_in_ptr_var_id);

	if (!flattened_io)
	{
		// Simplest case for multi-patch workgroups, just unroll array as-is.
		if (interface_index == uint32_t(-1))
			return false;

		expr += type_to_glsl(result_type) + "({ ";
		uint32_t num_control_points = to_array_size_literal(result_type, uint32_t(result_type.array.size()) - 1);
		for (uint32_t i = 0; i < num_control_points; i++)
		{
			const uint32_t indices[2] = { i, interface_index };
			AccessChainMeta meta;
			expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
			                              ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
			if (i + 1 < num_control_points)
				expr += ", ";
		}
		expr += " })";
	}
	else if (result_type.array.size() > 2)
	{
		SPIRV_CROSS_THROW("Cannot load tessellation IO variables with more than 2 dimensions.");
	}
	else if (result_type.array.size() == 2)
	{
		if (!ptr_is_io_variable)
  7230. SPIRV_CROSS_THROW("Loading an array-of-array must be loaded directly from an IO variable.");
		if (interface_index == uint32_t(-1))
			SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");
		if (result_type.basetype == SPIRType::Struct || is_matrix(result_type))
			SPIRV_CROSS_THROW("Cannot load array-of-array of composite type in tessellation IO.");

		expr += type_to_glsl(result_type) + "({ ";
		uint32_t num_control_points = to_array_size_literal(result_type, 1);
		uint32_t base_interface_index = interface_index;

		auto &sub_type = get<SPIRType>(result_type.parent_type);

		for (uint32_t i = 0; i < num_control_points; i++)
		{
			expr += type_to_glsl(sub_type) + "({ ";
			interface_index = base_interface_index;
			uint32_t array_size = to_array_size_literal(result_type, 0);
			for (uint32_t j = 0; j < array_size; j++, interface_index++)
			{
				const uint32_t indices[2] = { i, interface_index };

				AccessChainMeta meta;
				expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
				                              ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
				if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct &&
				    expr_type.vecsize > sub_type.vecsize)
					expr += vector_swizzle(sub_type.vecsize, 0);

				if (j + 1 < array_size)
					expr += ", ";
			}
			expr += " })";
			if (i + 1 < num_control_points)
				expr += ", ";
		}
		expr += " })";
	}
	else if (result_type.basetype == SPIRType::Struct)
	{
		bool is_array_of_struct = is_array(result_type);
		if (is_array_of_struct && !ptr_is_io_variable)
			SPIRV_CROSS_THROW("Loading array of struct from IO variable must come directly from IO variable.");

		uint32_t num_control_points = 1;
		if (is_array_of_struct)
		{
			num_control_points = to_array_size_literal(result_type, 0);
			expr += type_to_glsl(result_type) + "({ ";
		}

		auto &struct_type = is_array_of_struct ? get<SPIRType>(result_type.parent_type) : result_type;
		assert(struct_type.array.empty());

		for (uint32_t i = 0; i < num_control_points; i++)
		{
			expr += type_to_glsl(struct_type) + "{ ";
			for (uint32_t j = 0; j < uint32_t(struct_type.member_types.size()); j++)
			{
				// The base interface index is stored per variable for structs.
				if (var)
				{
					interface_index =
					    get_extended_member_decoration(var->self, j, SPIRVCrossDecorationInterfaceMemberIndex);
				}

				if (interface_index == uint32_t(-1))
					SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");

				const auto &mbr_type = get<SPIRType>(struct_type.member_types[j]);
				const auto &expr_mbr_type = get<SPIRType>(expr_type.member_types[j]);
				if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput)
				{
					expr += type_to_glsl(mbr_type) + "(";
					for (uint32_t k = 0; k < mbr_type.columns; k++, interface_index++)
					{
						if (is_array_of_struct)
						{
							const uint32_t indices[2] = { i, interface_index };
							AccessChainMeta meta;
							expr += access_chain_internal(
							    stage_in_ptr_var_id, indices, 2,
							    ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
						}
						else
							expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
						if (expr_mbr_type.vecsize > mbr_type.vecsize)
							expr += vector_swizzle(mbr_type.vecsize, 0);

						if (k + 1 < mbr_type.columns)
							expr += ", ";
					}
					expr += ")";
				}
				else if (is_array(mbr_type))
				{
					expr += type_to_glsl(mbr_type) + "({ ";
					uint32_t array_size = to_array_size_literal(mbr_type, 0);
					for (uint32_t k = 0; k < array_size; k++, interface_index++)
					{
						if (is_array_of_struct)
						{
							const uint32_t indices[2] = { i, interface_index };
							AccessChainMeta meta;
							expr += access_chain_internal(
							    stage_in_ptr_var_id, indices, 2,
							    ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
						}
						else
							expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
						if (expr_mbr_type.vecsize > mbr_type.vecsize)
							expr += vector_swizzle(mbr_type.vecsize, 0);

						if (k + 1 < array_size)
							expr += ", ";
					}
					expr += " })";
				}
				else
				{
					if (is_array_of_struct)
					{
						const uint32_t indices[2] = { i, interface_index };
						AccessChainMeta meta;
						expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
						                              ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT,
						                              &meta);
					}
					else
						expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
					if (expr_mbr_type.vecsize > mbr_type.vecsize)
						expr += vector_swizzle(mbr_type.vecsize, 0);
				}

				if (j + 1 < struct_type.member_types.size())
					expr += ", ";
			}
			expr += " }";
			if (i + 1 < num_control_points)
				expr += ", ";
		}
		if (is_array_of_struct)
			expr += " })";
	}
	else if (is_matrix(result_type))
	{
		bool is_array_of_matrix = is_array(result_type);
		if (is_array_of_matrix && !ptr_is_io_variable)
			SPIRV_CROSS_THROW("Loading array of matrix from IO variable must come directly from IO variable.");
		if (interface_index == uint32_t(-1))
			SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");

		if (is_array_of_matrix)
		{
			// Loading a matrix from each control point.
			uint32_t base_interface_index = interface_index;
			uint32_t num_control_points = to_array_size_literal(result_type, 0);
			expr += type_to_glsl(result_type) + "({ ";

			auto &matrix_type = get_variable_element_type(get<SPIRVariable>(ptr));

			for (uint32_t i = 0; i < num_control_points; i++)
			{
				interface_index = base_interface_index;
				expr += type_to_glsl(matrix_type) + "(";
				for (uint32_t j = 0; j < result_type.columns; j++, interface_index++)
				{
					const uint32_t indices[2] = { i, interface_index };

					AccessChainMeta meta;
					expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
					                              ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
					if (expr_type.vecsize > result_type.vecsize)
						expr += vector_swizzle(result_type.vecsize, 0);
					if (j + 1 < result_type.columns)
						expr += ", ";
				}
				expr += ")";
				if (i + 1 < num_control_points)
					expr += ", ";
			}

			expr += " })";
		}
		else
		{
			expr += type_to_glsl(result_type) + "(";
			for (uint32_t i = 0; i < result_type.columns; i++, interface_index++)
			{
				expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
				if (expr_type.vecsize > result_type.vecsize)
					expr += vector_swizzle(result_type.vecsize, 0);
				if (i + 1 < result_type.columns)
					expr += ", ";
			}
			expr += ")";
		}
	}
	else if (ptr_is_io_variable)
	{
		assert(is_array(result_type));
		assert(result_type.array.size() == 1);
		if (interface_index == uint32_t(-1))
			SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");

		// We're loading an array directly from a global variable.
		// This means we're loading one member from each control point.
		expr += type_to_glsl(result_type) + "({ ";
		uint32_t num_control_points = to_array_size_literal(result_type, 0);

		for (uint32_t i = 0; i < num_control_points; i++)
		{
			const uint32_t indices[2] = { i, interface_index };
			AccessChainMeta meta;
			expr += access_chain_internal(stage_in_ptr_var_id, indices, 2,
			                              ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta);
			if (expr_type.vecsize > result_type.vecsize)
				expr += vector_swizzle(result_type.vecsize, 0);

			if (i + 1 < num_control_points)
				expr += ", ";
		}
		expr += " })";
	}
	else
	{
		// We're loading an array from a concrete control point.
		assert(is_array(result_type));
		assert(result_type.array.size() == 1);
		if (interface_index == uint32_t(-1))
			SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue.");

		expr += type_to_glsl(result_type) + "({ ";
		uint32_t array_size = to_array_size_literal(result_type, 0);
		for (uint32_t i = 0; i < array_size; i++, interface_index++)
		{
			expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index);
			if (expr_type.vecsize > result_type.vecsize)
				expr += vector_swizzle(result_type.vecsize, 0);
			if (i + 1 < array_size)
				expr += ", ";
		}
		expr += " })";
	}

	emit_op(result_type_id, id, expr, false);
	register_read(id, ptr, false);
	return true;
}

bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t length)
{
	// If this is a per-vertex output, remap it to the I/O array buffer.
	// Any object which did not go through IO flattening shenanigans will go there instead.
	// We will unflatten on demand as needed, but not all possible cases can be supported, especially with arrays.
	auto *var = maybe_get_backing_variable(ops[2]);
	bool patch = false;
	bool flat_data = false;
	bool ptr_is_chain = false;
	bool flatten_composites = false;

	bool is_block = false;
	bool is_arrayed = false;

	if (var)
	{
		auto &type = get_variable_data_type(*var);
		is_block = has_decoration(type.self, DecorationBlock);
		is_arrayed = !type.array.empty();

		flatten_composites = variable_storage_requires_stage_io(var->storage);
		patch = has_decoration(ops[2], DecorationPatch) || is_patch_block(type);

		// Should match strip_array in add_interface_block.
		flat_data = var->storage == StorageClassInput || (var->storage == StorageClassOutput && is_tesc_shader());

		// Patch inputs are treated as normal block IO variables, so they don't deal with this path at all.
		if (patch && (!is_block || is_arrayed || var->storage == StorageClassInput))
			flat_data = false;

		// We might have a chained access chain, where
		// we first take the access chain to the control point, and then we chain into a member or something similar.
		// In this case, we need to skip gl_in/gl_out remapping.
		// Also, skip ptr chain for patches.
		ptr_is_chain = var->self != ID(ops[2]);
	}

	bool builtin_variable = false;
	bool variable_is_flat = false;

	if (var && flat_data)
	{
		builtin_variable = is_builtin_variable(*var);

		BuiltIn bi_type = BuiltInMax;
		if (builtin_variable && !is_block)
			bi_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn));

		variable_is_flat = !builtin_variable || is_block ||
		                   bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
		                   bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance;
	}

	if (variable_is_flat)
	{
		// If output is masked, it is emitted as a "normal" variable, just go through normal code paths.
		// Only check this for the first level of access chain.
		// Dealing with this for partial access chains should be possible, but awkward.
		if (var->storage == StorageClassOutput && !ptr_is_chain)
		{
			bool masked = false;
			if (is_block)
			{
				uint32_t relevant_member_index = patch ? 3 : 4;
				// FIXME: This won't work properly if the application first access chains into gl_out element,
				// then access chains into the member. Super weird, but theoretically possible ...
				if (length > relevant_member_index)
				{
					uint32_t mbr_idx = get<SPIRConstant>(ops[relevant_member_index]).scalar();
					masked = is_stage_output_block_member_masked(*var, mbr_idx, true);
				}
			}
			else if (var)
				masked = is_stage_output_variable_masked(*var);

			if (masked)
				return false;
		}

		AccessChainMeta meta;
		SmallVector<uint32_t> indices;
		uint32_t next_id = ir.increase_bound_by(1);

		indices.reserve(length - 3 + 1);

		uint32_t first_non_array_index = (ptr_is_chain ? 3 : 4) - (patch ? 1 : 0);

		VariableID stage_var_id;
		if (patch)
			stage_var_id = var->storage == StorageClassInput ? patch_stage_in_var_id : patch_stage_out_var_id;
		else
			stage_var_id = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id;

		VariableID ptr = ptr_is_chain ? VariableID(ops[2]) : stage_var_id;
		if (!ptr_is_chain && !patch)
		{
			// Index into gl_in/gl_out with first array index.
			indices.push_back(ops[first_non_array_index - 1]);
		}

		auto &result_ptr_type = get<SPIRType>(ops[0]);

		uint32_t const_mbr_id = next_id++;
		uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex);

		// If we have a pointer chain expression, and we are no longer pointing to a composite
		// object, we are in the clear. There is no longer a need to flatten anything.
		bool further_access_chain_is_trivial = false;
		if (ptr_is_chain && flatten_composites)
		{
			auto &ptr_type = expression_type(ptr);
			if (!is_array(ptr_type) && !is_matrix(ptr_type) && ptr_type.basetype != SPIRType::Struct)
				further_access_chain_is_trivial = true;
		}

		if (!further_access_chain_is_trivial && (flatten_composites || is_block))
		{
			uint32_t i = first_non_array_index;
			auto *type = &get_variable_element_type(*var);
			if (index == uint32_t(-1) && length >= (first_non_array_index + 1))
			{
				// Maybe this is a struct type in the input class, in which case
				// we put it as a decoration on the corresponding member.
				uint32_t mbr_idx = get_constant(ops[first_non_array_index]).scalar();
				index = get_extended_member_decoration(var->self, mbr_idx,
				                                       SPIRVCrossDecorationInterfaceMemberIndex);
				assert(index != uint32_t(-1));
				i++;
				type = &get<SPIRType>(type->member_types[mbr_idx]);
			}

			// In this case, we're poking into flattened structures and arrays, so now we have to
			// combine the following indices. If we encounter a non-constant index,
			// we're hosed.
			for (; flatten_composites && i < length; ++i)
			{
				if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct)
					break;

				auto *c = maybe_get<SPIRConstant>(ops[i]);
				if (!c || c->specialization)
					SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable in tessellation. "
					                  "This is currently unsupported.");

				// We're in flattened space, so just increment the member index into IO block.
				// We can only do this once in the current implementation, so either:
				// Struct, Matrix or 1-dimensional array for a control point.
				if (type->basetype == SPIRType::Struct && var->storage == StorageClassOutput)
				{
					// Need to consider holes, since individual block members might be masked away.
					uint32_t mbr_idx = c->scalar();
					for (uint32_t j = 0; j < mbr_idx; j++)
						if (!is_stage_output_block_member_masked(*var, j, true))
							index++;
				}
				else
					index += c->scalar();

				if (type->parent_type)
					type = &get<SPIRType>(type->parent_type);
				else if (type->basetype == SPIRType::Struct)
					type = &get<SPIRType>(type->member_types[c->scalar()]);
			}

			// We're not going to emit the actual member name, we let any further OpLoad take care of that.
			// Tag the access chain with the member index we're referencing.
			auto &result_pointee_type = get_pointee_type(result_ptr_type);
			bool defer_access_chain = flatten_composites && (is_matrix(result_pointee_type) || is_array(result_pointee_type) ||
			                                                 result_pointee_type.basetype == SPIRType::Struct);

			if (!defer_access_chain)
			{
				// Access the appropriate member of gl_in/gl_out.
				set<SPIRConstant>(const_mbr_id, get_uint_type_id(), index, false);
				indices.push_back(const_mbr_id);

				// Member index is now irrelevant.
				index = uint32_t(-1);

				// Append any straggling access chain indices.
				if (i < length)
					indices.insert(indices.end(), ops + i, ops + length);
			}
			else
			{
				// We must have consumed the entire access chain if we're deferring it.
				assert(i == length);
			}

			if (index != uint32_t(-1))
				set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index);
			else
				unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex);
		}
		else
		{
			if (index != uint32_t(-1))
			{
				set<SPIRConstant>(const_mbr_id, get_uint_type_id(), index, false);
				indices.push_back(const_mbr_id);
			}

			// Member index is now irrelevant.
			index = uint32_t(-1);
			unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex);

			indices.insert(indices.end(), ops + first_non_array_index, ops + length);
		}

		// We use the pointer to the base of the input/output array here,
		// so this is always a pointer chain.
		string e;

		if (!ptr_is_chain)
		{
			// This is the start of an access chain, use ptr_chain to index into control point array.
			e = access_chain(ptr, indices.data(), uint32_t(indices.size()), result_ptr_type, &meta, !patch);
		}
		else
		{
			// If we're accessing a struct, we need to use member indices which are based on the IO block,
			// not actual struct type, so we have to use a split access chain here where
			// first path resolves the control point index, i.e. gl_in[index], and second half deals with
			// looking up flattened member name.

			// However, it is possible that we partially accessed a struct,
			// by taking pointer to member inside the control-point array.
			// For this case, we fall back to a natural access chain since we have already dealt with remapping struct members.
			// One way to check this here is if we have 2 implied read expressions.
			// First one is the gl_in/gl_out struct itself, then an index into that array.
			// If we have traversed further, we use a normal access chain formulation.
			auto *ptr_expr = maybe_get<SPIRExpression>(ptr);
			bool split_access_chain_formulation = flatten_composites && ptr_expr &&
			                                      ptr_expr->implied_read_expressions.size() == 2 &&
			                                      !further_access_chain_is_trivial;

			if (split_access_chain_formulation)
			{
				e = join(to_expression(ptr),
				         access_chain_internal(stage_var_id, indices.data(), uint32_t(indices.size()),
				                               ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta));
			}
			else
			{
				e = access_chain_internal(ptr, indices.data(), uint32_t(indices.size()), 0, &meta);
			}
		}

		// Get the actual type of the object that was accessed. If it's a vector type and we changed it,
		// then we'll need to add a swizzle.
		// For this, we can't necessarily rely on the type of the base expression, because it might be
		// another access chain, and it will therefore already have the "correct" type.
		auto *expr_type = &get_variable_data_type(*var);
		if (has_extended_decoration(ops[2], SPIRVCrossDecorationTessIOOriginalInputTypeID))
			expr_type = &get<SPIRType>(get_extended_decoration(ops[2], SPIRVCrossDecorationTessIOOriginalInputTypeID));
		for (uint32_t i = 3; i < length; i++)
		{
			if (!is_array(*expr_type) && expr_type->basetype == SPIRType::Struct)
				expr_type = &get<SPIRType>(expr_type->member_types[get<SPIRConstant>(ops[i]).scalar()]);
			else
				expr_type = &get<SPIRType>(expr_type->parent_type);
		}
		if (!is_array(*expr_type) && !is_matrix(*expr_type) && expr_type->basetype != SPIRType::Struct &&
		    expr_type->vecsize > result_ptr_type.vecsize)
			e += vector_swizzle(result_ptr_type.vecsize, 0);

		auto &expr = set<SPIRExpression>(ops[1], std::move(e), ops[0], should_forward(ops[2]));
		expr.loaded_from = var->self;
		expr.need_transpose = meta.need_transpose;
		expr.access_chain = true;

		// Mark the result as being packed if necessary.
		if (meta.storage_is_packed)
			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
		if (meta.storage_physical_type != 0)
			set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
		if (meta.storage_is_invariant)
			set_decoration(ops[1], DecorationInvariant);

		// Save the type we found in case the result is used in another access chain.
		set_extended_decoration(ops[1], SPIRVCrossDecorationTessIOOriginalInputTypeID, expr_type->self);
  7698. // Need to assume we're forwarded while calling inherit_expression_depdendencies.
		forwarded_temporaries.insert(ops[1]);
		// The access chain itself is never forced to a temporary, but its dependencies might.
		suppressed_usage_tracking.insert(ops[1]);

		for (uint32_t i = 2; i < length; i++)
		{
			inherit_expression_dependencies(ops[1], ops[i]);
			add_implied_read_expression(expr, ops[i]);
		}

		// If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
		// we're not forwarded after all.
		if (expr.expression_dependencies.empty())
			forwarded_temporaries.erase(ops[1]);

		return true;
	}

	// If this is the inner tessellation level, and we're tessellating triangles,
	// drop the last index. It isn't an array in this case, so we can't have an
	// array reference here. We need to make this ID a variable instead of an
	// expression so we don't try to dereference it as a variable pointer.
	// Don't do this if the index is a constant 1, though. We need to drop stores
	// to that one.
	auto *m = ir.find_meta(var ? var->self : ID(0));
	if (is_tesc_shader() && var && m && m->decoration.builtin_type == BuiltInTessLevelInner &&
	    is_tessellating_triangles())
	{
		auto *c = maybe_get<SPIRConstant>(ops[3]);
		if (c && c->scalar() == 1)
			return false;
		auto &dest_var = set<SPIRVariable>(ops[1], *var);
		dest_var.basetype = ops[0];
		ir.meta[ops[1]] = ir.meta[ops[2]];
		inherit_expression_dependencies(ops[1], ops[2]);
		return true;
	}

	return false;
}

bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs)
{
	if (!is_tessellating_triangles())
		return false;

	// In SPIR-V, TessLevelInner always has two elements and TessLevelOuter always has
	// four. This is true even if we are tessellating triangles. This allows clients
	// to use a single tessellation control shader with multiple tessellation evaluation
	// shaders.
	// In Metal, however, only the first element of TessLevelInner and the first three
	// of TessLevelOuter are accessible. This stems from how in Metal, the tessellation
	// levels must be stored to a dedicated buffer in a particular format that depends
	// on the patch type. Therefore, in Triangles mode, any store to the second
	// inner level or the fourth outer level must be dropped.
  7747. const auto *e = maybe_get<SPIRExpression>(id_lhs);
  7748. if (!e || !e->access_chain)
  7749. return false;
  7750. BuiltIn builtin = BuiltIn(get_decoration(e->loaded_from, DecorationBuiltIn));
  7751. if (builtin != BuiltInTessLevelInner && builtin != BuiltInTessLevelOuter)
  7752. return false;
  7753. auto *c = maybe_get<SPIRConstant>(e->implied_read_expressions[1]);
  7754. if (!c)
  7755. return false;
  7756. return (builtin == BuiltInTessLevelInner && c->scalar() == 1) ||
  7757. (builtin == BuiltInTessLevelOuter && c->scalar() == 3);
  7758. }
bool CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
                                                         spv::StorageClass storage, bool &is_packed)
{
	// If there is any risk of writes happening with the access chain in question,
	// and there is a risk of concurrent write access to other components,
	// we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect.
	// The MSL compiler refuses to allow component-level access for any non-packed vector types.
	if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup))
	{
		const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device";
		expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")");

		// Further indexing should happen with packed rules (array index, not swizzle).
		is_packed = true;
		return true;
	}
	else
		return false;
}
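// Note on prepare_access_chain_for_scalar_access (illustrative, names hypothetical):
// an access chain like "buf.v" into a storage buffer becomes "((device float*)&buf.v)",
// so a later component access is written as a pointer index ("[1]") rather than a
// swizzle (".y"), which MSL rejects for non-packed vectors in writable address spaces.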
bool CompilerMSL::access_chain_needs_stage_io_builtin_translation(uint32_t base)
{
	auto *var = maybe_get_backing_variable(base);
	if (!var || !is_tessellation_shader())
		return true;

	// We only need to rewrite builtin access chains when accessing flattened builtins like gl_ClipDistance_N.
	// Avoid overriding it back to just gl_ClipDistance.
	// This can only happen in scenarios where we cannot flatten/unflatten access chains, so, the only case
	// where this triggers is evaluation shader inputs.
	bool redirect_builtin = is_tese_shader() ? var->storage == StorageClassOutput : false;
	return redirect_builtin;
}
// Sets the interface member index for an access chain to a pull-model interpolant.
void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length)
{
	auto *var = maybe_get_backing_variable(ops[2]);
	if (!var || !pull_model_inputs.count(var->self))
		return;

	// Get the base index.
	uint32_t interface_index;
	auto &var_type = get_variable_data_type(*var);
	auto &result_type = get<SPIRType>(ops[0]);
	auto *type = &var_type;
	if (has_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex))
	{
		interface_index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex);
	}
	else
	{
		// Assume an access chain into a struct variable.
		assert(var_type.basetype == SPIRType::Struct);
		auto &c = get<SPIRConstant>(ops[3 + var_type.array.size()]);
		interface_index =
		    get_extended_member_decoration(var->self, c.scalar(), SPIRVCrossDecorationInterfaceMemberIndex);
	}

	// Accumulate indices. We'll have to skip over the one for the struct, if present, because we already
	// accounted for that when getting the base index.
	for (uint32_t i = 3; i < length; ++i)
	{
		if (is_vector(*type) && !is_array(*type) && is_scalar(result_type))
		{
			// We don't want to combine the next index. Actually, we need to save it
			// so we know to apply a swizzle to the result of the interpolation.
			set_extended_decoration(ops[1], SPIRVCrossDecorationInterpolantComponentExpr, ops[i]);
			break;
		}

		auto *c = maybe_get<SPIRConstant>(ops[i]);
		if (!c || c->specialization)
			SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable using pull-model "
			                  "interpolation. This is currently unsupported.");

		if (type->parent_type)
			type = &get<SPIRType>(type->parent_type);
		else if (type->basetype == SPIRType::Struct)
			type = &get<SPIRType>(type->member_types[c->scalar()]);

		if (!has_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex) &&
		    i - 3 == var_type.array.size())
			continue;

		interface_index += c->scalar();
	}

	// Save this to the access chain itself so we can recover it later when calling an interpolation function.
	set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, interface_index);
}
// If the physical type of a physical buffer pointer has been changed
// to a ulong or ulongn vector, add a cast back to the pointer type.
void CompilerMSL::check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type)
{
	auto *p_physical_type = maybe_get<SPIRType>(physical_type);
	if (p_physical_type &&
	    p_physical_type->storage == StorageClassPhysicalStorageBuffer &&
	    p_physical_type->basetype == to_unsigned_basetype(64))
	{
		if (p_physical_type->vecsize > 1)
			expr += ".x";

		expr = join("((", type_to_glsl(*type), ")", expr, ")");
	}
}
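// Illustrative example of the cast above (names hypothetical): if a buffer_device_address
// pointer was remapped to a "ulong2 bda" physical type, a load through it becomes
// "((device Foo*)bda.x)" — only the first component carries the address.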
// Override for MSL-specific syntax instructions
void CompilerMSL::emit_instruction(const Instruction &instruction)
{
#define MSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
#define MSL_PTR_BOP(op) emit_binary_ptr_op(ops[0], ops[1], ops[2], ops[3], #op)
	// MSL does care about implicit integer promotion, but those cases are all handled in common code.
#define MSL_BOP_CAST(op, type) \
	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), false)
#define MSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
#define MSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
#define MSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
#define MSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
#define MSL_BFOP_CAST(op, type) \
	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
#define MSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
#define MSL_UNORD_BOP(op) emit_binary_unord_op(ops[0], ops[1], ops[2], ops[3], #op)

	auto ops = stream(instruction);
	auto opcode = static_cast<Op>(instruction.op);
	opcode = get_remapped_spirv_op(opcode);

	// If we need to do implicit bitcasts, make sure we do it with the correct type.
	uint32_t integer_width = get_integer_width_for_instruction(instruction);
	auto int_type = to_signed_basetype(integer_width);
	auto uint_type = to_unsigned_basetype(integer_width);

	switch (opcode)
	{
	case OpLoad:
	{
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		if (is_tessellation_shader())
		{
			if (!emit_tessellation_io_load(ops[0], id, ptr))
				CompilerGLSL::emit_instruction(instruction);
		}
		else
		{
			// Sample mask input for Metal is not an array
			if (BuiltIn(get_decoration(ptr, DecorationBuiltIn)) == BuiltInSampleMask)
				set_decoration(id, DecorationBuiltIn, BuiltInSampleMask);
			CompilerGLSL::emit_instruction(instruction);
		}
		break;
	}
	// Comparisons
	case OpIEqual:
		MSL_BOP_CAST(==, int_type);
		break;

	case OpLogicalEqual:
	case OpFOrdEqual:
		MSL_BOP(==);
		break;

	case OpINotEqual:
		MSL_BOP_CAST(!=, int_type);
		break;

	case OpLogicalNotEqual:
	case OpFOrdNotEqual:
		// TODO: Should probably negate the == result here.
		// Typically OrdNotEqual comes from GLSL which itself does not really specify what
		// happens with NaN.
		// Consider fixing this if we run into real issues.
		MSL_BOP(!=);
		break;

	case OpUGreaterThan:
		MSL_BOP_CAST(>, uint_type);
		break;

	case OpSGreaterThan:
		MSL_BOP_CAST(>, int_type);
		break;

	case OpFOrdGreaterThan:
		MSL_BOP(>);
		break;

	case OpUGreaterThanEqual:
		MSL_BOP_CAST(>=, uint_type);
		break;

	case OpSGreaterThanEqual:
		MSL_BOP_CAST(>=, int_type);
		break;

	case OpFOrdGreaterThanEqual:
		MSL_BOP(>=);
		break;

	case OpULessThan:
		MSL_BOP_CAST(<, uint_type);
		break;

	case OpSLessThan:
		MSL_BOP_CAST(<, int_type);
		break;

	case OpFOrdLessThan:
		MSL_BOP(<);
		break;

	case OpULessThanEqual:
		MSL_BOP_CAST(<=, uint_type);
		break;

	case OpSLessThanEqual:
		MSL_BOP_CAST(<=, int_type);
		break;

	case OpFOrdLessThanEqual:
		MSL_BOP(<=);
		break;

	case OpFUnordEqual:
		MSL_UNORD_BOP(==);
		break;

	case OpFUnordNotEqual:
		// "!=" in MSL generates unordered-not-equal ("une") compares to begin with.
		// Since unordered not-equal is how it works in C, just inherit that behavior.
		MSL_BOP(!=);
		break;

	case OpFUnordGreaterThan:
		MSL_UNORD_BOP(>);
		break;

	case OpFUnordGreaterThanEqual:
		MSL_UNORD_BOP(>=);
		break;

	case OpFUnordLessThan:
		MSL_UNORD_BOP(<);
		break;

	case OpFUnordLessThanEqual:
		MSL_UNORD_BOP(<=);
		break;

	// Pointer math
	case OpPtrEqual:
		MSL_PTR_BOP(==);
		break;

	case OpPtrNotEqual:
		MSL_PTR_BOP(!=);
		break;

	case OpPtrDiff:
		MSL_PTR_BOP(-);
		break;

	// Derivatives
	case OpDPdx:
	case OpDPdxFine:
	case OpDPdxCoarse:
		MSL_UFOP(dfdx);
		register_control_dependent_expression(ops[1]);
		break;

	case OpDPdy:
	case OpDPdyFine:
	case OpDPdyCoarse:
		MSL_UFOP(dfdy);
		register_control_dependent_expression(ops[1]);
		break;

	case OpFwidth:
	case OpFwidthCoarse:
	case OpFwidthFine:
		MSL_UFOP(fwidth);
		register_control_dependent_expression(ops[1]);
		break;
	// Bitfield
	case OpBitFieldInsert:
	{
		emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "insert_bits", SPIRType::UInt);
		break;
	}

	case OpBitFieldSExtract:
	{
		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", int_type, int_type,
		                                SPIRType::UInt, SPIRType::UInt);
		break;
	}

	case OpBitFieldUExtract:
	{
		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", uint_type, uint_type,
		                                SPIRType::UInt, SPIRType::UInt);
		break;
	}

	case OpBitReverse:
		// BitReverse does not have issues with sign since result type must match input type.
		MSL_UFOP(reverse_bits);
		break;

	case OpBitCount:
	{
		auto basetype = expression_type(ops[2]).basetype;
		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "popcount", basetype, basetype);
		break;
	}

	case OpFRem:
		MSL_BFOP(fmod);
		break;

	case OpFMul:
		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
			MSL_BFOP(spvFMul);
		else
			MSL_BOP(*);
		break;

	case OpFAdd:
		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
			MSL_BFOP(spvFAdd);
		else
			MSL_BOP(+);
		break;

	case OpFSub:
		if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
			MSL_BFOP(spvFSub);
		else
			MSL_BOP(-);
		break;
	// Atomics
	case OpAtomicExchange:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		uint32_t mem_sem = ops[4];
		uint32_t val = ops[5];
		emit_atomic_func_op(result_type, id, "atomic_exchange", opcode, mem_sem, mem_sem, false, ptr, val);
		break;
	}

	case OpAtomicCompareExchange:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		uint32_t mem_sem_pass = ops[4];
		uint32_t mem_sem_fail = ops[5];
		uint32_t val = ops[6];
		uint32_t comp = ops[7];
		emit_atomic_func_op(result_type, id, "atomic_compare_exchange_weak", opcode,
		                    mem_sem_pass, mem_sem_fail, true,
		                    ptr, comp, true, false, val);
		break;
	}

	case OpAtomicCompareExchangeWeak:
		SPIRV_CROSS_THROW("OpAtomicCompareExchangeWeak is only supported in kernel profile.");

	case OpAtomicLoad:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		uint32_t mem_sem = ops[4];
		check_atomic_image(ptr);
		emit_atomic_func_op(result_type, id, "atomic_load", opcode, mem_sem, mem_sem, false, ptr, 0);
		break;
	}

	case OpAtomicStore:
	{
		uint32_t result_type = expression_type(ops[0]).self;
		uint32_t id = ops[0];
		uint32_t ptr = ops[0];
		uint32_t mem_sem = ops[2];
		uint32_t val = ops[3];
		check_atomic_image(ptr);
		emit_atomic_func_op(result_type, id, "atomic_store", opcode, mem_sem, mem_sem, false, ptr, val);
		break;
	}

#define MSL_AFMO_IMPL(op, valsrc, valconst) \
	do \
	{ \
		uint32_t result_type = ops[0]; \
		uint32_t id = ops[1]; \
		uint32_t ptr = ops[2]; \
		uint32_t mem_sem = ops[4]; \
		uint32_t val = valsrc; \
		emit_atomic_func_op(result_type, id, "atomic_fetch_" #op, opcode, \
		                    mem_sem, mem_sem, false, ptr, val, \
		                    false, valconst); \
	} while (false)

#define MSL_AFMO(op) MSL_AFMO_IMPL(op, ops[5], false)
#define MSL_AFMIO(op) MSL_AFMO_IMPL(op, 1, true)
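// MSL_AFMO ("atomic fetch-modify op") pulls the operand from ops[5]; MSL_AFMIO is the
// increment/decrement variant, passing a literal constant 1 as the operand instead.
// For example (illustrative), OpAtomicIIncrement lowers to an atomic_fetch_add with
// a constant 1.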
	case OpAtomicIIncrement:
		MSL_AFMIO(add);
		break;

	case OpAtomicIDecrement:
		MSL_AFMIO(sub);
		break;

	case OpAtomicIAdd:
	case OpAtomicFAddEXT:
		MSL_AFMO(add);
		break;

	case OpAtomicISub:
		MSL_AFMO(sub);
		break;

	case OpAtomicSMin:
	case OpAtomicUMin:
		MSL_AFMO(min);
		break;

	case OpAtomicSMax:
	case OpAtomicUMax:
		MSL_AFMO(max);
		break;

	case OpAtomicAnd:
		MSL_AFMO(and);
		break;

	case OpAtomicOr:
		MSL_AFMO(or);
		break;

	case OpAtomicXor:
		MSL_AFMO(xor);
		break;
	// Images

	// Reads == Fetches in Metal
	case OpImageRead:
	{
		// Mark that this shader reads from this image
		uint32_t img_id = ops[2];
		auto &type = expression_type(img_id);
		auto *p_var = maybe_get_backing_variable(img_id);
		if (type.image.dim != DimSubpassData)
		{
			if (p_var && has_decoration(p_var->self, DecorationNonReadable))
			{
				unset_decoration(p_var->self, DecorationNonReadable);
				force_recompile();
			}
		}

		// Metal requires explicit fences to break up RAW hazards, even within the same shader invocation
		if (msl_options.readwrite_texture_fences && p_var && !has_decoration(p_var->self, DecorationNonWritable))
		{
			add_spv_func_and_recompile(SPVFuncImplImageFence);
			// Need to wrap this with a value type,
			// since the Metal headers are broken and do not consider the case where the image is a reference.
			statement("spvImageFence(", to_expression(img_id), ");");
		}

		emit_texture_op(instruction, false);
		break;
	}

	// Emulate texture2D atomic operations
	case OpImageTexelPointer:
	{
		// When using the pointer, we need to know which variable it is actually loaded from.
		auto *var = maybe_get_backing_variable(ops[2]);
		if (var && atomic_image_vars_emulated.count(var->self))
		{
			uint32_t result_type = ops[0];
			uint32_t id = ops[1];

			std::string coord = to_expression(ops[3]);
			auto &type = expression_type(ops[2]);
			if (type.image.dim == Dim2D)
			{
				coord = join("spvImage2DAtomicCoord(", coord, ", ", to_expression(ops[2]), ")");
			}

			auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), "_atomic[", coord, "]"), result_type, true);
			e.loaded_from = var ? var->self : ID(0);
			inherit_expression_dependencies(id, ops[3]);
		}
		else
		{
			uint32_t result_type = ops[0];
			uint32_t id = ops[1];

			// Virtual expression. Split this up in the actual image atomic.
			// In GLSL and HLSL we are able to resolve the dereference inline, but MSL has
			// image.op(coord, ...) syntax.
			auto &e =
			    set<SPIRExpression>(id, join(to_expression(ops[2]), "@",
			                                 bitcast_expression(SPIRType::UInt, ops[3])),
			                        result_type, true);

			// When using the pointer, we need to know which variable it is actually loaded from.
			e.loaded_from = var ? var->self : ID(0);
			inherit_expression_dependencies(id, ops[3]);
		}
		break;
	}
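	// (OpImageTexelPointer, illustrative) With emulated image atomics, the "pointer" becomes an
	// index into a side buffer, e.g. "img_atomic[spvImage2DAtomicCoord(coord, img)]"; the
	// generated identifiers follow the <image>_atomic convention built above.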
	case OpImageWrite:
	{
		uint32_t img_id = ops[0];
		uint32_t coord_id = ops[1];
		uint32_t texel_id = ops[2];
		const uint32_t *opt = &ops[3];
		uint32_t length = instruction.length - 3;

		// Bypass pointers because we need the real image struct
		auto &type = expression_type(img_id);
		auto &img_type = get<SPIRType>(type.self);

		// Ensure this image has been marked as being written to and force a
		// recompile so that the image type output will include write access
		auto *p_var = maybe_get_backing_variable(img_id);
		if (p_var && has_decoration(p_var->self, DecorationNonWritable))
		{
			unset_decoration(p_var->self, DecorationNonWritable);
			force_recompile();
		}

		bool forward = false;
		uint32_t bias = 0;
		uint32_t lod = 0;
		uint32_t flags = 0;

		if (length)
		{
			flags = *opt++;
			length--;
		}

		auto test = [&](uint32_t &v, uint32_t flag) {
			if (length && (flags & flag))
			{
				v = *opt++;
				length--;
			}
		};

		test(bias, ImageOperandsBiasMask);
		test(lod, ImageOperandsLodMask);

		auto &texel_type = expression_type(texel_id);
		auto store_type = texel_type;
		store_type.vecsize = 4;

		TextureFunctionArguments args = {};
		args.base.img = img_id;
		args.base.imgtype = &img_type;
		args.base.is_fetch = true;
		args.coord = coord_id;
		args.lod = lod;

		string expr;
		if (needs_frag_discard_checks())
			expr = join("(", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), " ? ((void)0) : ");
		expr += join(to_expression(img_id), ".write(",
		             remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), ", ",
		             CompilerMSL::to_function_args(args, &forward), ")");
		if (needs_frag_discard_checks())
			expr += ")";
		statement(expr, ";");

		if (p_var && variable_storage_is_aliased(*p_var))
			flush_all_aliased_variables();

		break;
	}
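	// (OpImageWrite, illustrative) When fragment-discard checks are needed, the store is
	// guarded so demoted invocations do not write, roughly:
	//   (gl_HelperInvocation ? ((void)0) : img.write(value, uint2(coord)));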
	case OpImageQuerySize:
	case OpImageQuerySizeLod:
	{
		uint32_t rslt_type_id = ops[0];
		auto &rslt_type = get<SPIRType>(rslt_type_id);

		uint32_t id = ops[1];

		uint32_t img_id = ops[2];
		string img_exp = to_expression(img_id);
		auto &img_type = expression_type(img_id);
		Dim img_dim = img_type.image.dim;
		bool img_is_array = img_type.image.arrayed;

		if (img_type.basetype != SPIRType::Image)
			SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");

		string lod;
		if (opcode == OpImageQuerySizeLod)
		{
			// LOD index defaults to zero, so don't bother outputting a level zero index
			string decl_lod = to_expression(ops[3]);
			if (decl_lod != "0")
				lod = decl_lod;
		}

		string expr = type_to_glsl(rslt_type) + "(";
		expr += img_exp + ".get_width(" + lod + ")";

		if (img_dim == Dim2D || img_dim == DimCube || img_dim == Dim3D)
			expr += ", " + img_exp + ".get_height(" + lod + ")";

		if (img_dim == Dim3D)
			expr += ", " + img_exp + ".get_depth(" + lod + ")";

		if (img_is_array)
		{
			expr += ", " + img_exp + ".get_array_size()";
			if (img_dim == DimCube && msl_options.emulate_cube_array)
				expr += " / 6";
		}

		expr += ")";

		emit_op(rslt_type_id, id, expr, should_forward(img_id));

		break;
	}
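	// (OpImageQuerySize, illustrative) For a 2D array texture this builds something like
	// "uint3(img.get_width(), img.get_height(), img.get_array_size())"; emulated cube
	// arrays divide the array size by 6, since each emulated cube occupies six layers.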
	case OpImageQueryLod:
	{
		if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("ImageQueryLod is only supported on MSL 2.2 and up.");
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t image_id = ops[2];
		uint32_t coord_id = ops[3];
		emit_uninitialized_temporary_expression(result_type, id);

		std::string coord_expr = to_expression(coord_id);
		auto sampler_expr = to_sampler_expression(image_id);
		auto *combined = maybe_get<SPIRCombinedImageSampler>(image_id);
		auto image_expr = combined ? to_expression(combined->image) : to_expression(image_id);
		const SPIRType &image_type = expression_type(image_id);
		const SPIRType &coord_type = expression_type(coord_id);

		switch (image_type.image.dim)
		{
		case Dim1D:
			if (!msl_options.texture_1D_as_2D)
				SPIRV_CROSS_THROW("ImageQueryLod is not supported on 1D textures.");
			[[fallthrough]];
		case Dim2D:
			if (coord_type.vecsize > 2)
				coord_expr = enclose_expression(coord_expr) + ".xy";
			break;
		case DimCube:
		case Dim3D:
			if (coord_type.vecsize > 3)
				coord_expr = enclose_expression(coord_expr) + ".xyz";
			break;
		default:
			SPIRV_CROSS_THROW("Bad image type given to OpImageQueryLod");
		}

		// TODO: It is unclear if calculate_clamped_lod also conditionally rounds
		// the reported LOD based on the sampler. NEAREST miplevel should
		// round the LOD, but LINEAR miplevel should not round.
		// Let's hope this does not become an issue ...
		statement(to_expression(id), ".x = ", image_expr, ".calculate_clamped_lod(", sampler_expr, ", ",
		          coord_expr, ");");
		statement(to_expression(id), ".y = ", image_expr, ".calculate_unclamped_lod(", sampler_expr, ", ",
		          coord_expr, ");");
		register_control_dependent_expression(id);
		break;
	}
#define MSL_ImgQry(qrytype) \
	do \
	{ \
		uint32_t rslt_type_id = ops[0]; \
		auto &rslt_type = get<SPIRType>(rslt_type_id); \
		uint32_t id = ops[1]; \
		uint32_t img_id = ops[2]; \
		string img_exp = to_expression(img_id); \
		string expr = type_to_glsl(rslt_type) + "(" + img_exp + ".get_num_" #qrytype "())"; \
		emit_op(rslt_type_id, id, expr, should_forward(img_id)); \
	} while (false)
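// MSL_ImgQry pastes the query name into a get_num_* accessor, so MSL_ImgQry(mip_levels)
// emits e.g. "uint(img.get_num_mip_levels())" and MSL_ImgQry(samples) emits
// "uint(img.get_num_samples())".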
	case OpImageQueryLevels:
		MSL_ImgQry(mip_levels);
		break;

	case OpImageQuerySamples:
		MSL_ImgQry(samples);
		break;

	case OpImage:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto *combined = maybe_get<SPIRCombinedImageSampler>(ops[2]);

		if (combined)
		{
			auto &e = emit_op(result_type, id, to_expression(combined->image), true, true);
			auto *var = maybe_get_backing_variable(combined->image);
			if (var)
				e.loaded_from = var->self;
		}
		else
		{
			auto *var = maybe_get_backing_variable(ops[2]);
			SPIRExpression *e;
			if (var && has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler))
				e = &emit_op(result_type, id, join(to_expression(ops[2]), ".plane0"), true, true);
			else
				e = &emit_op(result_type, id, to_expression(ops[2]), true, true);
			if (var)
				e->loaded_from = var->self;
		}
		break;
	}

	// Casting
	case OpQuantizeToF16:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t arg = ops[2];
		string exp = join("spvQuantizeToF16(", to_expression(arg), ")");
		emit_op(result_type, id, exp, should_forward(arg));
		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
		if (is_tessellation_shader())
		{
			if (!emit_tessellation_access_chain(ops, instruction.length))
				CompilerGLSL::emit_instruction(instruction);
		}
		else
			CompilerGLSL::emit_instruction(instruction);
		fix_up_interpolant_access_chain(ops, instruction.length);
		break;
	case OpStore:
	{
		const auto &type = expression_type(ops[0]);

		if (is_out_of_bounds_tessellation_level(ops[0]))
			break;

		if (needs_frag_discard_checks() &&
		    (type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform))
		{
			// If we're in a continue block, this kludge will make the block too complex
			// to emit normally.
			assert(current_emitting_block);
			auto cont_type = continue_block_type(*current_emitting_block);
			if (cont_type != SPIRBlock::ContinueNone && cont_type != SPIRBlock::ComplexLoop)
			{
				current_emitting_block->complex_continue = true;
				force_recompile();
			}
			statement("if (!", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), ")");
			begin_scope();
		}
		if (!maybe_emit_array_assignment(ops[0], ops[1]))
			CompilerGLSL::emit_instruction(instruction);
		if (needs_frag_discard_checks() &&
		    (type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform))
			end_scope();
		break;
	}

	// Compute barriers
	case OpMemoryBarrier:
		emit_barrier(0, ops[0], ops[1]);
		break;

	case OpControlBarrier:
		// In GLSL a memory barrier is often followed by a control barrier.
		// But in MSL, memory barriers are also control barriers, so don't
		// emit a simple control barrier if a memory barrier has just been emitted.
		if (previous_instruction_opcode != OpMemoryBarrier)
			emit_barrier(ops[0], ops[1], ops[2]);
		break;
	case OpOuterProduct:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t a = ops[2];
		uint32_t b = ops[3];

		auto &type = get<SPIRType>(result_type);
		string expr = type_to_glsl_constructor(type);
		expr += "(";
		for (uint32_t col = 0; col < type.columns; col++)
		{
			expr += to_enclosed_unpacked_expression(a);
			expr += " * ";
			expr += to_extract_component_expression(b, col);
			if (col + 1 < type.columns)
				expr += ", ";
		}
		expr += ")";
		emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
		inherit_expression_dependencies(id, a);
		inherit_expression_dependencies(id, b);
		break;
	}

	case OpVectorTimesMatrix:
	case OpMatrixTimesVector:
	{
		if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
		{
			CompilerGLSL::emit_instruction(instruction);
			break;
		}

		// If the matrix needs transpose, just flip the multiply order.
		auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
		if (e && e->need_transpose)
		{
			e->need_transpose = false;
			string expr;

			if (opcode == OpMatrixTimesVector)
			{
				expr = join("spvFMulVectorMatrix(", to_enclosed_unpacked_expression(ops[3]), ", ",
				            to_unpacked_row_major_matrix_expression(ops[2]), ")");
			}
			else
			{
				expr = join("spvFMulMatrixVector(", to_unpacked_row_major_matrix_expression(ops[3]), ", ",
				            to_enclosed_unpacked_expression(ops[2]), ")");
			}

			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
			emit_op(ops[0], ops[1], expr, forward);
			e->need_transpose = true;
			inherit_expression_dependencies(ops[1], ops[2]);
			inherit_expression_dependencies(ops[1], ops[3]);
		}
		else
		{
			if (opcode == OpMatrixTimesVector)
				MSL_BFOP(spvFMulMatrixVector);
			else
				MSL_BFOP(spvFMulVectorMatrix);
		}
		break;
	}
	case OpMatrixTimesMatrix:
	{
		if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
		{
			CompilerGLSL::emit_instruction(instruction);
			break;
		}

		auto *a = maybe_get<SPIRExpression>(ops[2]);
		auto *b = maybe_get<SPIRExpression>(ops[3]);

		// If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
		// a^T * b^T = (b * a)^T.
		if (a && b && a->need_transpose && b->need_transpose)
		{
			a->need_transpose = false;
			b->need_transpose = false;

			auto expr =
			    join("spvFMulMatrixMatrix(", enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), ", ",
			         enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), ")");

			bool forward = should_forward(ops[2]) && should_forward(ops[3]);
			auto &e = emit_op(ops[0], ops[1], expr, forward);
			e.need_transpose = true;
			a->need_transpose = true;
			b->need_transpose = true;

			inherit_expression_dependencies(ops[1], ops[2]);
			inherit_expression_dependencies(ops[1], ops[3]);
		}
		else
			MSL_BFOP(spvFMulMatrixMatrix);

		break;
	}
	case OpIAddCarry:
	case OpISubBorrow:
	{
		uint32_t result_type = ops[0];
		uint32_t result_id = ops[1];
		uint32_t op0 = ops[2];
		uint32_t op1 = ops[3];
		auto &type = get<SPIRType>(result_type);
		emit_uninitialized_temporary_expression(result_type, result_id);

		auto &res_type = get<SPIRType>(type.member_types[1]);
		if (opcode == OpIAddCarry)
		{
			statement(to_expression(result_id), ".", to_member_name(type, 0), " = ",
			          to_enclosed_unpacked_expression(op0), " + ", to_enclosed_unpacked_expression(op1), ";");
			statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type),
			          "(1), ", type_to_glsl(res_type), "(0), ", to_unpacked_expression(result_id), ".", to_member_name(type, 0),
			          " >= max(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "));");
		}
		else
		{
			statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_unpacked_expression(op0), " - ",
			          to_enclosed_unpacked_expression(op1), ";");
			statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type),
			          "(1), ", type_to_glsl(res_type), "(0), ", to_enclosed_unpacked_expression(op0),
			          " >= ", to_enclosed_unpacked_expression(op1), ");");
		}
		break;
	}
	case OpUMulExtended:
	case OpSMulExtended:
	{
		uint32_t result_type = ops[0];
		uint32_t result_id = ops[1];
		uint32_t op0 = ops[2];
		uint32_t op1 = ops[3];
		auto &type = get<SPIRType>(result_type);
		auto input_type = opcode == OpSMulExtended ? int_type : uint_type;
		string cast_op0, cast_op1;

		binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, false);
		emit_uninitialized_temporary_expression(result_type, result_id);
		statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", cast_op0, " * ", cast_op1, ";");
		statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(", cast_op0, ", ", cast_op1, ");");
		break;
	}
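	// (OpUMulExtended/OpSMulExtended, illustrative) The result struct gets the low half from
	// a plain multiply and the high half from Metal's mulhi(), roughly:
	//   result.lo = a * b; result.hi = mulhi(a, b);
	// (actual member names come from the SPIR-V result struct).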
	case OpArrayLength:
	{
		auto &type = expression_type(ops[2]);
		uint32_t offset = type_struct_member_offset(type, ops[3]);
		uint32_t stride = type_struct_member_array_stride(type, ops[3]);

		auto expr = join("(", to_buffer_size_expression(ops[2]), " - ", offset, ") / ", stride);
		emit_op(ops[0], ops[1], expr, true);
		break;
	}
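	// (OpArrayLength, illustrative) Since MSL has no equivalent query, the runtime array
	// length is computed from a buffer size provided by the API side, roughly:
	//   (buffer_size_in_bytes - member_offset) / array_stride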
	// Legacy sub-group stuff ...
	case OpSubgroupBallotKHR:
	case OpSubgroupFirstInvocationKHR:
	case OpSubgroupReadInvocationKHR:
	case OpSubgroupAllKHR:
	case OpSubgroupAnyKHR:
	case OpSubgroupAllEqualKHR:
		emit_subgroup_op(instruction);
		break;

	// SPV_INTEL_shader_integer_functions2
	case OpUCountLeadingZerosINTEL:
		MSL_UFOP(clz);
		break;

	case OpUCountTrailingZerosINTEL:
		MSL_UFOP(ctz);
		break;

	case OpAbsISubINTEL:
	case OpAbsUSubINTEL:
		MSL_BFOP(absdiff);
		break;

	case OpIAddSatINTEL:
	case OpUAddSatINTEL:
		MSL_BFOP(addsat);
		break;

	case OpIAverageINTEL:
	case OpUAverageINTEL:
		MSL_BFOP(hadd);
		break;

	case OpIAverageRoundedINTEL:
	case OpUAverageRoundedINTEL:
		MSL_BFOP(rhadd);
		break;

	case OpISubSatINTEL:
	case OpUSubSatINTEL:
		MSL_BFOP(subsat);
		break;

	case OpIMul32x16INTEL:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		uint32_t a = ops[2], b = ops[3];
		bool forward = should_forward(a) && should_forward(b);
		emit_op(result_type, id, join("int(short(", to_unpacked_expression(a), ")) * int(short(", to_unpacked_expression(b), "))"), forward);
		inherit_expression_dependencies(id, a);
		inherit_expression_dependencies(id, b);
		break;
	}

	case OpUMul32x16INTEL:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];

		uint32_t a = ops[2], b = ops[3];
		bool forward = should_forward(a) && should_forward(b);
		emit_op(result_type, id, join("uint(ushort(", to_unpacked_expression(a), ")) * uint(ushort(", to_unpacked_expression(b), "))"), forward);
		inherit_expression_dependencies(id, a);
		inherit_expression_dependencies(id, b);
		break;
	}
	// SPV_EXT_demote_to_helper_invocation
	case OpDemoteToHelperInvocationEXT:
		if (!msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("discard_fragment() does not formally have demote semantics until MSL 2.3.");
		CompilerGLSL::emit_instruction(instruction);
		break;

	case OpIsHelperInvocationEXT:
		if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("simd_is_helper_thread() requires MSL 2.3 on iOS.");
		else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
			SPIRV_CROSS_THROW("simd_is_helper_thread() requires MSL 2.1 on macOS.");
		emit_op(ops[0], ops[1],
		        needs_manual_helper_invocation_updates() ? builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput) :
		                                                   "simd_is_helper_thread()",
		        false);
		break;

	case OpBeginInvocationInterlockEXT:
	case OpEndInvocationInterlockEXT:
		if (!msl_options.supports_msl_version(2, 0))
			SPIRV_CROSS_THROW("Raster order groups require MSL 2.0.");
		break; // Nothing to do in the body
	case OpConvertUToAccelerationStructureKHR:
		SPIRV_CROSS_THROW("ConvertUToAccelerationStructure is not supported in MSL.");
	case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR:
		SPIRV_CROSS_THROW("BindingTableRecordOffset is not supported in MSL.");

	case OpRayQueryInitializeKHR:
	{
		flush_variable_declaration(ops[0]);
		register_write(ops[0]);
		add_spv_func_and_recompile(SPVFuncImplRayQueryIntersectionParams);

		statement(to_expression(ops[0]), ".reset(", "ray(", to_expression(ops[4]), ", ", to_expression(ops[6]), ", ",
		          to_expression(ops[5]), ", ", to_expression(ops[7]), "), ", to_expression(ops[1]), ", ", to_expression(ops[3]),
		          ", spvMakeIntersectionParams(", to_expression(ops[2]), "));");
		break;
	}

	case OpRayQueryProceedKHR:
	{
		flush_variable_declaration(ops[0]);
		register_write(ops[2]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".next()"), false);
		break;
	}

#define MSL_RAY_QUERY_IS_CANDIDATE get<SPIRConstant>(ops[3]).scalar_i32() == 0

#define MSL_RAY_QUERY_GET_OP(op, msl_op) \
	case OpRayQueryGet##op##KHR: \
		flush_variable_declaration(ops[2]); \
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_" #msl_op "()"), false); \
		break

#define MSL_RAY_QUERY_OP_INNER2(op, msl_prefix, msl_op) \
	case OpRayQueryGet##op##KHR: \
		flush_variable_declaration(ops[2]); \
		if (MSL_RAY_QUERY_IS_CANDIDATE) \
			emit_op(ops[0], ops[1], join(to_expression(ops[2]), #msl_prefix "_candidate_" #msl_op "()"), false); \
		else \
			emit_op(ops[0], ops[1], join(to_expression(ops[2]), #msl_prefix "_committed_" #msl_op "()"), false); \
		break

#define MSL_RAY_QUERY_GET_OP2(op, msl_op) MSL_RAY_QUERY_OP_INNER2(op, .get, msl_op)
#define MSL_RAY_QUERY_IS_OP2(op, msl_op) MSL_RAY_QUERY_OP_INNER2(op, .is, msl_op)
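// These macros expand into case labels that map a SPIR-V ray-query getter onto the
// corresponding Metal intersection_query member. For example (illustrative),
// MSL_RAY_QUERY_GET_OP2(IntersectionInstanceId, instance_id) emits either
// "rq.get_candidate_instance_id()" or "rq.get_committed_instance_id()", depending on
// whether the Intersection operand selects the candidate or the committed intersection.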
	MSL_RAY_QUERY_GET_OP(RayTMin, ray_min_distance);
	MSL_RAY_QUERY_GET_OP(WorldRayOrigin, world_space_ray_origin);
	MSL_RAY_QUERY_GET_OP(WorldRayDirection, world_space_ray_direction);
	MSL_RAY_QUERY_GET_OP2(IntersectionInstanceId, instance_id);
	MSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex, user_instance_id);
	MSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics, triangle_barycentric_coord);
	MSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex, primitive_id);
	MSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex, geometry_id);
	MSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin, ray_origin);
	MSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection, ray_direction);
	MSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld, object_to_world_transform);
	MSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject, world_to_object_transform);
	MSL_RAY_QUERY_IS_OP2(IntersectionFrontFace, triangle_front_facing);

	case OpRayQueryGetIntersectionTypeKHR:
		flush_variable_declaration(ops[2]);
		if (MSL_RAY_QUERY_IS_CANDIDATE)
			emit_op(ops[0], ops[1], join("uint(", to_expression(ops[2]), ".get_candidate_intersection_type()) - 1"),
			        false);
		else
			emit_op(ops[0], ops[1], join("uint(", to_expression(ops[2]), ".get_committed_intersection_type())"), false);
		break;

	case OpRayQueryGetIntersectionTKHR:
		flush_variable_declaration(ops[2]);
		if (MSL_RAY_QUERY_IS_CANDIDATE)
			emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_candidate_triangle_distance()"), false);
		else
			emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_committed_distance()"), false);
		break;

	case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR:
	{
		flush_variable_declaration(ops[0]);
		emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".is_candidate_non_opaque_bounding_box()"), false);
		break;
	}

	case OpRayQueryConfirmIntersectionKHR:
		flush_variable_declaration(ops[0]);
		register_write(ops[0]);
		statement(to_expression(ops[0]), ".commit_triangle_intersection();");
		break;

	case OpRayQueryGenerateIntersectionKHR:
		flush_variable_declaration(ops[0]);
		register_write(ops[0]);
		statement(to_expression(ops[0]), ".commit_bounding_box_intersection(", to_expression(ops[1]), ");");
		break;

	case OpRayQueryTerminateKHR:
		flush_variable_declaration(ops[0]);
		register_write(ops[0]);
		statement(to_expression(ops[0]), ".abort();");
		break;

#undef MSL_RAY_QUERY_GET_OP
#undef MSL_RAY_QUERY_IS_CANDIDATE
#undef MSL_RAY_QUERY_IS_OP2
#undef MSL_RAY_QUERY_GET_OP2
#undef MSL_RAY_QUERY_OP_INNER2
	case OpConvertPtrToU:
	case OpConvertUToPtr:
	case OpBitcast:
	{
		auto &type = get<SPIRType>(ops[0]);
		auto &input_type = expression_type(ops[2]);

		if (opcode != OpBitcast || type.pointer || input_type.pointer)
		{
			string op;

			if (type.vecsize == 1 && input_type.vecsize == 1)
				op = join("reinterpret_cast<", type_to_glsl(type), ">(", to_unpacked_expression(ops[2]), ")");
			else if (input_type.vecsize == 2)
				op = join("reinterpret_cast<", type_to_glsl(type), ">(as_type<ulong>(", to_unpacked_expression(ops[2]), "))");
			else
				op = join("as_type<", type_to_glsl(type), ">(reinterpret_cast<ulong>(", to_unpacked_expression(ops[2]), "))");

			emit_op(ops[0], ops[1], op, should_forward(ops[2]));
			inherit_expression_dependencies(ops[1], ops[2]);
		}
		else
			CompilerGLSL::emit_instruction(instruction);
		break;
	}
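	// (Pointer conversions, illustrative) The three forms above cover the possible shapes,
	// e.g. casting a uint2 to a device pointer produces
	// "reinterpret_cast<device T*>(as_type<ulong>(v))", while scalar ulong conversions use
	// a plain reinterpret_cast.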
	case OpSDot:
	case OpUDot:
	case OpSUDot:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t vec1 = ops[2];
		uint32_t vec2 = ops[3];

		auto &input_type1 = expression_type(vec1);
		auto &input_type2 = expression_type(vec2);

		string vec1input, vec2input;
		auto input_size = input_type1.vecsize;
		if (instruction.length == 5)
		{
			if (ops[4] == PackedVectorFormatPackedVectorFormat4x8Bit)
			{
				string type = opcode == OpSDot || opcode == OpSUDot ? "char4" : "uchar4";
				vec1input = join("as_type<", type, ">(", to_expression(vec1), ")");
				type = opcode == OpSDot ? "char4" : "uchar4";
				vec2input = join("as_type<", type, ">(", to_expression(vec2), ")");
				input_size = 4;
			}
			else
				SPIRV_CROSS_THROW("Packed vector formats other than 4x8Bit for integer dot product are not supported.");
		}
		else
		{
			// Inputs are sign or zero-extended to their target width.
			SPIRType::BaseType vec1_expected_type =
			    opcode != OpUDot ?
			        to_signed_basetype(input_type1.width) :
			        to_unsigned_basetype(input_type1.width);

			SPIRType::BaseType vec2_expected_type =
			    opcode != OpSDot ?
			        to_unsigned_basetype(input_type2.width) :
			        to_signed_basetype(input_type2.width);

			vec1input = bitcast_expression(vec1_expected_type, vec1);
			vec2input = bitcast_expression(vec2_expected_type, vec2);
		}

		auto &type = get<SPIRType>(result_type);

		// We'll get the appropriate sign-extend or zero-extend, no matter which type we cast to here.
		// The addition in reduce_add is sign-invariant.
		auto result_type_cast = join(type_to_glsl(type), input_size);

		string exp = join("reduce_add(",
		                  result_type_cast, "(", vec1input, ") * ",
		                  result_type_cast, "(", vec2input, "))");

		emit_op(result_type, id, exp, should_forward(vec1) && should_forward(vec2));
		inherit_expression_dependencies(id, vec1);
		inherit_expression_dependencies(id, vec2);
		break;
	}
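	// (Integer dot product, illustrative) For the packed 4x8-bit format, a signed dot
	// product of two uint operands becomes roughly:
	//   reduce_add(int4(as_type<char4>(a)) * int4(as_type<char4>(b)))
	// so the widening multiply and the horizontal add both happen in the wider result type.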
	case OpSDotAccSat:
	case OpUDotAccSat:
	case OpSUDotAccSat:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t vec1 = ops[2];
		uint32_t vec2 = ops[3];
		uint32_t acc = ops[4];

		auto input_type1 = expression_type(vec1);
		auto input_type2 = expression_type(vec2);

		string vec1input, vec2input;
		if (instruction.length == 6)
		{
			if (ops[5] == PackedVectorFormatPackedVectorFormat4x8Bit)
			{
				string type = opcode == OpSDotAccSat || opcode == OpSUDotAccSat ? "char4" : "uchar4";
				vec1input = join("as_type<", type, ">(", to_expression(vec1), ")");
				type = opcode == OpSDotAccSat ? "char4" : "uchar4";
				vec2input = join("as_type<", type, ">(", to_expression(vec2), ")");
				input_type1.vecsize = 4;
				input_type2.vecsize = 4;
			}
			else
				SPIRV_CROSS_THROW("Packed vector formats other than 4x8Bit for integer dot product are not supported.");
		}
		else
		{
			// Inputs are sign or zero-extended to their target width.
			SPIRType::BaseType vec1_expected_type =
			    opcode != OpUDotAccSat ?
			        to_signed_basetype(input_type1.width) :
			        to_unsigned_basetype(input_type1.width);

			SPIRType::BaseType vec2_expected_type =
			    opcode != OpSDotAccSat ?
			        to_unsigned_basetype(input_type2.width) :
			        to_signed_basetype(input_type2.width);

			vec1input = bitcast_expression(vec1_expected_type, vec1);
			vec2input = bitcast_expression(vec2_expected_type, vec2);
		}

		auto &type = get<SPIRType>(result_type);

		SPIRType::BaseType pre_saturate_type =
		    opcode != OpUDotAccSat ?
		        to_signed_basetype(type.width) :
		        to_unsigned_basetype(type.width);

		input_type1.basetype = pre_saturate_type;
		input_type2.basetype = pre_saturate_type;

		string exp = join(type_to_glsl(type), "(addsat(reduce_add(",
		                  type_to_glsl(input_type1), "(", vec1input, ") * ",
		                  type_to_glsl(input_type2), "(", vec2input, ")), ",
		                  bitcast_expression(pre_saturate_type, acc), "))");

		emit_op(result_type, id, exp, should_forward(vec1) && should_forward(vec2));
		inherit_expression_dependencies(id, vec1);
		inherit_expression_dependencies(id, vec2);
		break;
	}

	default:
		CompilerGLSL::emit_instruction(instruction);
		break;
	}

	previous_instruction_opcode = opcode;
}
void CompilerMSL::emit_texture_op(const Instruction &i, bool sparse)
{
	if (sparse)
		SPIRV_CROSS_THROW("Sparse feedback not yet supported in MSL.");

	if (msl_options.use_framebuffer_fetch_subpasses)
	{
		auto *ops = stream(i);

		uint32_t result_type_id = ops[0];
		uint32_t id = ops[1];
		uint32_t img = ops[2];

		auto &type = expression_type(img);
		auto &imgtype = get<SPIRType>(type.self);

		// Use Metal's native frame-buffer fetch API for subpass inputs.
		if (imgtype.image.dim == DimSubpassData)
		{
			// Subpass inputs cannot be invalidated,
			// so just forward the expression directly.
			string expr = to_expression(img);
			emit_op(result_type_id, id, expr, true);
			return;
		}
	}

	// Fallback to default implementation
	CompilerGLSL::emit_texture_op(i, sparse);
}
void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem)
{
	if (get_execution_model() != ExecutionModelGLCompute && !is_tesc_shader())
		return;

	uint32_t exe_scope = id_exe_scope ? evaluate_constant_u32(id_exe_scope) : uint32_t(ScopeInvocation);
	uint32_t mem_scope = id_mem_scope ? evaluate_constant_u32(id_mem_scope) : uint32_t(ScopeInvocation);
	// Use the wider of the two scopes (smaller value)
	exe_scope = min(exe_scope, mem_scope);

	if (msl_options.emulate_subgroups && exe_scope >= ScopeSubgroup && !id_mem_sem)
		// In this case, we assume a "subgroup" size of 1. The barrier, then, is a noop.
		return;

	string bar_stmt;
	if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2))
		bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier";
	else
		bar_stmt = "threadgroup_barrier";
	bar_stmt += "(";

	uint32_t mem_sem = id_mem_sem ? evaluate_constant_u32(id_mem_sem) : uint32_t(MemorySemanticsMaskNone);

	// Use the | operator to combine flags if we can.
	if (msl_options.supports_msl_version(1, 2))
	{
		string mem_flags = "";
		// For tesc shaders, this also affects objects in the Output storage class.
		// Since in Metal, these are placed in a device buffer, we have to sync device memory here.
		if (is_tesc_shader() ||
		    (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)))
			mem_flags += "mem_flags::mem_device";

		// Fix tessellation patch function processing
		if (is_tesc_shader() || (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask)))
		{
			if (!mem_flags.empty())
				mem_flags += " | ";
			mem_flags += "mem_flags::mem_threadgroup";
		}
		if (mem_sem & MemorySemanticsImageMemoryMask)
		{
			if (!mem_flags.empty())
				mem_flags += " | ";
			mem_flags += "mem_flags::mem_texture";
		}

		if (mem_flags.empty())
			mem_flags = "mem_flags::mem_none";

		bar_stmt += mem_flags;
	}
	else
	{
		if ((mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) &&
		    (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask)))
			bar_stmt += "mem_flags::mem_device_and_threadgroup";
		else if (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask))
			bar_stmt += "mem_flags::mem_device";
		else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask))
			bar_stmt += "mem_flags::mem_threadgroup";
		else if (mem_sem & MemorySemanticsImageMemoryMask)
			bar_stmt += "mem_flags::mem_texture";
		else
			bar_stmt += "mem_flags::mem_none";
	}

	bar_stmt += ");";

	statement(bar_stmt);

	assert(current_emitting_block);
	flush_control_dependent_expressions(current_emitting_block->self);
	flush_all_active_variables();
}
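// (emit_barrier, illustrative) An OpControlBarrier with workgroup scope and
// UniformMemory | WorkgroupMemory semantics typically ends up as:
//   threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup);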
static bool storage_class_array_is_thread(StorageClass storage)
{
	switch (storage)
	{
	case StorageClassInput:
	case StorageClassOutput:
	case StorageClassGeneric:
	case StorageClassFunction:
	case StorageClassPrivate:
		return true;

	default:
		return false;
	}
}
  8998. bool CompilerMSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id,
  8999. StorageClass lhs_storage, StorageClass rhs_storage)
  9000. {
  9001. // Allow Metal to use the array<T> template to make arrays a value type.
  9002. // This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback.
  9003. bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage);
  9004. bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage);
  9005. bool lhs_is_array_template = lhs_is_thread_storage || lhs_storage == StorageClassWorkgroup;
  9006. bool rhs_is_array_template = rhs_is_thread_storage || rhs_storage == StorageClassWorkgroup;
  9007. // Special considerations for stage IO variables.
  9008. // If the variable is actually backed by non-user visible device storage, we use array templates for those.
  9009. //
  9010. // Another special consideration is given to thread local variables which happen to have Offset decorations
  9011. // applied to them. Block-like types do not use array templates, so we need to force POD path if we detect
  9012. // these scenarios. This check isn't perfect since it would be technically possible to mix and match these things,
  9013. // and for a fully correct solution we might have to track array template state through access chains as well,
  9014. // but for all reasonable use cases, this should suffice.
  9015. // This special case should also only apply to Function/Private storage classes.
  9016. // We should not check backing variable for temporaries.
  9017. auto *lhs_var = maybe_get_backing_variable(lhs_id);
  9018. if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage))
  9019. lhs_is_array_template = true;
  9020. else if (lhs_var && lhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(lhs_var->basetype)))
  9021. lhs_is_array_template = false;
  9022. auto *rhs_var = maybe_get_backing_variable(rhs_id);
  9023. if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage))
  9024. rhs_is_array_template = true;
  9025. else if (rhs_var && rhs_storage != StorageClassGeneric && type_is_block_like(get<SPIRType>(rhs_var->basetype)))
  9026. rhs_is_array_template = false;
    // If threadgroup storage qualifiers are *not* used:
    // Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier.
    if (lhs_is_array_template && rhs_is_array_template && !using_builtin_array())
    {
        // Fall back to normal copy path.
        return false;
    }
    else
    {
        // Ensure the LHS variable has been declared
        if (lhs_var)
            flush_variable_declaration(lhs_var->self);

        string lhs;
        if (expr)
            lhs = expr;
        else
            lhs = to_expression(lhs_id);

        // Assignment from an array initializer is fine.
        auto &type = expression_type(rhs_id);
        auto *var = maybe_get_backing_variable(rhs_id);

        // Unfortunately, we cannot template on address space in MSL,
        // so explicit address space redirection it is ...
        bool is_constant = false;
        if (ir.ids[rhs_id].get_type() == TypeConstant)
        {
            is_constant = true;
        }
        else if (var && var->remapped_variable && var->statically_assigned &&
                 ir.ids[var->static_expression].get_type() == TypeConstant)
        {
            is_constant = true;
        }
        else if (rhs_storage == StorageClassUniform || rhs_storage == StorageClassUniformConstant)
        {
            is_constant = true;
        }

        // For the case where we have OpLoad triggering an array copy,
        // we cannot easily detect this case ahead of time since it's
        // context dependent. We might have to force a recompile here
        // if this is the only use of array copies in our shader.
        add_spv_func_and_recompile(type.array.size() > 1 ? SPVFuncImplArrayCopyMultidim : SPVFuncImplArrayCopy);

        const char *tag = nullptr;
        if (lhs_is_thread_storage && is_constant)
            tag = "FromConstantToStack";
        else if (lhs_storage == StorageClassWorkgroup && is_constant)
            tag = "FromConstantToThreadGroup";
        else if (lhs_is_thread_storage && rhs_is_thread_storage)
            tag = "FromStackToStack";
        else if (lhs_storage == StorageClassWorkgroup && rhs_is_thread_storage)
            tag = "FromStackToThreadGroup";
        else if (lhs_is_thread_storage && rhs_storage == StorageClassWorkgroup)
            tag = "FromThreadGroupToStack";
        else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup)
            tag = "FromThreadGroupToThreadGroup";
        else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassStorageBuffer)
            tag = "FromDeviceToDevice";
        else if (lhs_storage == StorageClassStorageBuffer && is_constant)
            tag = "FromConstantToDevice";
        else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup)
            tag = "FromThreadGroupToDevice";
        else if (lhs_storage == StorageClassStorageBuffer && rhs_is_thread_storage)
            tag = "FromStackToDevice";
        else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer)
            tag = "FromDeviceToThreadGroup";
        else if (lhs_is_thread_storage && rhs_storage == StorageClassStorageBuffer)
            tag = "FromDeviceToStack";
        else
            SPIRV_CROSS_THROW("Unknown storage class used for copying arrays.");

        // Pass internal array of spvUnsafeArray<> into wrapper functions
        if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays)
            statement("spvArrayCopy", tag, "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);");
        else if (lhs_is_array_template && !msl_options.force_native_arrays)
            statement("spvArrayCopy", tag, "(", lhs, ".elements, ", to_expression(rhs_id), ");");
        else if (rhs_is_array_template && !msl_options.force_native_arrays)
            statement("spvArrayCopy", tag, "(", lhs, ", ", to_expression(rhs_id), ".elements);");
        else
            statement("spvArrayCopy", tag, "(", lhs, ", ", to_expression(rhs_id), ");");
    }

    return true;
}
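
// For illustration only (hypothetical SSA names, not produced by this file verbatim):
// with a thread-storage LHS copied from a constant array and array templates in use,
// the code above emits roughly
//
//     spvArrayCopyFromConstantToStack(_34.elements, _lut.elements);
//
// where the tag computed above is pasted into the wrapper-function name, and ".elements"
// unwraps the spvUnsafeArray<> template to reach the underlying native array.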

uint32_t CompilerMSL::get_physical_tess_level_array_size(spv::BuiltIn builtin) const
{
    if (is_tessellating_triangles())
        return builtin == BuiltInTessLevelInner ? 1 : 3;
    else
        return builtin == BuiltInTessLevelInner ? 2 : 4;
}
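
// These sizes mirror Metal's tessellation factor buffer layouts. As a reference sketch
// (field shapes taken from the Metal documentation; treat the exact declarations as an
// assumption, not code from this file):
//
//     struct MTLTriangleTessellationFactorsHalf { half edgeTessellationFactor[3]; half insideTessellationFactor; };
//     struct MTLQuadTessellationFactorsHalf     { half edgeTessellationFactor[4]; half insideTessellationFactor[2]; };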

// Since MSL does not allow arrays to be copied via simple variable assignment,
// if the LHS and RHS represent an assignment of an entire array, it must be
// implemented by calling an array copy function.
// Returns whether the array assignment was emitted.
bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
{
    // We only care about assignments of an entire array
    auto &type = expression_type(id_lhs);
    if (!is_array(get_pointee_type(type)))
        return false;

    auto *var = maybe_get<SPIRVariable>(id_lhs);

    // Is this a remapped, static constant? Don't do anything.
    if (var && var->remapped_variable && var->statically_assigned)
        return true;

    if (ir.ids[id_rhs].get_type() == TypeConstant && var && var->deferred_declaration)
    {
        // Special case, if we end up declaring a variable when assigning the constant array,
        // we can avoid the copy by directly assigning the constant expression.
        // This is likely necessary to be able to use a variable as a true look-up table, as it is unlikely
        // the compiler will be able to optimize the spvArrayCopy() into a constant LUT.
        // After a variable has been declared, we can no longer assign constant arrays in MSL unfortunately.
        statement(to_expression(id_lhs), " = ", constant_expression(get<SPIRConstant>(id_rhs)), ";");
        return true;
    }
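
    // Sketch of the special case above (hypothetical names; the exact spelling depends on
    // whether array templates are active): with a deferred declaration, the assignment fuses
    // into the declaration downstream, so the LUT can stay a compile-time constant, roughly
    //
    //     spvUnsafeArray<float, 4> _lut = spvUnsafeArray<float, 4>({ 1.0, 2.0, 3.0, 4.0 });
    //
    // rather than an element-by-element spvArrayCopy at runtime.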
    if (is_tesc_shader() && has_decoration(id_lhs, DecorationBuiltIn))
    {
        auto builtin = BuiltIn(get_decoration(id_lhs, DecorationBuiltIn));
        // Need to manually unroll the array store.
        if (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter)
        {
            uint32_t array_size = get_physical_tess_level_array_size(builtin);
            if (array_size == 1)
                statement(to_expression(id_lhs), " = half(", to_expression(id_rhs), "[0]);");
            else
            {
                for (uint32_t i = 0; i < array_size; i++)
                    statement(to_expression(id_lhs), "[", i, "] = half(", to_expression(id_rhs), "[", i, "]);");
            }
            return true;
        }
    }

    auto lhs_storage = get_expression_effective_storage_class(id_lhs);
    auto rhs_storage = get_expression_effective_storage_class(id_rhs);
    if (!emit_array_copy(nullptr, id_lhs, id_rhs, lhs_storage, rhs_storage))
        return false;

    register_write(id_lhs);
    return true;
}

// Emits one of the atomic functions. In MSL, the atomic functions operate on pointers
void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, Op opcode,
                                      uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1,
                                      bool op1_is_pointer, bool op1_is_literal, uint32_t op2)
{
    string exp;

    auto &ptr_type = expression_type(obj);
    auto &type = get_pointee_type(ptr_type);
    auto expected_type = type.basetype;
    if (opcode == OpAtomicUMax || opcode == OpAtomicUMin)
        expected_type = to_unsigned_basetype(type.width);
    else if (opcode == OpAtomicSMax || opcode == OpAtomicSMin)
        expected_type = to_signed_basetype(type.width);

    bool use_native_image_atomic;
    if (msl_options.supports_msl_version(3, 1))
        use_native_image_atomic = check_atomic_image(obj);
    else
        use_native_image_atomic = false;

    if (type.width == 64)
        SPIRV_CROSS_THROW("MSL currently does not support 64-bit atomics.");

    auto remapped_type = type;
    remapped_type.basetype = expected_type;

    auto *var = maybe_get_backing_variable(obj);
    const auto *res_type = var ? &get<SPIRType>(var->basetype) : nullptr;
    assert(type.storage != StorageClassImage || res_type);

    bool is_atomic_compare_exchange_strong = op1_is_pointer && op1;

    bool check_discard = opcode != OpAtomicLoad && needs_frag_discard_checks() &&
                         ptr_type.storage != StorageClassWorkgroup;

    // Even compare exchange atomics are vec4 on metal for ... reasons :v
    uint32_t vec4_temporary_id = 0;
    if (use_native_image_atomic && is_atomic_compare_exchange_strong)
    {
        uint32_t &tmp_id = extra_sub_expressions[result_id];
        if (!tmp_id)
        {
            tmp_id = ir.increase_bound_by(2);

            auto vec4_type = get<SPIRType>(result_type);
            vec4_type.vecsize = 4;
            set<SPIRType>(tmp_id + 1, vec4_type);
        }

        vec4_temporary_id = tmp_id;
    }

    if (check_discard)
    {
        if (is_atomic_compare_exchange_strong)
        {
            // We're already emitting a CAS loop here; a conditional won't hurt.
            emit_uninitialized_temporary_expression(result_type, result_id);
            if (vec4_temporary_id)
                emit_uninitialized_temporary_expression(vec4_temporary_id + 1, vec4_temporary_id);

            statement("if (!", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), ")");
            begin_scope();
        }
        else
            exp = join("(!", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), " ? ");
    }
    if (use_native_image_atomic)
    {
        auto obj_expression = to_expression(obj);
        auto split_index = obj_expression.find_first_of('@');

        // Will only be false if we're in "force recompile later" mode.
        if (split_index != string::npos)
        {
            auto coord = obj_expression.substr(split_index + 1);
            auto image_expr = obj_expression.substr(0, split_index);

            // Handle problem cases with sign where we need signed min/max on a uint image for example.
            // It seems to work to cast the texture type itself, even if it is probably wildly outside of spec,
            // but SPIR-V requires this to work.
            if ((opcode == OpAtomicUMax || opcode == OpAtomicUMin ||
                 opcode == OpAtomicSMax || opcode == OpAtomicSMin) &&
                type.basetype != expected_type)
            {
                auto *backing_var = maybe_get_backing_variable(obj);
                if (backing_var)
                {
                    add_spv_func_and_recompile(SPVFuncImplTextureCast);

                    const auto *backing_type = &get<SPIRType>(backing_var->basetype);
                    while (backing_type->op != OpTypeImage)
                        backing_type = &get<SPIRType>(backing_type->parent_type);

                    auto img_type = *backing_type;
                    auto tmp_type = type;
                    tmp_type.basetype = expected_type;
                    img_type.image.type = ir.increase_bound_by(1);
                    set<SPIRType>(img_type.image.type, tmp_type);

                    image_expr = join("spvTextureCast<", type_to_glsl(img_type, obj), ">(", image_expr, ")");
                }
            }

            exp += join(image_expr, ".", op, "(");
            if (ptr_type.storage == StorageClassImage && res_type->image.arrayed)
            {
                switch (res_type->image.dim)
                {
                case Dim1D:
                    if (msl_options.texture_1D_as_2D)
                        exp += join("uint2(", coord, ".x, 0), ", coord, ".y");
                    else
                        exp += join(coord, ".x, ", coord, ".y");
                    break;
                case Dim2D:
                    exp += join(coord, ".xy, ", coord, ".z");
                    break;
                default:
                    SPIRV_CROSS_THROW("Cannot do atomics on Cube textures.");
                }
            }
            else if (ptr_type.storage == StorageClassImage && res_type->image.dim == Dim1D && msl_options.texture_1D_as_2D)
                exp += join("uint2(", coord, ", 0)");
            else
                exp += coord;
        }
        else
        {
            exp += obj_expression;
        }
    }
    else
    {
        exp += string(op) + "_explicit(";
        exp += "(";
        // Emulate texture2D atomic operations
        if (ptr_type.storage == StorageClassImage)
        {
            auto &flags = ir.get_decoration_bitset(var->self);
            if (decoration_flags_signal_volatile(flags))
                exp += "volatile ";
            exp += "device";
        }
        else if (var && ptr_type.storage != StorageClassPhysicalStorageBuffer)
        {
            exp += get_argument_address_space(*var);
        }
        else
        {
            // Fallback scenario, could happen for raw pointers.
            exp += ptr_type.storage == StorageClassWorkgroup ? "threadgroup" : "device";
        }

        exp += " atomic_";
        // For signed and unsigned min/max, we can signal this through the pointer type.
        // There is no other way, since C++ does not have explicit signage for atomics.
        exp += type_to_glsl(remapped_type);
        exp += "*)";

        exp += "&";
        exp += to_enclosed_expression(obj);
    }
    if (is_atomic_compare_exchange_strong)
    {
        assert(strcmp(op, "atomic_compare_exchange_weak") == 0);
        assert(op2);
        assert(has_mem_order_2);
        exp += ", &";
        exp += to_name(vec4_temporary_id ? vec4_temporary_id : result_id);
        exp += ", ";
        exp += to_expression(op2);

        if (!use_native_image_atomic)
        {
            exp += ", ";
            exp += get_memory_order(mem_order_1);
            exp += ", ";
            exp += get_memory_order(mem_order_2);
        }
        exp += ")";

        // MSL only supports the weak atomic compare exchange, so emit a CAS loop here.
        // The MSL function returns false if the atomic write fails OR the comparison test fails,
        // so we must validate that it wasn't the comparison test that failed before continuing
        // the CAS loop, otherwise it will loop infinitely, with the comparison test always failing.
        // The function updates the comparator value from the memory value, so the additional
        // comparison test evaluates the memory value against the expected value.
        if (!check_discard)
        {
            emit_uninitialized_temporary_expression(result_type, result_id);
            if (vec4_temporary_id)
                emit_uninitialized_temporary_expression(vec4_temporary_id + 1, vec4_temporary_id);
        }

        statement("do");
        begin_scope();

        string scalar_expression;
        if (vec4_temporary_id)
            scalar_expression = join(to_expression(vec4_temporary_id), ".x");
        else
            scalar_expression = to_expression(result_id);

        statement(scalar_expression, " = ", to_expression(op1), ";");
        end_scope_decl(join("while (!", exp, " && ", scalar_expression, " == ", to_enclosed_expression(op1), ")"));
        if (vec4_temporary_id)
            statement(to_expression(result_id), " = ", scalar_expression, ";");

        // Vulkan: (section 9.29: ... and values returned by atomic instructions in helper invocations are undefined)
        if (check_discard)
        {
            end_scope();
            statement("else");
            begin_scope();
            statement(to_expression(result_id), " = {};");
            end_scope();
        }
    }
    else
    {
        assert(strcmp(op, "atomic_compare_exchange_weak") != 0);

        if (op1)
        {
            exp += ", ";
            if (op1_is_literal)
                exp += to_string(op1);
            else
                exp += bitcast_expression(expected_type, op1);
        }

        if (op2)
            exp += ", " + to_expression(op2);

        if (!use_native_image_atomic)
        {
            exp += string(", ") + get_memory_order(mem_order_1);
            if (has_mem_order_2)
                exp += string(", ") + get_memory_order(mem_order_2);
        }

        exp += ")";

        // For some particular reason, atomics return vec4 in Metal ...
        if (use_native_image_atomic)
            exp += ".x";

        // Vulkan: (section 9.29: ... and values returned by atomic instructions in helper invocations are undefined)
        if (check_discard)
        {
            exp += " : ";
            if (strcmp(op, "atomic_store") != 0)
                exp += join(type_to_glsl(get<SPIRType>(result_type)), "{}");
            else
                exp += "((void)0)";
            exp += ")";
        }

        if (expected_type != type.basetype)
            exp = bitcast_expression(type, expected_type, exp);

        if (strcmp(op, "atomic_store") != 0)
            emit_op(result_type, result_id, exp, false);
        else
            statement(exp, ";");
    }

    flush_all_atomic_capable_variables();
}
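
// Illustrative shape of the emitted CAS loop (hypothetical SSA names; device scalar case):
//
//     do
//     {
//         _24 = _expected;
//     } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&buf.value, &_24, _desired,
//                                                     memory_order_relaxed, memory_order_relaxed) && _24 == _expected);
//
// The trailing "_24 == _expected" test distinguishes a spurious weak failure (retry the loop)
// from a genuine comparison failure (exit), exactly as described in the comment above.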

// Metal only supports relaxed memory order for now
const char *CompilerMSL::get_memory_order(uint32_t)
{
    return "memory_order_relaxed";
}

// Override for MSL-specific extension syntax instructions.
// In some cases, deliberately select either the fast or precise versions of the MSL functions to match Vulkan math precision results.
void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count)
{
    auto op = static_cast<GLSLstd450>(eop);

    // If we need to do implicit bitcasts, make sure we do it with the correct type.
    uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, count);
    auto int_type = to_signed_basetype(integer_width);
    auto uint_type = to_unsigned_basetype(integer_width);

    op = get_remapped_glsl_op(op);

    auto &restype = get<SPIRType>(result_type);

    switch (op)
    {
    case GLSLstd450Sinh:
        if (restype.basetype == SPIRType::Half)
        {
            // MSL does not have overload for half. Force-cast back to half.
            auto expr = join("half(fast::sinh(", to_unpacked_expression(args[0]), "))");
            emit_op(result_type, id, expr, should_forward(args[0]));
            inherit_expression_dependencies(id, args[0]);
        }
        else
            emit_unary_func_op(result_type, id, args[0], "fast::sinh");
        break;

    case GLSLstd450Cosh:
        if (restype.basetype == SPIRType::Half)
        {
            // MSL does not have overload for half. Force-cast back to half.
            auto expr = join("half(fast::cosh(", to_unpacked_expression(args[0]), "))");
            emit_op(result_type, id, expr, should_forward(args[0]));
            inherit_expression_dependencies(id, args[0]);
        }
        else
            emit_unary_func_op(result_type, id, args[0], "fast::cosh");
        break;

    case GLSLstd450Tanh:
        if (restype.basetype == SPIRType::Half)
        {
            // MSL does not have overload for half. Force-cast back to half.
            auto expr = join("half(fast::tanh(", to_unpacked_expression(args[0]), "))");
            emit_op(result_type, id, expr, should_forward(args[0]));
            inherit_expression_dependencies(id, args[0]);
        }
        else
            emit_unary_func_op(result_type, id, args[0], "precise::tanh");
        break;

    case GLSLstd450Atan2:
        if (restype.basetype == SPIRType::Half)
        {
            // MSL does not have overload for half. Force-cast back to half.
            auto expr = join("half(fast::atan2(", to_unpacked_expression(args[0]), ", ", to_unpacked_expression(args[1]), "))");
            emit_op(result_type, id, expr, should_forward(args[0]) && should_forward(args[1]));
            inherit_expression_dependencies(id, args[0]);
            inherit_expression_dependencies(id, args[1]);
        }
        else
            emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2");
        break;

    case GLSLstd450InverseSqrt:
        emit_unary_func_op(result_type, id, args[0], "rsqrt");
        break;

    case GLSLstd450RoundEven:
        emit_unary_func_op(result_type, id, args[0], "rint");
        break;

    case GLSLstd450FindILsb:
    {
        // In this template version of findLSB, we return T.
        auto basetype = expression_type(args[0]).basetype;
        emit_unary_func_op_cast(result_type, id, args[0], "spvFindLSB", basetype, basetype);
        break;
    }

    case GLSLstd450FindSMsb:
        emit_unary_func_op_cast(result_type, id, args[0], "spvFindSMSB", int_type, int_type);
        break;

    case GLSLstd450FindUMsb:
        emit_unary_func_op_cast(result_type, id, args[0], "spvFindUMSB", uint_type, uint_type);
        break;

    case GLSLstd450PackSnorm4x8:
        emit_unary_func_op(result_type, id, args[0], "pack_float_to_snorm4x8");
        break;

    case GLSLstd450PackUnorm4x8:
        emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm4x8");
        break;

    case GLSLstd450PackSnorm2x16:
        emit_unary_func_op(result_type, id, args[0], "pack_float_to_snorm2x16");
        break;

    case GLSLstd450PackUnorm2x16:
        emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm2x16");
        break;

    case GLSLstd450PackHalf2x16:
    {
        auto expr = join("as_type<uint>(half2(", to_expression(args[0]), "))");
        emit_op(result_type, id, expr, should_forward(args[0]));
        inherit_expression_dependencies(id, args[0]);
        break;
    }

    case GLSLstd450UnpackSnorm4x8:
        emit_unary_func_op(result_type, id, args[0], "unpack_snorm4x8_to_float");
        break;

    case GLSLstd450UnpackUnorm4x8:
        emit_unary_func_op(result_type, id, args[0], "unpack_unorm4x8_to_float");
        break;

    case GLSLstd450UnpackSnorm2x16:
        emit_unary_func_op(result_type, id, args[0], "unpack_snorm2x16_to_float");
        break;

    case GLSLstd450UnpackUnorm2x16:
        emit_unary_func_op(result_type, id, args[0], "unpack_unorm2x16_to_float");
        break;

    case GLSLstd450UnpackHalf2x16:
    {
        auto expr = join("float2(as_type<half2>(", to_expression(args[0]), "))");
        emit_op(result_type, id, expr, should_forward(args[0]));
        inherit_expression_dependencies(id, args[0]);
        break;
    }

    case GLSLstd450PackDouble2x32:
        emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450PackDouble2x32"); // Currently unsupported
        break;

    case GLSLstd450UnpackDouble2x32:
        emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450UnpackDouble2x32"); // Currently unsupported
        break;

    case GLSLstd450MatrixInverse:
    {
        auto &mat_type = get<SPIRType>(result_type);
        switch (mat_type.columns)
        {
        case 2:
            emit_unary_func_op(result_type, id, args[0], "spvInverse2x2");
            break;
        case 3:
            emit_unary_func_op(result_type, id, args[0], "spvInverse3x3");
            break;
        case 4:
            emit_unary_func_op(result_type, id, args[0], "spvInverse4x4");
            break;
        default:
            break;
        }
        break;
    }
    case GLSLstd450FMin:
        // If the result type isn't float, don't bother calling the specific
        // precise::/fast:: version. Metal doesn't have those for half and
        // double types.
        if (get<SPIRType>(result_type).basetype != SPIRType::Float)
            emit_binary_func_op(result_type, id, args[0], args[1], "min");
        else
            emit_binary_func_op(result_type, id, args[0], args[1], "fast::min");
        break;

    case GLSLstd450FMax:
        if (get<SPIRType>(result_type).basetype != SPIRType::Float)
            emit_binary_func_op(result_type, id, args[0], args[1], "max");
        else
            emit_binary_func_op(result_type, id, args[0], args[1], "fast::max");
        break;

    case GLSLstd450FClamp:
        // TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call.
        if (get<SPIRType>(result_type).basetype != SPIRType::Float)
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
        else
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fast::clamp");
        break;

    case GLSLstd450NMin:
        if (get<SPIRType>(result_type).basetype != SPIRType::Float)
            emit_binary_func_op(result_type, id, args[0], args[1], "min");
        else
            emit_binary_func_op(result_type, id, args[0], args[1], "precise::min");
        break;

    case GLSLstd450NMax:
        if (get<SPIRType>(result_type).basetype != SPIRType::Float)
            emit_binary_func_op(result_type, id, args[0], args[1], "max");
        else
            emit_binary_func_op(result_type, id, args[0], args[1], "precise::max");
        break;

    case GLSLstd450NClamp:
        // TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call.
        if (get<SPIRType>(result_type).basetype != SPIRType::Float)
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
        else
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "precise::clamp");
        break;
    case GLSLstd450InterpolateAtCentroid:
    {
        // We can't just emit the expression normally, because the qualified name contains a call to the default
        // interpolate method, or refers to a local variable. We saved the interface index we need; use it to construct
        // the base for the method call.
        uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex);
        string component;
        if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr))
        {
            uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr);
            auto *c = maybe_get<SPIRConstant>(index_expr);
            if (!c || c->specialization)
                component = join("[", to_expression(index_expr), "]");
            else
                component = join(".", index_to_swizzle(c->scalar()));
        }
        emit_op(result_type, id,
                join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index),
                     ".interpolate_at_centroid()", component),
                should_forward(args[0]));
        break;
    }

    case GLSLstd450InterpolateAtSample:
    {
        uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex);
        string component;
        if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr))
        {
            uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr);
            auto *c = maybe_get<SPIRConstant>(index_expr);
            if (!c || c->specialization)
                component = join("[", to_expression(index_expr), "]");
            else
                component = join(".", index_to_swizzle(c->scalar()));
        }
        emit_op(result_type, id,
                join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index),
                     ".interpolate_at_sample(", to_expression(args[1]), ")", component),
                should_forward(args[0]) && should_forward(args[1]));
        break;
    }

    case GLSLstd450InterpolateAtOffset:
    {
        uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex);
        string component;
        if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr))
        {
            uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr);
            auto *c = maybe_get<SPIRConstant>(index_expr);
            if (!c || c->specialization)
                component = join("[", to_expression(index_expr), "]");
            else
                component = join(".", index_to_swizzle(c->scalar()));
        }
        // Like Direct3D, Metal puts the (0, 0) at the upper-left corner, not the center as SPIR-V and GLSL do.
        // Offset the offset by (1/2 - 1/16), or 0.4375, to compensate for this.
        // It has to be (1/2 - 1/16) and not 1/2, or several CTS tests subtly break on Intel.
        emit_op(result_type, id,
                join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index),
                     ".interpolate_at_offset(", to_expression(args[1]), " + 0.4375)", component),
                should_forward(args[0]) && should_forward(args[1]));
        break;
    }
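
    // For illustration (hypothetical shader names): GLSL "interpolateAtOffset(vColor, off)"
    // becomes roughly
    //
    //     in.vColor.interpolate_at_offset(off + 0.4375)
    //
    // with the 0.4375 fudge factor compensating for Metal's upper-left (0, 0) convention.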
    case GLSLstd450Distance:
        // MSL does not support scalar versions here.
        if (expression_type(args[0]).vecsize == 1)
        {
            // Equivalent to length(a - b) -> abs(a - b).
            emit_op(result_type, id,
                    join("abs(", to_enclosed_unpacked_expression(args[0]), " - ",
                         to_enclosed_unpacked_expression(args[1]), ")"),
                    should_forward(args[0]) && should_forward(args[1]));
            inherit_expression_dependencies(id, args[0]);
            inherit_expression_dependencies(id, args[1]);
        }
        else
            CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;

    case GLSLstd450Length:
        // MSL does not support scalar versions, so use abs().
        if (expression_type(args[0]).vecsize == 1)
            emit_unary_func_op(result_type, id, args[0], "abs");
        else
            CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;

    case GLSLstd450Normalize:
    {
        auto &exp_type = expression_type(args[0]);
        // MSL does not support scalar versions here.
        // MSL has no implementation for normalize in the fast:: namespace for half2 and half3
        // Returns -1 or 1 for valid input, sign() does the job.
        if (exp_type.vecsize == 1)
            emit_unary_func_op(result_type, id, args[0], "sign");
        else if (exp_type.vecsize <= 3 && exp_type.basetype == SPIRType::Half)
            emit_unary_func_op(result_type, id, args[0], "normalize");
        else
            emit_unary_func_op(result_type, id, args[0], "fast::normalize");
        break;
    }

    case GLSLstd450Reflect:
        if (get<SPIRType>(result_type).vecsize == 1)
            emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect");
        else
            CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;

    case GLSLstd450Refract:
        if (get<SPIRType>(result_type).vecsize == 1)
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract");
        else
            CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;

    case GLSLstd450FaceForward:
        if (get<SPIRType>(result_type).vecsize == 1)
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward");
        else
            CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;

    case GLSLstd450Modf:
    case GLSLstd450Frexp:
    {
        // Special case. If the variable is a scalar access chain, we cannot use it directly. We have to emit a temporary.
        // Another special case is if the variable is in a storage class which is not thread.
        auto *ptr = maybe_get<SPIRExpression>(args[1]);
        auto &type = expression_type(args[1]);

        bool is_thread_storage = storage_class_array_is_thread(type.storage);
        if (type.storage == StorageClassOutput && capture_output_to_buffer)
            is_thread_storage = false;

        if (!is_thread_storage ||
            (ptr && ptr->access_chain && is_scalar(expression_type(args[1]))))
        {
            register_call_out_argument(args[1]);
            forced_temporaries.insert(id);

            // Need to create temporaries and copy over to access chain after.
            // We cannot directly take the reference of a vector swizzle in MSL, even if it's scalar ...
            uint32_t &tmp_id = extra_sub_expressions[id];
            if (!tmp_id)
                tmp_id = ir.increase_bound_by(1);

            uint32_t tmp_type_id = get_pointee_type_id(expression_type_id(args[1]));
            emit_uninitialized_temporary_expression(tmp_type_id, tmp_id);

            emit_binary_func_op(result_type, id, args[0], tmp_id, eop == GLSLstd450Modf ? "modf" : "frexp");
            statement(to_expression(args[1]), " = ", to_expression(tmp_id), ";");
        }
        else
            CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;
    }
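
    // Sketch of the temporary path above (hypothetical names): for "modf(v, out.y)", where
    // "out.y" cannot be referenced directly in MSL, the emitted code is roughly
    //
    //     float _tmp;
    //     float _20 = modf(v, _tmp);
    //     out.y = _tmp;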
    case GLSLstd450Pow:
        // powr makes x < 0.0 undefined, just like SPIR-V.
        emit_binary_func_op(result_type, id, args[0], args[1], "powr");
        break;

    default:
        CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
        break;
    }
}

void CompilerMSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
                                                        const uint32_t *args, uint32_t count)
{
    enum AMDShaderTrinaryMinMax
    {
        FMin3AMD = 1,
        UMin3AMD = 2,
        SMin3AMD = 3,
        FMax3AMD = 4,
        UMax3AMD = 5,
        SMax3AMD = 6,
        FMid3AMD = 7,
        UMid3AMD = 8,
        SMid3AMD = 9
    };

    if (!msl_options.supports_msl_version(2, 1))
        SPIRV_CROSS_THROW("Trinary min/max functions require MSL 2.1.");

    auto op = static_cast<AMDShaderTrinaryMinMax>(eop);

    switch (op)
    {
    case FMid3AMD:
    case UMid3AMD:
    case SMid3AMD:
        emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "median3");
        break;
    default:
        CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(result_type, id, eop, args, count);
        break;
    }
}

// Emit a structure declaration for the specified interface variable.
void CompilerMSL::emit_interface_block(uint32_t ib_var_id)
{
    if (ib_var_id)
    {
        auto &ib_var = get<SPIRVariable>(ib_var_id);
        auto &ib_type = get_variable_data_type(ib_var);
        //assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty());
        assert(ib_type.basetype == SPIRType::Struct);
        emit_struct(ib_type);
    }
}

// Emits the declaration signature of the specified function.
// If this is the entry point function, Metal-specific return value and function arguments are added.
void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &)
{
    if (func.self != ir.default_entry_point)
        add_function_overload(func);

    local_variable_names = resource_names;
    string decl;

    processing_entry_point = func.self == ir.default_entry_point;

    // Metal helper functions must be static force-inline otherwise they will cause problems when linked together in a single Metallib.
    if (!processing_entry_point)
        statement(force_inline);

    auto &type = get<SPIRType>(func.return_type);

    if (!type.array.empty() && msl_options.force_native_arrays)
    {
        // We cannot return native arrays in MSL, so "return" through an out variable.
        decl += "void";
    }
    else
    {
        decl += func_type_decl(type);
    }

    decl += " ";
    decl += to_name(func.self);
    decl += "(";

    if (!type.array.empty() && msl_options.force_native_arrays)
    {
        // Fake array returns by writing to an out array instead.
        decl += "thread ";
        decl += type_to_glsl(type);
        decl += " (&spvReturnValue)";
        decl += type_to_array_glsl(type, 0);
        if (!func.arguments.empty())
            decl += ", ";
    }
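
    // Sketch of the resulting signature (hypothetical function and types; assumes force_inline
    // expands to the usual static always_inline attribute): a helper that logically returns
    // float[2] is emitted roughly as
    //
    //     static inline __attribute__((always_inline))
    //     void _fn(thread float (&spvReturnValue)[2], ...)
    //
    // and callers pass the destination array by reference instead of consuming a return value.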
    if (processing_entry_point)
    {
        if (msl_options.argument_buffers)
            decl += entry_point_args_argument_buffer(!func.arguments.empty());
        else
            decl += entry_point_args_classic(!func.arguments.empty());

        // append entry point args to avoid conflicts in local variable names.
        local_variable_names.insert(resource_names.begin(), resource_names.end());

        // If entry point function has variables that require early declaration,
        // ensure they each have an empty initializer, creating one if needed.
        // This is done at this late stage because the initialization expression
        // is cleared after each compilation pass.
        for (auto var_id : vars_needing_early_declaration)
        {
            auto &ed_var = get<SPIRVariable>(var_id);
            ID &initializer = ed_var.initializer;
            if (!initializer)
                initializer = ir.increase_bound_by(1);

            // Do not override proper initializers.
            if (ir.ids[initializer].get_type() == TypeNone || ir.ids[initializer].get_type() == TypeExpression)
                set<SPIRExpression>(ed_var.initializer, "{}", ed_var.basetype, true);
        }
    }

    for (auto &arg : func.arguments)
    {
        uint32_t name_id = arg.id;

        auto *var = maybe_get<SPIRVariable>(arg.id);
        if (var)
        {
            // If we need to modify the name of the variable, make sure we modify the original variable.
            // Our alias is just a shadow variable.
            if (arg.alias_global_variable && var->basevariable)
                name_id = var->basevariable;

            var->parameter = &arg; // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
        }

        add_local_variable_name(name_id);

        decl += argument_decl(arg);

        bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);

        auto &arg_type = get<SPIRType>(arg.type);
        if (arg_type.basetype == SPIRType::SampledImage && !is_dynamic_img_sampler)
        {
            // Manufacture automatic plane args for multiplanar texture
            uint32_t planes = 1;
            if (auto *constexpr_sampler = find_constexpr_sampler(name_id))
                if (constexpr_sampler->ycbcr_conversion_enable)
                    planes = constexpr_sampler->planes;
            for (uint32_t i = 1; i < planes; i++)
                decl += join(", ", argument_decl(arg), plane_name_suffix, i);

            // Manufacture automatic sampler arg for SampledImage texture
            if (arg_type.image.dim != DimBuffer)
            {
                if (arg_type.array.empty() || (var ? is_var_runtime_size_array(*var) : is_runtime_size_array(arg_type)))
                {
                    decl += join(", ", sampler_type(arg_type, arg.id, false), " ", to_sampler_expression(name_id));
                }
                else
                {
                    const char *sampler_address_space =
                            descriptor_address_space(name_id,
                                                     StorageClassUniformConstant,
                                                     "thread const");
                    decl += join(", ", sampler_address_space, " ", sampler_type(arg_type, name_id, false), "& ",
                                 to_sampler_expression(name_id));
                }
            }
        }

        // Manufacture automatic swizzle arg.
        if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type) &&
            !is_dynamic_img_sampler)
        {
            bool arg_is_array = !arg_type.array.empty();
            decl += join(", constant uint", arg_is_array ? "* " : "& ", to_swizzle_expression(name_id));
        }

        if (buffer_requires_array_length(name_id))
        {
            bool arg_is_array = !arg_type.array.empty();
            decl += join(", constant uint", arg_is_array ? "* " : "& ", to_buffer_size_expression(name_id));
        }

        if (&arg != &func.arguments.back())
            decl += ", ";
    }

    decl += ")";
    statement(decl);
}

static bool needs_chroma_reconstruction(const MSLConstexprSampler *constexpr_sampler)
{
    // For now, only multiplanar images need explicit reconstruction. GBGR and BGRG images
    // use implicit reconstruction.
    return constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && constexpr_sampler->planes > 1;
}

// Returns the texture sampling function string for the specified image and sampling characteristics.
string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args)
{
    VariableID img = args.base.img;
    const MSLConstexprSampler *constexpr_sampler = nullptr;
    bool is_dynamic_img_sampler = false;
    if (auto *var = maybe_get_backing_variable(img))
    {
        constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
        is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
    }

    // Special-case gather. We have to alter the component being looked up in the swizzle case.
    if (msl_options.swizzle_texture_samples && args.base.is_gather && !is_dynamic_img_sampler &&
        (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable))
    {
        bool is_compare = comparison_ids.count(img);
        add_spv_func_and_recompile(is_compare ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle);
        return is_compare ? "spvGatherCompareSwizzle" : "spvGatherSwizzle";
    }

    // Special-case gather with an array of offsets. We have to lower into 4 separate gathers.
    if (args.has_array_offsets && !is_dynamic_img_sampler &&
        (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable))
    {
        bool is_compare = comparison_ids.count(img);
        add_spv_func_and_recompile(is_compare ? SPVFuncImplGatherCompareConstOffsets : SPVFuncImplGatherConstOffsets);
        add_spv_func_and_recompile(SPVFuncImplForwardArgs);
        return is_compare ? "spvGatherCompareConstOffsets" : "spvGatherConstOffsets";
    }

    auto *combined = maybe_get<SPIRCombinedImageSampler>(img);

    // Texture reference
    string fname;
    if (needs_chroma_reconstruction(constexpr_sampler) && !is_dynamic_img_sampler)
    {
        if (constexpr_sampler->planes != 2 && constexpr_sampler->planes != 3)
            SPIRV_CROSS_THROW("Unhandled number of color image planes!");
        // 444 images aren't downsampled, so we don't need to do linear filtering.
        if (constexpr_sampler->resolution == MSL_FORMAT_RESOLUTION_444 ||
            constexpr_sampler->chroma_filter == MSL_SAMPLER_FILTER_NEAREST)
        {
            if (constexpr_sampler->planes == 2)
                add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest2Plane);
            else
                add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest3Plane);
            fname = "spvChromaReconstructNearest";
        }
        else // Linear with a downsampled format
        {
            fname = "spvChromaReconstructLinear";
            switch (constexpr_sampler->resolution)
            {
            case MSL_FORMAT_RESOLUTION_444:
                assert(false);
                break; // not reached
            case MSL_FORMAT_RESOLUTION_422:
                switch (constexpr_sampler->x_chroma_offset)
                {
                case MSL_CHROMA_LOCATION_COSITED_EVEN:
                    if (constexpr_sampler->planes == 2)
                        add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven2Plane);
                    else
                        add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven3Plane);
                    fname += "422CositedEven";
                    break;
                case MSL_CHROMA_LOCATION_MIDPOINT:
                    if (constexpr_sampler->planes == 2)
                        add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint2Plane);
                    else
                        add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint3Plane);
                    fname += "422Midpoint";
                    break;
                default:
                    SPIRV_CROSS_THROW("Invalid chroma location.");
                }
                break;
            case MSL_FORMAT_RESOLUTION_420:
                fname += "420";
                switch (constexpr_sampler->x_chroma_offset)
                {
                case MSL_CHROMA_LOCATION_COSITED_EVEN:
                    switch (constexpr_sampler->y_chroma_offset)
                    {
                    case MSL_CHROMA_LOCATION_COSITED_EVEN:
                        if (constexpr_sampler->planes == 2)
                            add_spv_func_and_recompile(
                                    SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane);
                        else
                            add_spv_func_and_recompile(
                                    SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane);
                        fname += "XCositedEvenYCositedEven";
                        break;
                    case MSL_CHROMA_LOCATION_MIDPOINT:
                        if (constexpr_sampler->planes == 2)
                            add_spv_func_and_recompile(
                                    SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane);
                        else
                            add_spv_func_and_recompile(
                                    SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane);
                        fname += "XCositedEvenYMidpoint";
                        break;
                    default:
                        SPIRV_CROSS_THROW("Invalid Y chroma location.");
                    }
                    break;
                case MSL_CHROMA_LOCATION_MIDPOINT:
                    switch (constexpr_sampler->y_chroma_offset)
                    {
                    case MSL_CHROMA_LOCATION_COSITED_EVEN:
                        if (constexpr_sampler->planes == 2)
                            add_spv_func_and_recompile(
                                    SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane);
                        else
                            add_spv_func_and_recompile(
                                    SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane);
                        fname += "XMidpointYCositedEven";
                        break;
                    case MSL_CHROMA_LOCATION_MIDPOINT:
                        if (constexpr_sampler->planes == 2)
                            add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane);
                        else
                            add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane);
                        fname += "XMidpointYMidpoint";
                        break;
                    default:
                        SPIRV_CROSS_THROW("Invalid Y chroma location.");
                    }
                    break;
                default:
                    SPIRV_CROSS_THROW("Invalid X chroma location.");
                }
                break;
            default:
                SPIRV_CROSS_THROW("Invalid format resolution.");
            }
        }
    }
    else
    {
        fname = to_expression(combined ? combined->image : img) + ".";

        // Texture function and sampler
        if (args.base.is_fetch)
            fname += "read";
        else if (args.base.is_gather)
            fname += "gather";
        else
            fname += "sample";

        if (args.has_dref)
            fname += "_compare";
    }

    return fname;
}
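
// Worked example of the name assembly above: a two-plane 4:2:0 constexpr sampler with both
// chroma offsets at midpoint and linear chroma filtering concatenates to
//
//     fname == "spvChromaReconstructLinear420XMidpointYMidpoint"
//
// and registers the matching 2-plane helper for emission into the shader preamble.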

string CompilerMSL::convert_to_f32(const string &expr, uint32_t components)
{
    SPIRType t { components > 1 ? OpTypeVector : OpTypeFloat };
    t.basetype = SPIRType::Float;
    t.vecsize = components;
    t.columns = 1;
    return join(type_to_glsl_constructor(t), "(", expr, ")");
}
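
// Usage sketch (hypothetical expression name): convert_to_f32("h_coord", 2) returns the string
// "float2(h_coord)", wrapping a half-precision coordinate so it can feed Metal's float-only
// sample() overloads.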

static inline bool sampling_type_needs_f32_conversion(const SPIRType &type)
{
    // Double is not supported to begin with, but doesn't hurt to check for completeness.
    return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double;
}

// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
{
    VariableID img = args.base.img;
    auto &imgtype = *args.base.imgtype;
    uint32_t lod = args.lod;
    uint32_t grad_x = args.grad_x;
    uint32_t grad_y = args.grad_y;
    uint32_t bias = args.bias;

    const MSLConstexprSampler *constexpr_sampler = nullptr;
    bool is_dynamic_img_sampler = false;
    if (auto *var = maybe_get_backing_variable(img))
    {
        constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
        is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
    }

    string farg_str;
    bool forward = true;

    if (!is_dynamic_img_sampler)
    {
        // Texture reference (for some cases)
        if (needs_chroma_reconstruction(constexpr_sampler))
        {
            // Multiplanar images need two or three textures.
            farg_str += to_expression(img);
            for (uint32_t i = 1; i < constexpr_sampler->planes; i++)
                farg_str += join(", ", to_expression(img), plane_name_suffix, i);
        }
        else if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) &&
                 msl_options.swizzle_texture_samples && args.base.is_gather)
        {
            auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
            farg_str += to_expression(combined ? combined->image : img);
        }

        // Gathers with constant offsets call a special function, so include the texture.
        if (args.has_array_offsets)
            farg_str += to_expression(img);

        // Sampler reference
        if (!args.base.is_fetch)
        {
            if (!farg_str.empty())
                farg_str += ", ";
            farg_str += to_sampler_expression(img);
        }

        if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) &&
            msl_options.swizzle_texture_samples && args.base.is_gather)
        {
            // Add the swizzle constant from the swizzle buffer.
            farg_str += ", " + to_swizzle_expression(img);
            used_swizzle_buffer = true;
        }

        // Const offsets gather puts the const offsets before the other args.
        if (args.has_array_offsets)
        {
            forward = forward && should_forward(args.offset);
            farg_str += ", " + to_expression(args.offset);
        }

        // Const offsets gather or swizzled gather puts the component before the other args.
        if (args.component && (args.has_array_offsets || msl_options.swizzle_texture_samples))
        {
            forward = forward && should_forward(args.component);
            farg_str += ", " + to_component_argument(args.component);
        }
    }
    // Texture coordinates
    forward = forward && should_forward(args.coord);
    auto coord_expr = to_enclosed_expression(args.coord);
    auto &coord_type = expression_type(args.coord);
    bool coord_is_fp = type_is_floating_point(coord_type);
    bool is_cube_fetch = false;

    string tex_coords = coord_expr;
    uint32_t alt_coord_component = 0;

    switch (imgtype.image.dim)
    {
    case Dim1D:
        if (coord_type.vecsize > 1)
            tex_coords = enclose_expression(tex_coords) + ".x";

        if (args.base.is_fetch)
            tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
        else if (sampling_type_needs_f32_conversion(coord_type))
            tex_coords = convert_to_f32(tex_coords, 1);

        if (msl_options.texture_1D_as_2D)
        {
            if (args.base.is_fetch)
                tex_coords = "uint2(" + tex_coords + ", 0)";
            else
                tex_coords = "float2(" + tex_coords + ", 0.5)";
        }

        alt_coord_component = 1;
        break;

    case DimBuffer:
        if (coord_type.vecsize > 1)
            tex_coords = enclose_expression(tex_coords) + ".x";

        if (msl_options.texture_buffer_native)
        {
            tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
        }
        else
        {
            // Metal texel buffer textures are 2D, so convert 1D coord to 2D.
            // Support for Metal 2.1's new texture_buffer type.
            if (args.base.is_fetch)
            {
                if (msl_options.texel_buffer_texture_width > 0)
                {
                    tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
                }
                else
                {
                    tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ", " +
                                 to_expression(img) + ")";
                }
            }
        }

        alt_coord_component = 1;
        break;
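
    // Sketch of the emulation above (assuming the spvTexelBufferCoord helper folds the 1D index
    // into a 2D texel address using the configured texture width W):
    //
    //     spvTexelBufferCoord(i)  ~  uint2(i % W, i / W)
    //
    // When no fixed width is configured, the texture itself is passed so the helper can query it.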
    case DimSubpassData:
        // If we're using Metal's native frame-buffer fetch API for subpass inputs,
        // this path will not be hit.
        tex_coords = "uint2(gl_FragCoord.xy)";
        alt_coord_component = 2;
        break;

    case Dim2D:
        if (coord_type.vecsize > 2)
            tex_coords = enclose_expression(tex_coords) + ".xy";

        if (args.base.is_fetch)
            tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
        else if (sampling_type_needs_f32_conversion(coord_type))
            tex_coords = convert_to_f32(tex_coords, 2);

        alt_coord_component = 2;
        break;

    case Dim3D:
        if (coord_type.vecsize > 3)
            tex_coords = enclose_expression(tex_coords) + ".xyz";

        if (args.base.is_fetch)
            tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
        else if (sampling_type_needs_f32_conversion(coord_type))
            tex_coords = convert_to_f32(tex_coords, 3);

        alt_coord_component = 3;
        break;

    case DimCube:
        if (args.base.is_fetch)
        {
            is_cube_fetch = true;
            tex_coords += ".xy";
            tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
        }
        else
        {
            if (coord_type.vecsize > 3)
                tex_coords = enclose_expression(tex_coords) + ".xyz";
        }

        if (sampling_type_needs_f32_conversion(coord_type))
            tex_coords = convert_to_f32(tex_coords, 3);

        alt_coord_component = 3;
        break;

    default:
        break;
    }
    if (args.base.is_fetch && args.offset)
    {
        // Fetch offsets must be applied directly to the coordinate.
        forward = forward && should_forward(args.offset);
        auto &type = expression_type(args.offset);
        if (imgtype.image.dim == Dim1D && msl_options.texture_1D_as_2D)
        {
            if (type.basetype != SPIRType::UInt)
                tex_coords += join(" + uint2(", bitcast_expression(SPIRType::UInt, args.offset), ", 0)");
            else
                tex_coords += join(" + uint2(", to_enclosed_expression(args.offset), ", 0)");
        }
        else
        {
            if (type.basetype != SPIRType::UInt)
                tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.offset);
            else
                tex_coords += " + " + to_enclosed_expression(args.offset);
        }
    }

    // If projection, use alt coord as divisor
    if (args.base.is_proj)
    {
        if (sampling_type_needs_f32_conversion(coord_type))
            tex_coords += " / " + convert_to_f32(to_extract_component_expression(args.coord, alt_coord_component), 1);
        else
            tex_coords += " / " + to_extract_component_expression(args.coord, alt_coord_component);
    }

    if (!farg_str.empty())
        farg_str += ", ";

    if (imgtype.image.dim == DimCube && imgtype.image.arrayed && msl_options.emulate_cube_array)
    {
        farg_str += "spvCubemapTo2DArrayFace(" + tex_coords + ").xy";

        if (is_cube_fetch)
            farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ")";
        else
            farg_str +=
                    ", uint(spvCubemapTo2DArrayFace(" + tex_coords + ").z) + (uint(" +
                    round_fp_tex_coords(to_extract_component_expression(args.coord, alt_coord_component), coord_is_fp) +
                    ") * 6u)";

        add_spv_func_and_recompile(SPVFuncImplCubemapTo2DArrayFace);
    }
    else
    {
        farg_str += tex_coords;

        // If fetch from cube, add face explicitly
        if (is_cube_fetch)
        {
            // Special case for cube arrays, face and layer are packed in one dimension.
            if (imgtype.image.arrayed)
                farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ") % 6u";
            else
                farg_str +=
                        ", uint(" + round_fp_tex_coords(to_extract_component_expression(args.coord, 2), coord_is_fp) + ")";
        }

        // If array, use alt coord
        if (imgtype.image.arrayed)
        {
            // Special case for cube arrays, face and layer are packed in one dimension.
            if (imgtype.image.dim == DimCube && args.base.is_fetch)
            {
                farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ") / 6u";
            }
            else
            {
                farg_str +=
                        ", uint(" +
                        round_fp_tex_coords(to_extract_component_expression(args.coord, alt_coord_component), coord_is_fp) +
                        ")";
                if (imgtype.image.dim == DimSubpassData)
                {
                    if (msl_options.multiview)
                        farg_str += " + gl_ViewIndex";
                    else if (msl_options.arrayed_subpass_input)
                        farg_str += " + gl_Layer";
                }
            }
        }
        else if (imgtype.image.dim == DimSubpassData)
        {
            if (msl_options.multiview)
                farg_str += ", gl_ViewIndex";
            else if (msl_options.arrayed_subpass_input)
                farg_str += ", gl_Layer";
        }
    }
    // Depth compare reference value
    if (args.dref)
    {
        forward = forward && should_forward(args.dref);
        farg_str += ", ";

        auto &dref_type = expression_type(args.dref);

        string dref_expr;
        if (args.base.is_proj)
            dref_expr = join(to_enclosed_expression(args.dref), " / ",
                             to_extract_component_expression(args.coord, alt_coord_component));
        else
            dref_expr = to_expression(args.dref);

        if (sampling_type_needs_f32_conversion(dref_type))
            dref_expr = convert_to_f32(dref_expr, 1);

        farg_str += dref_expr;

        if (msl_options.is_macos() && (grad_x || grad_y))
        {
            // For sample compare, MSL does not support gradient2d for all targets (only iOS apparently according to docs).
            // However, the most common case here is to have a constant gradient of 0, as that is the only way to express
            // LOD == 0 in GLSL with sampler2DArrayShadow (cascaded shadow mapping).
            // We will detect a compile-time constant 0 value for gradient and promote that to level(0) on MSL.
            bool constant_zero_x = !grad_x || expression_is_constant_null(grad_x);
            bool constant_zero_y = !grad_y || expression_is_constant_null(grad_y);
            if (constant_zero_x && constant_zero_y &&
                (!imgtype.image.arrayed || !msl_options.sample_dref_lod_array_as_grad))
            {
                lod = 0;
                grad_x = 0;
                grad_y = 0;
                farg_str += ", level(0)";
            }
            else if (!msl_options.supports_msl_version(2, 3))
            {
                SPIRV_CROSS_THROW("Using non-constant 0.0 gradient() qualifier for sample_compare. This is not "
                                  "supported on macOS prior to MSL 2.3.");
            }
        }

        if (msl_options.is_macos() && bias)
        {
            // Bias is not supported either on macOS with sample_compare.
            // Verify it is compile-time zero, and drop the argument.
            if (expression_is_constant_null(bias))
            {
                bias = 0;
            }
            else if (!msl_options.supports_msl_version(2, 3))
            {
                SPIRV_CROSS_THROW("Using non-constant 0.0 bias() qualifier for sample_compare. This is not supported "
                                  "on macOS prior to MSL 2.3.");
            }
        }
    }
  10366. // LOD Options
  10367. // Metal does not support LOD for 1D textures.
  10368. if (bias && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D))
  10369. {
  10370. forward = forward && should_forward(bias);
  10371. farg_str += ", bias(" + to_expression(bias) + ")";
  10372. }
  10373. // Metal does not support LOD for 1D textures.
  10374. if (lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D))
  10375. {
  10376. forward = forward && should_forward(lod);
  10377. if (args.base.is_fetch)
  10378. {
  10379. farg_str += ", " + to_expression(lod);
  10380. }
  10381. else if (msl_options.sample_dref_lod_array_as_grad && args.dref && imgtype.image.arrayed)
  10382. {
  10383. if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
  10384. SPIRV_CROSS_THROW("Using non-constant 0.0 gradient() qualifier for sample_compare. This is not "
  10385. "supported on macOS prior to MSL 2.3.");
  10386. // Some Metal devices have a bug where the LoD is erroneously biased upward
  10387. // when using a level() argument. Since this doesn't happen as much with gradient2d(),
  10388. // if we perform the LoD calculation in reverse, we can pass a gradient
  10389. // instead.
  10390. // lod = log2(rhoMax/eta) -> exp2(lod) = rhoMax/eta
  10391. // If we make all of the scale factors the same, eta will be 1 and
  10392. // exp2(lod) = rho.
  10393. // rhoX = dP/dx * extent; rhoY = dP/dy * extent
  10394. // Therefore, dP/dx = dP/dy = exp2(lod)/extent.
  10395. // (Subtracting 0.5 before exponentiation gives better results.)
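			// Illustrative shape of the emitted argument (identifiers here are
			// placeholders, not actual generated names):
			//   gradient2d(exp2(lod - 0.5) / extent, exp2(lod - 0.5) / extent)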
			string grad_opt, extent, grad_coord;
			VariableID base_img = img;
			if (auto *combined = maybe_get<SPIRCombinedImageSampler>(img))
				base_img = combined->image;
			switch (imgtype.image.dim)
			{
			case Dim1D:
				grad_opt = "gradient2d";
				extent = join("float2(", to_expression(base_img), ".get_width(), 1.0)");
				break;
			case Dim2D:
				grad_opt = "gradient2d";
				extent = join("float2(", to_expression(base_img), ".get_width(), ", to_expression(base_img), ".get_height())");
				break;
			case DimCube:
				if (imgtype.image.arrayed && msl_options.emulate_cube_array)
				{
					grad_opt = "gradient2d";
					extent = join("float2(", to_expression(base_img), ".get_width())");
				}
				else
				{
					if (msl_options.agx_manual_cube_grad_fixup)
					{
						add_spv_func_and_recompile(SPVFuncImplGradientCube);
						grad_opt = "spvGradientCube";
						grad_coord = tex_coords + ", ";
					}
					else
					{
						grad_opt = "gradientcube";
					}
					extent = join("float3(", to_expression(base_img), ".get_width())");
				}
				break;
			default:
				grad_opt = "unsupported_gradient_dimension";
				extent = "float3(1.0)";
				break;
			}
			farg_str += join(", ", grad_opt, "(", grad_coord, "exp2(", to_expression(lod), " - 0.5) / ", extent,
			                 ", exp2(", to_expression(lod), " - 0.5) / ", extent, ")");
		}
		else
		{
			farg_str += ", level(" + to_expression(lod) + ")";
		}
	}
	else if (args.base.is_fetch && !lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D) &&
	         imgtype.image.dim != DimBuffer && !imgtype.image.ms && imgtype.image.sampled != 2)
	{
		// The LOD argument is optional in OpImageFetch, but Metal requires one; use 0 as the default.
		// Check the sampled type as well, because is_fetch is also used for OpImageRead in MSL.
		farg_str += ", 0";
	}

	// Metal does not support LOD for 1D textures.
	if ((grad_x || grad_y) && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D))
	{
		forward = forward && should_forward(grad_x);
		forward = forward && should_forward(grad_y);
		string grad_opt, grad_coord;
		switch (imgtype.image.dim)
		{
		case Dim1D:
		case Dim2D:
			grad_opt = "gradient2d";
			break;
		case Dim3D:
			grad_opt = "gradient3d";
			break;
		case DimCube:
			if (imgtype.image.arrayed && msl_options.emulate_cube_array)
			{
				grad_opt = "gradient2d";
			}
			else if (msl_options.agx_manual_cube_grad_fixup)
			{
				add_spv_func_and_recompile(SPVFuncImplGradientCube);
				grad_opt = "spvGradientCube";
				grad_coord = tex_coords + ", ";
			}
			else
			{
				grad_opt = "gradientcube";
			}
			break;
		default:
			grad_opt = "unsupported_gradient_dimension";
			break;
		}
		farg_str += join(", ", grad_opt, "(", grad_coord, to_expression(grad_x), ", ", to_expression(grad_y), ")");
	}

	if (args.min_lod)
	{
		if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2 and up.");
		forward = forward && should_forward(args.min_lod);
		farg_str += ", min_lod_clamp(" + to_expression(args.min_lod) + ")";
	}

	// Add offsets
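	// Offsets are normalized below to the dimensionality Metal expects, e.g. a 1D
	// texture promoted to 2D takes int2(offset, 0) rather than a scalar offset.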
	string offset_expr;
	const SPIRType *offset_type = nullptr;
	if (args.offset && !args.base.is_fetch && !args.has_array_offsets)
	{
		forward = forward && should_forward(args.offset);
		offset_expr = to_expression(args.offset);
		offset_type = &expression_type(args.offset);
	}

	if (!offset_expr.empty())
	{
		switch (imgtype.image.dim)
		{
		case Dim1D:
			if (!msl_options.texture_1D_as_2D)
				break;
			if (offset_type->vecsize > 1)
				offset_expr = enclose_expression(offset_expr) + ".x";
			farg_str += join(", int2(", offset_expr, ", 0)");
			break;
		case Dim2D:
			if (offset_type->vecsize > 2)
				offset_expr = enclose_expression(offset_expr) + ".xy";
			farg_str += ", " + offset_expr;
			break;
		case Dim3D:
			if (offset_type->vecsize > 3)
				offset_expr = enclose_expression(offset_expr) + ".xyz";
			farg_str += ", " + offset_expr;
			break;
		default:
			break;
		}
	}

	if (args.component && !args.has_array_offsets)
	{
		// If 2D has gather component, ensure it also has an offset arg
		if (imgtype.image.dim == Dim2D && offset_expr.empty())
			farg_str += ", int2(0)";
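		// Note that gather() on an MSL depth texture takes no component argument,
		// so the component is only emitted for non-depth images below.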
		if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler)
		{
			forward = forward && should_forward(args.component);

			uint32_t image_var = 0;
			if (const auto *combined = maybe_get<SPIRCombinedImageSampler>(img))
			{
				if (const auto *img_var = maybe_get_backing_variable(combined->image))
					image_var = img_var->self;
			}
			else if (const auto *var = maybe_get_backing_variable(img))
			{
				image_var = var->self;
			}

			if (image_var == 0 || !is_depth_image(expression_type(image_var), image_var))
				farg_str += ", " + to_component_argument(args.component);
		}
	}

	if (args.sample)
	{
		forward = forward && should_forward(args.sample);
		farg_str += ", ";
		farg_str += to_expression(args.sample);
	}

	*p_forward = forward;

	return farg_str;
}

// If the texture coordinates are floating point, invokes the MSL rint() function
// to round them to the nearest integer.
string CompilerMSL::round_fp_tex_coords(string tex_coords, bool coord_is_fp)
{
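	// e.g. round_fp_tex_coords("coord.z", true) yields "rint(coord.z)";
	// integer coordinates pass through unchanged.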
	return coord_is_fp ? ("rint(" + tex_coords + ")") : tex_coords;
}

// Returns a string to use in an image sampling function argument.
// The ID must be a scalar constant.
string CompilerMSL::to_component_argument(uint32_t id)
{
	uint32_t component_index = evaluate_constant_u32(id);
	switch (component_index)
	{
	case 0:
		return "component::x";
	case 1:
		return "component::y";
	case 2:
		return "component::z";
	case 3:
		return "component::w";
	default:
		SPIRV_CROSS_THROW("The value (" + to_string(component_index) + ") of OpConstant ID " + to_string(id) +
		                  " is not a valid Component index, which must be one of 0, 1, 2, or 3.");
	}
}

// Establish sampled image as expression object and assign the sampler to it.
void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
	set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id);
}

string CompilerMSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
                                  SmallVector<uint32_t> &inherited_expressions)
{
	auto *ops = stream(i);
	uint32_t result_type_id = ops[0];
	uint32_t img = ops[2];
	auto &result_type = get<SPIRType>(result_type_id);
	auto op = static_cast<Op>(i.op);
	bool is_gather = (op == OpImageGather || op == OpImageDrefGather);

	// Bypass pointers because we need the real image struct
	auto &type = expression_type(img);
	auto &imgtype = get<SPIRType>(type.self);

	const MSLConstexprSampler *constexpr_sampler = nullptr;
	bool is_dynamic_img_sampler = false;
	if (auto *var = maybe_get_backing_variable(img))
	{
		constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
		is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
	}

	string expr;
	if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler)
	{
		// If this needs sampler Y'CbCr conversion, we need to do some additional
		// processing.
		switch (constexpr_sampler->ycbcr_model)
		{
		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
			// Default
			break;
		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709:
			add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT709);
			expr += "spvConvertYCbCrBT709(";
			break;
		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601:
			add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT601);
			expr += "spvConvertYCbCrBT601(";
			break;
		case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020:
			add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT2020);
			expr += "spvConvertYCbCrBT2020(";
			break;
		default:
			SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion.");
		}

		if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
		{
			switch (constexpr_sampler->ycbcr_range)
			{
			case MSL_SAMPLER_YCBCR_RANGE_ITU_FULL:
				add_spv_func_and_recompile(SPVFuncImplExpandITUFullRange);
				expr += "spvExpandITUFullRange(";
				break;
			case MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW:
				add_spv_func_and_recompile(SPVFuncImplExpandITUNarrowRange);
				expr += "spvExpandITUNarrowRange(";
				break;
			default:
				SPIRV_CROSS_THROW("Invalid Y'CbCr range.");
			}
		}
	}
	else if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) &&
	         !is_dynamic_img_sampler)
	{
		add_spv_func_and_recompile(SPVFuncImplTextureSwizzle);
		expr += "spvTextureSwizzle(";
	}
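
	// At this point 'expr' holds zero or more opening wrapper calls, e.g.
	// "spvConvertYCbCrBT709(spvExpandITUNarrowRange(". The matching ")"s are
	// appended after the inner sample expression is emitted below.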
	string inner_expr = CompilerGLSL::to_texture_op(i, sparse, forward, inherited_expressions);

	if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler)
	{
		if (!constexpr_sampler->swizzle_is_identity())
		{
			static const char swizzle_names[] = "rgba";
			if (!constexpr_sampler->swizzle_has_one_or_zero())
			{
				// If we can, do it inline.
				expr += inner_expr + ".";
				for (uint32_t c = 0; c < 4; c++)
				{
					switch (constexpr_sampler->swizzle[c])
					{
					case MSL_COMPONENT_SWIZZLE_IDENTITY:
						expr += swizzle_names[c];
						break;
					case MSL_COMPONENT_SWIZZLE_R:
					case MSL_COMPONENT_SWIZZLE_G:
					case MSL_COMPONENT_SWIZZLE_B:
					case MSL_COMPONENT_SWIZZLE_A:
						expr += swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R];
						break;
					default:
						SPIRV_CROSS_THROW("Invalid component swizzle.");
					}
				}
			}
			else
			{
				// Otherwise, we need to emit a temporary and swizzle that.
				uint32_t temp_id = ir.increase_bound_by(1);
				emit_op(result_type_id, temp_id, inner_expr, false);
				for (auto &inherit : inherited_expressions)
					inherit_expression_dependencies(temp_id, inherit);
				inherited_expressions.clear();
				inherited_expressions.push_back(temp_id);

				switch (op)
				{
				case OpImageSampleDrefImplicitLod:
				case OpImageSampleImplicitLod:
				case OpImageSampleProjImplicitLod:
				case OpImageSampleProjDrefImplicitLod:
					register_control_dependent_expression(temp_id);
					break;
				default:
					break;
				}

				expr += type_to_glsl(result_type) + "(";
				for (uint32_t c = 0; c < 4; c++)
				{
					switch (constexpr_sampler->swizzle[c])
					{
					case MSL_COMPONENT_SWIZZLE_IDENTITY:
						expr += to_expression(temp_id) + "." + swizzle_names[c];
						break;
					case MSL_COMPONENT_SWIZZLE_ZERO:
						expr += "0";
						break;
					case MSL_COMPONENT_SWIZZLE_ONE:
						expr += "1";
						break;
					case MSL_COMPONENT_SWIZZLE_R:
					case MSL_COMPONENT_SWIZZLE_G:
					case MSL_COMPONENT_SWIZZLE_B:
					case MSL_COMPONENT_SWIZZLE_A:
						expr += to_expression(temp_id) + "." +
						        swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R];
						break;
					default:
						SPIRV_CROSS_THROW("Invalid component swizzle.");
					}
					if (c < 3)
						expr += ", ";
				}
				expr += ")";
			}
		}
		else
			expr += inner_expr;

		if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
		{
			expr += join(", ", constexpr_sampler->bpc, ")");
			if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
				expr += ")";
		}
	}
	else
	{
		expr += inner_expr;
		if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) &&
		    !is_dynamic_img_sampler)
		{
			// Add the swizzle constant from the swizzle buffer.
			expr += ", " + to_swizzle_expression(img) + ")";
			used_swizzle_buffer = true;
		}
	}

	return expr;
}

static string create_swizzle(MSLComponentSwizzle swizzle)
{
	switch (swizzle)
	{
	case MSL_COMPONENT_SWIZZLE_IDENTITY:
		return "spvSwizzle::none";
	case MSL_COMPONENT_SWIZZLE_ZERO:
		return "spvSwizzle::zero";
	case MSL_COMPONENT_SWIZZLE_ONE:
		return "spvSwizzle::one";
	case MSL_COMPONENT_SWIZZLE_R:
		return "spvSwizzle::red";
	case MSL_COMPONENT_SWIZZLE_G:
		return "spvSwizzle::green";
	case MSL_COMPONENT_SWIZZLE_B:
		return "spvSwizzle::blue";
	case MSL_COMPONENT_SWIZZLE_A:
		return "spvSwizzle::alpha";
	default:
		SPIRV_CROSS_THROW("Invalid component swizzle.");
	}
}

// Returns a string representation of the ID, usable as a function arg.
// Manufacture automatic sampler arg for SampledImage texture.
string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
{
	string arg_str;

	auto &type = expression_type(id);
	bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
	// If the argument *itself* is a "dynamic" combined-image sampler, then we can just pass that around.
	bool arg_is_dynamic_img_sampler = has_extended_decoration(id, SPIRVCrossDecorationDynamicImageSampler);
	if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler)
		arg_str = join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">(");

	auto *c = maybe_get<SPIRConstant>(id);
	if (msl_options.force_native_arrays && c && !get<SPIRType>(c->constant_type).array.empty())
	{
		// If we are passing a constant array directly to a function for some reason,
		// the callee will expect an argument in thread const address space
		// (since we can only bind to arrays with references in MSL).
		// To resolve this, we must emit a copy in this address space.
		// This kind of code gen should be rare enough that performance is not a real concern.
		// Inline the SPIR-V to avoid this kind of suboptimal codegen.
		//
		// We risk calling this inside a continue block (invalid code),
		// so just create a thread local copy in the current function.
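		// The copy gets a deterministic name derived from the constant's ID,
		// e.g. ID 42 becomes "_42_array_copy".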
		arg_str = join("_", id, "_array_copy");
		auto &constants = current_function->constant_arrays_needed_on_stack;
		auto itr = find(begin(constants), end(constants), ID(id));
		if (itr == end(constants))
		{
			force_recompile();
			constants.push_back(id);
		}
	}
	// Dereference pointer variables where needed.
	// FIXME: This dereference is actually backwards. We should really just support passing pointer variables between functions.
	else if (should_dereference(id))
		arg_str += dereference_expression(type, CompilerGLSL::to_func_call_arg(arg, id));
	else
		arg_str += CompilerGLSL::to_func_call_arg(arg, id);

	// Need to check the base variable in case we need to apply a qualified alias.
	uint32_t var_id = 0;
	auto *var = maybe_get<SPIRVariable>(id);
	if (var)
		var_id = var->basevariable;

	if (!arg_is_dynamic_img_sampler)
	{
		auto *constexpr_sampler = find_constexpr_sampler(var_id ? var_id : id);
		if (type.basetype == SPIRType::SampledImage)
		{
			// Manufacture automatic plane args for multiplanar texture
			uint32_t planes = 1;
			if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
			{
				planes = constexpr_sampler->planes;
				// If this parameter isn't aliasing a global, then we need to use
				// the special "dynamic image-sampler" class to pass it--and we need
				// to use it for *every* non-alias parameter, in case a combined
				// image-sampler with a Y'CbCr conversion is passed. Hopefully, this
				// pathological case is so rare that it should never be hit in practice.
				if (!arg.alias_global_variable)
					add_spv_func_and_recompile(SPVFuncImplDynamicImageSampler);
			}
			for (uint32_t i = 1; i < planes; i++)
				arg_str += join(", ", CompilerGLSL::to_func_call_arg(arg, id), plane_name_suffix, i);
			// Manufacture automatic sampler arg if the arg is a SampledImage texture.
			if (type.image.dim != DimBuffer)
				arg_str += ", " + to_sampler_expression(var_id ? var_id : id);

			// Add sampler Y'CbCr conversion info if we have it
			if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
			{
				SmallVector<string> samp_args;

				switch (constexpr_sampler->resolution)
				{
				case MSL_FORMAT_RESOLUTION_444:
					// Default
					break;
				case MSL_FORMAT_RESOLUTION_422:
					samp_args.push_back("spvFormatResolution::_422");
					break;
				case MSL_FORMAT_RESOLUTION_420:
					samp_args.push_back("spvFormatResolution::_420");
					break;
				default:
					SPIRV_CROSS_THROW("Invalid format resolution.");
				}

				if (constexpr_sampler->chroma_filter != MSL_SAMPLER_FILTER_NEAREST)
					samp_args.push_back("spvChromaFilter::linear");

				if (constexpr_sampler->x_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN)
					samp_args.push_back("spvXChromaLocation::midpoint");
				if (constexpr_sampler->y_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN)
					samp_args.push_back("spvYChromaLocation::midpoint");

				switch (constexpr_sampler->ycbcr_model)
				{
				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
					// Default
					break;
				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
					samp_args.push_back("spvYCbCrModelConversion::ycbcr_identity");
					break;
				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709:
					samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_709");
					break;
				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601:
					samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_601");
					break;
				case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020:
					samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_2020");
					break;
				default:
					SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion.");
				}

				if (constexpr_sampler->ycbcr_range != MSL_SAMPLER_YCBCR_RANGE_ITU_FULL)
					samp_args.push_back("spvYCbCrRange::itu_narrow");
				samp_args.push_back(join("spvComponentBits(", constexpr_sampler->bpc, ")"));
				arg_str += join(", spvYCbCrSampler(", merge(samp_args), ")");
			}
		}
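
		// Pack the constexpr sampler's component swizzle into one uint,
		// one spvSwizzle enum value per byte, with component 0 (r) in the low byte.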
		if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
			arg_str += join(", (uint(", create_swizzle(constexpr_sampler->swizzle[3]), ") << 24) | (uint(",
			                create_swizzle(constexpr_sampler->swizzle[2]), ") << 16) | (uint(",
			                create_swizzle(constexpr_sampler->swizzle[1]), ") << 8) | uint(",
			                create_swizzle(constexpr_sampler->swizzle[0]), ")");
		else if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
			arg_str += ", " + to_swizzle_expression(var_id ? var_id : id);

		if (buffer_requires_array_length(var_id))
			arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id);

		if (is_dynamic_img_sampler)
			arg_str += ")";
	}

	// Emulate texture2D atomic operations
	auto *backing_var = maybe_get_backing_variable(var_id);
	if (backing_var && atomic_image_vars_emulated.count(backing_var->self))
	{
		arg_str += ", " + to_expression(var_id) + "_atomic";
	}

	return arg_str;
}

// If the ID represents a sampled image that has been assigned a sampler already,
// generate an expression for the sampler, otherwise generate a fake sampler name
// by appending a suffix to the expression constructed from the ID.
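// e.g. assuming the default "Smplr" suffix, the image expression "tex[2]"
// yields the sampler expression "texSmplr[2]".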
string CompilerMSL::to_sampler_expression(uint32_t id)
{
	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
	if (combined && combined->sampler)
		return to_expression(combined->sampler);

	uint32_t expr_id = combined ? uint32_t(combined->image) : id;

	// Constexpr samplers are declared as local variables,
	// so exclude any qualifier names on the image expression.
	if (auto *var = maybe_get_backing_variable(expr_id))
	{
		uint32_t img_id = var->basevariable ? var->basevariable : VariableID(var->self);
		if (find_constexpr_sampler(img_id))
			return Compiler::to_name(img_id) + sampler_name_suffix;
	}

	auto img_expr = to_expression(expr_id);
	auto index = img_expr.find_first_of('[');
	if (index == string::npos)
		return img_expr + sampler_name_suffix;
	else
		return img_expr.substr(0, index) + sampler_name_suffix + img_expr.substr(index);
}

string CompilerMSL::to_swizzle_expression(uint32_t id)
{
	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
	auto expr = to_expression(combined ? combined->image : VariableID(id));
	auto index = expr.find_first_of('[');

	// If an image is part of an argument buffer translate this to a legal identifier.
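	// e.g. the '.' in "spvDescriptorSet0.tex" is rewritten to '_' so the swizzle
	// constant becomes an identifier like "spvDescriptorSet0_texSwzl"
	// (assuming the default swizzle suffix).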
	string::size_type period = 0;
	while ((period = expr.find_first_of('.', period)) != string::npos && period < index)
		expr[period] = '_';

	if (index == string::npos)
		return expr + swizzle_name_suffix;
	else
	{
		auto image_expr = expr.substr(0, index);
		auto array_expr = expr.substr(index);
		return image_expr + swizzle_name_suffix + array_expr;
	}
}

string CompilerMSL::to_buffer_size_expression(uint32_t id)
{
	auto expr = to_expression(id);
	auto index = expr.find_first_of('[');

	// This is quite crude, but we need to translate the reference name (*spvDescriptorSetN.name) to
	// the pointer expression spvDescriptorSetN.name to make a reasonable expression here.
	// This only happens if we have argument buffers and we are using OpArrayLength on a lone SSBO in that set.
	if (expr.size() >= 3 && expr[0] == '(' && expr[1] == '*')
		expr = address_of_expression(expr);
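
	// e.g. "(*spvDescriptorSet0.foo)" becomes "spvDescriptorSet0.foo", and the
	// '.' is then rewritten to '_' below to form a flat identifier.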
	// If a buffer is part of an argument buffer translate this to a legal identifier.
	for (auto &c : expr)
		if (c == '.')
			c = '_';

	if (index == string::npos)
		return expr + buffer_size_name_suffix;
	else
	{
		auto buffer_expr = expr.substr(0, index);
		auto array_expr = expr.substr(index);
		if (auto var = maybe_get_backing_variable(id))
		{
			if (is_var_runtime_size_array(*var))
			{
				if (!msl_options.runtime_array_rich_descriptor)
					SPIRV_CROSS_THROW("OpArrayLength requires rich descriptor format");

				auto last_pos = array_expr.find_last_of(']');
				if (last_pos != std::string::npos)
					return buffer_expr + ".length(" + array_expr.substr(1, last_pos - 1) + ")";
			}
		}
		return buffer_expr + buffer_size_name_suffix + array_expr;
	}
}

// Checks whether the type is a Block all of whose members have DecorationPatch.
bool CompilerMSL::is_patch_block(const SPIRType &type)
{
	if (!has_decoration(type.self, DecorationBlock))
		return false;

	for (uint32_t i = 0; i < type.member_types.size(); i++)
	{
		if (!has_member_decoration(type.self, i, DecorationPatch))
			return false;
	}

	return true;
}

// Checks whether the ID is a row_major matrix that requires conversion before use
bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
{
	auto *e = maybe_get<SPIRExpression>(id);
	if (e)
		return e->need_transpose;
	else
		return has_decoration(id, DecorationRowMajor);
}

// Checks whether the member is a row_major matrix that requires conversion before use
bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
{
	return has_member_decoration(type.self, index, DecorationRowMajor);
}

string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t physical_type_id,
                                             bool is_packed, bool relaxed)
{
	if (!is_matrix(exp_type))
	{
		return CompilerGLSL::convert_row_major_matrix(std::move(exp_str), exp_type, physical_type_id, is_packed, relaxed);
	}
	else
	{
		strip_enclosed_expression(exp_str);
		if (physical_type_id != 0 || is_packed)
			exp_str = unpack_expression_type(exp_str, exp_type, physical_type_id, is_packed, true);
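		// Row-major data is stored transposed relative to its SPIR-V type, so a
		// column-major view of it is simply the expression wrapped in transpose().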
		return join("transpose(", exp_str, ")");
	}
}

// Called automatically at the end of the entry point function
void CompilerMSL::emit_fixup()
{
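	// fixup_clipspace remaps a GL-style clip-space z in [-1, 1] to Metal's [0, 1];
	// flip_vert_y inverts Y for clip spaces with the opposite vertical convention.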
	if (is_vertex_like_shader() && stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer)
	{
		if (options.vertex.fixup_clipspace)
			statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name,
			          ".w) * 0.5; // Adjust clip-space for Metal");

		if (options.vertex.flip_vert_y)
			statement(qual_pos_var_name, ".y = -(", qual_pos_var_name, ".y);", " // Invert Y-axis for Metal");
	}
}

// Return a string defining a structure member, with padding and packing.
string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
                                     const string &qualifier)
{
	uint32_t orig_member_type_id = member_type_id;
	if (member_is_remapped_physical_type(type, index))
		member_type_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
	auto &physical_type = get<SPIRType>(member_type_id);

	// If this member is packed, mark it as so.
	string pack_pfx;

	// Allow Metal to use the array<T> template to make arrays a value type
	uint32_t orig_id = 0;
	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID))
		orig_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID);

	bool row_major = false;
	if (is_matrix(physical_type))
		row_major = has_member_decoration(type.self, index, DecorationRowMajor);

	SPIRType row_major_physical_type { OpTypeMatrix };
	const SPIRType *declared_type = &physical_type;

	// If a struct is being declared with physical layout,
	// do not use array<T> wrappers.
	// This avoids a lot of complicated cases with packed vectors and matrices,
	// and generally we cannot copy full arrays in and out of buffers into Function
	// address space.
	// Array of resources should also be declared as builtin arrays.
	if (has_member_decoration(type.self, index, DecorationOffset))
		is_using_builtin_array = true;
	else if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
		is_using_builtin_array = true;

	if (member_is_packed_physical_type(type, index))
	{
		// If we're packing a matrix, output an appropriate typedef
		if (physical_type.basetype == SPIRType::Struct)
		{
			SPIRV_CROSS_THROW("Cannot emit a packed struct currently.");
		}
		else if (is_matrix(physical_type))
		{
			uint32_t rows = physical_type.vecsize;
			uint32_t cols = physical_type.columns;
			pack_pfx = "packed_";
			if (row_major)
			{
				// These are stored transposed.
				rows = physical_type.columns;
				cols = physical_type.vecsize;
				pack_pfx = "packed_rm_";
			}
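			// The typedef declares the packed matrix as an array of packed vectors,
			// e.g. a column-major float2x3 becomes:
			//   typedef packed_float3 packed_float2x3[2];
			// and its row-major (transposed storage) variant:
			//   typedef packed_float2 packed_rm_float2x3[3];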
			string base_type = physical_type.width == 16 ? "half" : "float";
			string td_line = "typedef ";
			td_line += "packed_" + base_type + to_string(rows);
			td_line += " " + pack_pfx;
			// Use the actual matrix size here.
			td_line += base_type + to_string(physical_type.columns) + "x" + to_string(physical_type.vecsize);
			td_line += "[" + to_string(cols) + "]";
			td_line += ";";
			add_typedef_line(td_line);
		}
		else if (!is_scalar(physical_type)) // scalar type is already packed.
			pack_pfx = "packed_";
	}
	else if (is_matrix(physical_type))
	{
		if (!msl_options.supports_msl_version(3, 0) &&
		    has_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct))
		{
			pack_pfx = "spvStorage_";
			add_spv_func_and_recompile(SPVFuncImplStorageMatrix);
			// The pack prefix causes problems with array<T> wrappers.
			is_using_builtin_array = true;
		}
		if (row_major)
		{
			// Need to declare type with flipped vecsize/columns.
			row_major_physical_type = physical_type;
			swap(row_major_physical_type.vecsize, row_major_physical_type.columns);
			declared_type = &row_major_physical_type;
		}
	}

	// iOS Tier 1 argument buffers do not support writable images.
	if (physical_type.basetype == SPIRType::Image &&
	    physical_type.image.sampled == 2 &&
	    msl_options.is_ios() &&
	    msl_options.argument_buffers_tier <= Options::ArgumentBuffersTier::Tier1 &&
	    !has_decoration(orig_id, DecorationNonWritable))
	{
		SPIRV_CROSS_THROW("Writable images are not allowed on Tier1 argument buffers on iOS.");
	}

	// Array information is baked into these types.
	string array_type;
	if (physical_type.basetype != SPIRType::Image && physical_type.basetype != SPIRType::Sampler &&
	    physical_type.basetype != SPIRType::SampledImage)
	{
		BuiltIn builtin = BuiltInMax;

		// Special handling. In [[stage_out]] or [[stage_in]] blocks,
		// we need flat arrays, but if we're somehow declaring gl_PerVertex for constant array reasons, we want
		// template array types to be declared.
		bool is_ib_in_out =
		    ((stage_out_var_id && get_stage_out_struct_type().self == type.self &&
		      variable_storage_requires_stage_io(StorageClassOutput)) ||
		     (stage_in_var_id && get_stage_in_struct_type().self == type.self &&
		      variable_storage_requires_stage_io(StorageClassInput)));
		if (is_ib_in_out && is_member_builtin(type, index, &builtin))
			is_using_builtin_array = true;
		array_type = type_to_array_glsl(physical_type, orig_id);
	}

	if (orig_id)
	{
		auto *data_type = declared_type;
		if (is_pointer(*data_type))
			data_type = &get_pointee_type(*data_type);

		if (is_array(*data_type) && get_resource_array_size(*data_type, orig_id) == 0)
		{
			// Hack for declaring unsized array of resources. Need to declare dummy sized array by value inline.
			// This can then be wrapped in spvDescriptorArray as usual.
			array_type = "[1] /* unsized array hack */";
		}
	}

	string decl_type;
	if (declared_type->vecsize > 4)
	{
		auto orig_type = get<SPIRType>(orig_member_type_id);
		if (is_matrix(orig_type) && row_major)
			swap(orig_type.vecsize, orig_type.columns);
		orig_type.columns = 1;
		decl_type = type_to_glsl(orig_type, orig_id, true);

		if (declared_type->columns > 1)
			decl_type = join("spvPaddedStd140Matrix<", decl_type, ", ", declared_type->columns, ">");
		else
			decl_type = join("spvPaddedStd140<", decl_type, ">");
	}
	else
		decl_type = type_to_glsl(*declared_type, orig_id, true);

	const char *overlapping_binding_tag =
	    has_extended_member_decoration(type.self, index, SPIRVCrossDecorationOverlappingBinding) ?
	        "// Overlapping binding: " : "";

	auto result = join(overlapping_binding_tag, pack_pfx, decl_type, " ", qualifier,
	                   to_member_name(type, index), member_attribute_qualifier(type, index), array_type, ";");

	is_using_builtin_array = false;
	return result;
}

// Emit a structure member, padding and packing to maintain the correct member alignments.
void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
                                     const string &qualifier, uint32_t)
{
	// If this member requires padding to maintain its declared offset, emit a dummy padding member before it.
	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget))
	{
		uint32_t pad_len = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget);
		statement("char _m", index, "_pad", "[", pad_len, "];");
	}

	// Handle HLSL-style 0-based vertex/instance index.
	builtin_declaration = true;
	statement(to_struct_member(type, member_type_id, index, qualifier));
	builtin_declaration = false;
}

void CompilerMSL::emit_struct_padding_target(const SPIRType &type)
{
	uint32_t struct_size = get_declared_struct_size_msl(type, true, true);
	uint32_t target_size = get_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget);
	if (target_size < struct_size)
		SPIRV_CROSS_THROW("Cannot pad with negative bytes.");
	else if (target_size > struct_size)
		statement("char _m0_final_padding[", target_size - struct_size, "];");
}

// Return a MSL qualifier for the specified function attribute member
string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t index)
{
	auto &execution = get_entry_point();

	uint32_t mbr_type_id = type.member_types[index];
	auto &mbr_type = get<SPIRType>(mbr_type_id);

	BuiltIn builtin = BuiltInMax;
	bool is_builtin = is_member_builtin(type, index, &builtin);

	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
	{
		string quals = join(
		    " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")");
		if (interlocked_resources.count(
		        get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)))
			quals += ", raster_order_group(0)";
		quals += "]]";
		return quals;
	}

	// Vertex function inputs
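	// e.g. a user vertex input at location 3 is declared as
	//   float4 foo [[attribute(3)]];
	// (the member name here is illustrative).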
	if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput)
	{
		if (is_builtin)
		{
			switch (builtin)
			{
			case BuiltInVertexId:
			case BuiltInVertexIndex:
			case BuiltInBaseVertex:
			case BuiltInInstanceId:
			case BuiltInInstanceIndex:
			case BuiltInBaseInstance:
				if (msl_options.vertex_for_tessellation)
					return "";
				return string(" [[") + builtin_qualifier(builtin) + "]]";
			case BuiltInDrawIndex:
				SPIRV_CROSS_THROW("DrawIndex is not supported in MSL.");
			default:
				return "";
			}
		}

		uint32_t locn;
		if (is_builtin)
			locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index);
		else
			locn = get_member_location(type.self, index);

		if (locn != k_unknown_location)
			return string(" [[attribute(") + convert_to_string(locn) + ")]]";
	}

	// Vertex and tessellation evaluation function outputs
	if (((execution.model == ExecutionModelVertex && !msl_options.vertex_for_tessellation) || is_tese_shader()) &&
	    type.storage == StorageClassOutput)
	{
		if (is_builtin)
		{
			switch (builtin)
			{
			case BuiltInPointSize:
				// Only mark the PointSize builtin if really rendering points.
				// Some shaders may include a PointSize builtin even when used to render
				// non-point topologies, and Metal will reject this builtin when compiling
				// the shader into a render pipeline that uses a non-point topology.
				return msl_options.enable_point_size_builtin ? (string(" [[") + builtin_qualifier(builtin) + "]]") : "";
			case BuiltInViewportIndex:
				if (!msl_options.supports_msl_version(2, 0))
					SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0.");
				/* fallthrough */
			case BuiltInPosition:
			case BuiltInLayer:
				return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
			case BuiltInClipDistance:
				if (has_member_decoration(type.self, index, DecorationIndex))
					return join(" [[user(clip", get_member_decoration(type.self, index, DecorationIndex), ")]]");
				else
					return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
			case BuiltInCullDistance:
				if (has_member_decoration(type.self, index, DecorationIndex))
					return join(" [[user(cull", get_member_decoration(type.self, index, DecorationIndex), ")]]");
				else
					return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
			default:
				return "";
			}
		}

		string loc_qual = member_location_attribute_qualifier(type, index);
		if (!loc_qual.empty())
			return join(" [[", loc_qual, "]]");
	}

	if (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation && type.storage == StorageClassOutput)
	{
		// For this type of shader, we always arrange for it to capture its
		// output to a buffer. For this reason, qualifiers are irrelevant here.
		if (is_builtin)
			// We still have to assign a location so the output struct will sort correctly.
			get_or_allocate_builtin_output_member_location(builtin, type.self, index);
		return "";
	}

	// Tessellation control function inputs
	if (is_tesc_shader() && type.storage == StorageClassInput)
	{
		if (is_builtin)
		{
			switch (builtin)
			{
			case BuiltInInvocationId:
			case BuiltInPrimitiveId:
				if (msl_options.multi_patch_workgroup)
					return "";
				return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
			case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
			case BuiltInSubgroupSize: // FIXME: Should work in any stage
				if (msl_options.emulate_subgroups)
					return "";
				return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
			case BuiltInPatchVertices:
				return "";
			// Others come from stage input.
			default:
				break;
			}
		}
		if (msl_options.multi_patch_workgroup)
			return "";

		uint32_t locn;
		if (is_builtin)
			locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index);
		else
			locn = get_member_location(type.self, index);

		if (locn != k_unknown_location)
			return string(" [[attribute(") + convert_to_string(locn) + ")]]";
	}

	// Tessellation control function outputs
	if (is_tesc_shader() && type.storage == StorageClassOutput)
	{
		// For this type of shader, we always arrange for it to capture its
		// output to a buffer. For this reason, qualifiers are irrelevant here.
		if (is_builtin)
			// We still have to assign a location so the output struct will sort correctly.
			get_or_allocate_builtin_output_member_location(builtin, type.self, index);
		return "";
	}

	// Tessellation evaluation function inputs
	if (is_tese_shader() && type.storage == StorageClassInput)
	{
		if (is_builtin)
		{
			switch (builtin)
			{
			case BuiltInPrimitiveId:
			case BuiltInTessCoord:
				return string(" [[") + builtin_qualifier(builtin) + "]]";
			case BuiltInPatchVertices:
				return "";
			// Others come from stage input.
			default:
				break;
			}
		}

		if (msl_options.raw_buffer_tese_input)
			return "";

		// The special control point array must not be marked with an attribute.
		if (get_type(type.member_types[index]).basetype == SPIRType::ControlPointArray)
			return "";

		uint32_t locn;
		if (is_builtin)
			locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index);
		else
			locn = get_member_location(type.self, index);

		if (locn != k_unknown_location)
			return string(" [[attribute(") + convert_to_string(locn) + ")]]";
	}

	// Tessellation evaluation function outputs were handled above.

	// Fragment function inputs
	if (execution.model == ExecutionModelFragment && type.storage == StorageClassInput)
	{
		string quals;
		if (is_builtin)
		{
			switch (builtin)
			{
			case BuiltInViewIndex:
				if (!msl_options.multiview || !msl_options.multiview_layered_rendering)
					break;
				/* fallthrough */
			case BuiltInFrontFacing:
			case BuiltInPointCoord:
			case BuiltInFragCoord:
			case BuiltInSampleId:
			case BuiltInSampleMask:
			case BuiltInLayer:
			case BuiltInBaryCoordKHR:
			case BuiltInBaryCoordNoPerspKHR:
				quals = builtin_qualifier(builtin);
				break;
			case BuiltInClipDistance:
				return join(" [[user(clip", get_member_decoration(type.self, index, DecorationIndex), ")]]");
			case BuiltInCullDistance:
				return join(" [[user(cull", get_member_decoration(type.self, index, DecorationIndex), ")]]");
			default:
				break;
			}
		}
		else
			quals = member_location_attribute_qualifier(type, index);

		if (builtin == BuiltInBaryCoordKHR || builtin == BuiltInBaryCoordNoPerspKHR)
		{
			if (has_member_decoration(type.self, index, DecorationFlat) ||
			    has_member_decoration(type.self, index, DecorationCentroid) ||
			    has_member_decoration(type.self, index, DecorationSample) ||
			    has_member_decoration(type.self, index, DecorationNoPerspective))
			{
				// NoPerspective is baked into the builtin type.
				SPIRV_CROSS_THROW(
				    "Flat, Centroid, Sample, NoPerspective decorations are not supported for BaryCoord inputs.");
			}
		}

		// Don't bother decorating integers with the 'flat' attribute; it's
		// the default (in fact, the only option). Also don't bother with the
		// FragCoord builtin; it's always noperspective on Metal.
		if (!type_is_integral(mbr_type) && (!is_builtin || builtin != BuiltInFragCoord))
		{
			if (has_member_decoration(type.self, index, DecorationFlat))
			{
				if (!quals.empty())
					quals += ", ";
				quals += "flat";
			}
			else if (has_member_decoration(type.self, index, DecorationCentroid))
			{
				if (!quals.empty())
					quals += ", ";
				if (has_member_decoration(type.self, index, DecorationNoPerspective))
					quals += "centroid_no_perspective";
				else
					quals += "centroid_perspective";
			}
			else if (has_member_decoration(type.self, index, DecorationSample))
			{
				if (!quals.empty())
					quals += ", ";
				if (has_member_decoration(type.self, index, DecorationNoPerspective))
					quals += "sample_no_perspective";
				else
					quals += "sample_perspective";
			}
			else if (has_member_decoration(type.self, index, DecorationNoPerspective))
			{
				if (!quals.empty())
					quals += ", ";
				quals += "center_no_perspective";
			}
		}

		if (!quals.empty())
			return " [[" + quals + "]]";
	}

	// Fragment function outputs
	if (execution.model == ExecutionModelFragment && type.storage == StorageClassOutput)
	{
		if (is_builtin)
		{
			switch (builtin)
			{
			case BuiltInFragStencilRefEXT:
				// Similar to PointSize, only mark FragStencilRef if there's a stencil buffer.
				// Some shaders may include a FragStencilRef builtin even when used to render
				// without a stencil attachment, and Metal will reject this builtin
				// when compiling the shader into a render pipeline that does not set
				// stencilAttachmentPixelFormat.
				if (!msl_options.enable_frag_stencil_ref_builtin)
					return "";
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Stencil export only supported in MSL 2.1 and up.");
				return string(" [[") + builtin_qualifier(builtin) + "]]";
			case BuiltInFragDepth:
				// Ditto FragDepth.
				if (!msl_options.enable_frag_depth_builtin)
					return "";
				/* fallthrough */
			case BuiltInSampleMask:
				return string(" [[") + builtin_qualifier(builtin) + "]]";
			default:
				return "";
			}
		}
		uint32_t locn = get_member_location(type.self, index);
		// Metal will likely complain about missing color attachments, too.
		if (locn != k_unknown_location && !(msl_options.enable_frag_output_mask & (1 << locn)))
			return "";
		if (locn != k_unknown_location && has_member_decoration(type.self, index, DecorationIndex))
			return join(" [[color(", locn, "), index(", get_member_decoration(type.self, index, DecorationIndex),
			            ")]]");
		else if (locn != k_unknown_location)
			return join(" [[color(", locn, ")]]");
		else if (has_member_decoration(type.self, index, DecorationIndex))
			return join(" [[index(", get_member_decoration(type.self, index, DecorationIndex), ")]]");
		else
			return "";
	}

	// Compute function inputs
	if (execution.model == ExecutionModelGLCompute && type.storage == StorageClassInput)
	{
		if (is_builtin)
		{
			switch (builtin)
			{
			case BuiltInNumSubgroups:
			case BuiltInSubgroupId:
			case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
			case BuiltInSubgroupSize: // FIXME: Should work in any stage
				if (msl_options.emulate_subgroups)
					break;
				/* fallthrough */
			case BuiltInGlobalInvocationId:
			case BuiltInWorkgroupId:
			case BuiltInNumWorkgroups:
			case BuiltInLocalInvocationId:
			case BuiltInLocalInvocationIndex:
				return string(" [[") + builtin_qualifier(builtin) + "]]";
			default:
				return "";
			}
		}
	}

	return "";
}

// A user-defined output variable is considered to match an input variable in the subsequent
// stage if the two variables are declared with the same Location and Component decoration and
// match in type and decoration, except that interpolation decorations are not required to match.
// For the purposes of interface matching, variables declared without a Component decoration are
// considered to have a Component decoration of zero.
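// e.g. Location 2 with Component 1 produces the qualifier "user(locn2_1)".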
string CompilerMSL::member_location_attribute_qualifier(const SPIRType &type, uint32_t index)
{
	string quals;
	uint32_t comp;
	uint32_t locn = get_member_location(type.self, index, &comp);
	if (locn != k_unknown_location)
	{
		quals += "user(locn";
		quals += convert_to_string(locn);
		if (comp != k_unknown_component && comp != 0)
		{
			quals += "_";
			quals += convert_to_string(comp);
		}
		quals += ")";
	}
	return quals;
}

// Returns the location decoration of the member with the specified index in the specified type.
// If the location of the member has been explicitly set, that location is used.
// If not, returns k_unknown_location (and k_unknown_component for the component, if requested).
uint32_t CompilerMSL::get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp) const
{
	if (comp)
	{
		if (has_member_decoration(type_id, index, DecorationComponent))
			*comp = get_member_decoration(type_id, index, DecorationComponent);
		else
			*comp = k_unknown_component;
	}

	if (has_member_decoration(type_id, index, DecorationLocation))
		return get_member_decoration(type_id, index, DecorationLocation);
	else
		return k_unknown_location;
}

uint32_t CompilerMSL::get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin,
                                                                    uint32_t type_id, uint32_t index,
                                                                    uint32_t *comp)
{
	uint32_t loc = get_member_location(type_id, index, comp);
	if (loc != k_unknown_location)
		return loc;

	if (comp)
		*comp = k_unknown_component;

	// Late allocation. Find a location which is unused by the application.
	// This can happen for built-in inputs in tessellation which are mixed and matched with user inputs.
	auto &mbr_type = get<SPIRType>(get<SPIRType>(type_id).member_types[index]);
	uint32_t count = type_to_location_count(mbr_type);

	loc = 0;
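	// First-fit scan: walk upward from location 0 until we find a span of
	// 'count' consecutive locations not already in use.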
	const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool {
		for (uint32_t i = 0; i < location_count; i++)
			if (location_inputs_in_use.count(location + i) != 0)
				return true;
		return false;
	};
	while (location_range_in_use(loc, count))
		loc++;
	set_member_decoration(type_id, index, DecorationLocation, loc);

	// Triangle tess level inputs are shared in one packed float4,
	// mark both builtins as sharing one location.
	if (!msl_options.raw_buffer_tese_input && is_tessellating_triangles() &&
	    (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter))
	{
		builtin_to_automatic_input_location[BuiltInTessLevelInner] = loc;
		builtin_to_automatic_input_location[BuiltInTessLevelOuter] = loc;
	}
	else
		builtin_to_automatic_input_location[builtin] = loc;

	mark_location_as_used_by_shader(loc, mbr_type, StorageClassInput, true);
	return loc;
}

uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin,
                                                                     uint32_t type_id, uint32_t index,
                                                                     uint32_t *comp)
{
	uint32_t loc = get_member_location(type_id, index, comp);
	if (loc != k_unknown_location)
		return loc;
	loc = 0;

	if (comp)
		*comp = k_unknown_component;

	// Late allocation. Find a location which is unused by the application.
	// This can happen for built-in outputs in tessellation which are mixed and matched with user inputs.
	auto &mbr_type = get<SPIRType>(get<SPIRType>(type_id).member_types[index]);
	uint32_t count = type_to_location_count(mbr_type);

	const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool {
		for (uint32_t i = 0; i < location_count; i++)
			if (location_outputs_in_use.count(location + i) != 0)
				return true;
		return false;
	};
	while (location_range_in_use(loc, count))
		loc++;
	set_member_decoration(type_id, index, DecorationLocation, loc);

	// Triangle tess level outputs are shared in one packed float4;
	// mark both builtins as sharing one location.
  11635. if (is_tessellating_triangles() && (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter))
  11636. {
  11637. builtin_to_automatic_output_location[BuiltInTessLevelInner] = loc;
  11638. builtin_to_automatic_output_location[BuiltInTessLevelOuter] = loc;
  11639. }
  11640. else
  11641. builtin_to_automatic_output_location[builtin] = loc;
  11642. mark_location_as_used_by_shader(loc, mbr_type, StorageClassOutput, true);
  11643. return loc;
  11644. }
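// Note on the shared location above: for triangle patches, gl_TessLevelOuter uses
// three scalars and gl_TessLevelInner one, so both built-ins fit into a single
// packed float4 and can share one location. For quads (4 outer + 2 inner scalars)
// they do not fit in one vector, so each built-in keeps its own location.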
// Returns the type declaration for a function, including the
// entry type if the current function is the entry point function.
string CompilerMSL::func_type_decl(SPIRType &type)
{
    // The regular function return type. If not processing the entry point function, that's all we need.
    string return_type = type_to_glsl(type) + type_to_array_glsl(type, 0);
    if (!processing_entry_point)
        return return_type;

    // If an outgoing interface block has been defined, and it should be returned, override the entry point return type.
    bool ep_should_return_output = !get_is_rasterization_disabled();
    if (stage_out_var_id && ep_should_return_output)
        return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type, 0);

    // Prepend an entry type, based on the execution model.
    string entry_type;
    auto &execution = get_entry_point();
    switch (execution.model)
    {
    case ExecutionModelVertex:
        if (msl_options.vertex_for_tessellation && !msl_options.supports_msl_version(1, 2))
            SPIRV_CROSS_THROW("Tessellation requires Metal 1.2.");
        entry_type = msl_options.vertex_for_tessellation ? "kernel" : "vertex";
        break;
    case ExecutionModelTessellationEvaluation:
        if (!msl_options.supports_msl_version(1, 2))
            SPIRV_CROSS_THROW("Tessellation requires Metal 1.2.");
        if (execution.flags.get(ExecutionModeIsolines))
            SPIRV_CROSS_THROW("Metal does not support isoline tessellation.");
        if (msl_options.is_ios())
            entry_type = join("[[ patch(", is_tessellating_triangles() ? "triangle" : "quad", ") ]] vertex");
        else
            entry_type = join("[[ patch(", is_tessellating_triangles() ? "triangle" : "quad", ", ",
                              execution.output_vertices, ") ]] vertex");
        break;
    case ExecutionModelFragment:
        entry_type = uses_explicit_early_fragment_test() ? "[[ early_fragment_tests ]] fragment" : "fragment";
        break;
    case ExecutionModelTessellationControl:
        if (!msl_options.supports_msl_version(1, 2))
            SPIRV_CROSS_THROW("Tessellation requires Metal 1.2.");
        if (execution.flags.get(ExecutionModeIsolines))
            SPIRV_CROSS_THROW("Metal does not support isoline tessellation.");
        /* fallthrough */
    case ExecutionModelGLCompute:
    case ExecutionModelKernel:
        entry_type = "kernel";
        break;
    default:
        entry_type = "unknown";
        break;
    }

    return entry_type + " " + return_type;
}
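// Example (illustrative) declarations this produces:
//   - Fragment shader with early fragment tests:
//       "[[ early_fragment_tests ]] fragment main0_out"
//   - macOS tess. eval. shader, quad domain, 4 output vertices:
//       "[[ patch(quad, 4) ]] vertex main0_out"
//   - Vertex shader compiled for tessellation: "kernel" + return type.
// "main0_out" is the conventional SPIRV-Cross name for the stage-out interface
// block; shown here only as a sketch.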
bool CompilerMSL::is_tesc_shader() const
{
    return get_execution_model() == ExecutionModelTessellationControl;
}

bool CompilerMSL::is_tese_shader() const
{
    return get_execution_model() == ExecutionModelTessellationEvaluation;
}

bool CompilerMSL::uses_explicit_early_fragment_test()
{
    auto &ep_flags = get_entry_point().flags;
    return ep_flags.get(ExecutionModeEarlyFragmentTests) || ep_flags.get(ExecutionModePostDepthCoverage);
}
// In MSL, address space qualifiers are required for all pointer or reference variables.
string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
{
    const auto &type = get<SPIRType>(argument.basetype);
    return get_type_address_space(type, argument.self, true);
}

bool CompilerMSL::decoration_flags_signal_volatile(const Bitset &flags)
{
    return flags.get(DecorationVolatile) || flags.get(DecorationCoherent);
}

string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument)
{
    // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
    Bitset flags;
    auto *var = maybe_get<SPIRVariable>(id);
    if (var && type.basetype == SPIRType::Struct &&
        (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
        flags = get_buffer_block_flags(id);
    else
        flags = get_decoration_bitset(id);

    const char *addr_space = nullptr;
    switch (type.storage)
    {
    case StorageClassWorkgroup:
        addr_space = "threadgroup";
        break;

    case StorageClassStorageBuffer:
    case StorageClassPhysicalStorageBuffer:
    {
        // For arguments from variable pointers, we use the write count deduction, so
        // we should not assume any constness here. Only for global SSBOs.
        bool readonly = false;
        if (!var || has_decoration(type.self, DecorationBlock))
            readonly = flags.get(DecorationNonWritable);

        addr_space = readonly ? "const device" : "device";
        break;
    }

    case StorageClassUniform:
    case StorageClassUniformConstant:
    case StorageClassPushConstant:
        if (type.basetype == SPIRType::Struct)
        {
            bool ssbo = has_decoration(type.self, DecorationBufferBlock);
            if (ssbo)
                addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
            else
                addr_space = "constant";
        }
        else if (!argument)
        {
            addr_space = "constant";
        }
        else if (type_is_msl_framebuffer_fetch(type))
        {
            // Subpass inputs are passed around by value.
            addr_space = "";
        }
        break;

    case StorageClassFunction:
    case StorageClassGeneric:
        break;

    case StorageClassInput:
        if (is_tesc_shader() && var && var->basevariable == stage_in_ptr_var_id)
            addr_space = msl_options.multi_patch_workgroup ? "const device" : "threadgroup";
        // Don't pass tessellation levels in the device AS; we load and convert them
        // to float manually.
        if (is_tese_shader() && msl_options.raw_buffer_tese_input && var)
        {
            bool is_stage_in = var->basevariable == stage_in_ptr_var_id;
            bool is_patch_stage_in = has_decoration(var->self, DecorationPatch);
            bool is_builtin = has_decoration(var->self, DecorationBuiltIn);
            BuiltIn builtin = (BuiltIn)get_decoration(var->self, DecorationBuiltIn);
            bool is_tess_level = is_builtin && (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner);
            if (is_stage_in || (is_patch_stage_in && !is_tess_level))
                addr_space = "const device";
        }
        if (get_execution_model() == ExecutionModelFragment && var && var->basevariable == stage_in_var_id)
            addr_space = "thread";
        break;

    case StorageClassOutput:
        if (capture_output_to_buffer)
        {
            if (var && type.storage == StorageClassOutput)
            {
                bool is_masked = is_stage_output_variable_masked(*var);

                if (is_masked)
                {
                    if (is_tessellation_shader())
                        addr_space = "threadgroup";
                    else
                        addr_space = "thread";
                }
                else if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
                    addr_space = "threadgroup";
            }

            if (!addr_space)
                addr_space = "device";
        }
        break;

    default:
        break;
    }

    if (!addr_space)
    {
        // No address space for plain values.
        addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : "";
    }

    return join(decoration_flags_signal_volatile(flags) ? "volatile " : "", addr_space);
}
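// Quick reference (illustrative) for the mapping above:
//   Workgroup                              -> threadgroup
//   StorageBuffer / PhysicalStorageBuffer  -> device ("const device" when NonWritable)
//   Uniform block (Block)                  -> constant; BufferBlock (legacy SSBO) -> device / const device
//   Fragment stage-in                      -> thread; captured Output -> device (threadgroup/thread when masked)
//   Plain values                           -> no qualifier ("thread" only for pointers/control-point arrays)
// A Volatile or Coherent decoration additionally prefixes "volatile ".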
const char *CompilerMSL::to_restrict(uint32_t id, bool space)
{
    // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
    Bitset flags;
    if (ir.ids[id].get_type() == TypeVariable)
    {
        uint32_t type_id = expression_type_id(id);
        auto &type = expression_type(id);
        if (type.basetype == SPIRType::Struct &&
            (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock)))
            flags = get_buffer_block_flags(id);
        else
            flags = get_decoration_bitset(id);
    }
    else
        flags = get_decoration_bitset(id);

    return flags.get(DecorationRestrict) || flags.get(DecorationRestrictPointerEXT) ?
               (space ? "__restrict " : "__restrict") : "";
}
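// Example (illustrative): for a buffer decorated Restrict, to_restrict(id, true)
// yields "__restrict " so it can be glued directly in front of a variable name;
// passing space == false omits the trailing space for use at the end of a declaration.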
string CompilerMSL::entry_point_arg_stage_in()
{
    string decl;

    if ((is_tesc_shader() && msl_options.multi_patch_workgroup) ||
        (is_tese_shader() && msl_options.raw_buffer_tese_input))
        return decl;

    // Stage-in structure
    uint32_t stage_in_id;
    if (is_tese_shader())
        stage_in_id = patch_stage_in_var_id;
    else
        stage_in_id = stage_in_var_id;

    if (stage_in_id)
    {
        auto &var = get<SPIRVariable>(stage_in_id);
        auto &type = get_variable_data_type(var);

        add_resource_name(var.self);
        decl = join(type_to_glsl(type), " ", to_name(var.self), " [[stage_in]]");
    }

    return decl;
}
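// Example (illustrative): for a typical fragment shader this emits something like
//   "main0_in in [[stage_in]]"
// where "main0_in" is the conventional SPIRV-Cross name for the stage-in struct.
// No stage-in argument is emitted for multi-patch tess. control or raw-buffer
// tess. eval. inputs, since those read their inputs from plain device buffers instead.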
// Returns true if this input builtin should be a direct parameter on a shader function parameter list,
// and false for builtins that should be passed or calculated some other way.
bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type)
{
    switch (bi_type)
    {
    // Vertex function in
    case BuiltInVertexId:
    case BuiltInVertexIndex:
    case BuiltInBaseVertex:
    case BuiltInInstanceId:
    case BuiltInInstanceIndex:
    case BuiltInBaseInstance:
        return get_execution_model() != ExecutionModelVertex || !msl_options.vertex_for_tessellation;
    // Tess. control function in
    case BuiltInPosition:
    case BuiltInPointSize:
    case BuiltInClipDistance:
    case BuiltInCullDistance:
    case BuiltInPatchVertices:
        return false;
    case BuiltInInvocationId:
    case BuiltInPrimitiveId:
        return !is_tesc_shader() || !msl_options.multi_patch_workgroup;
    // Tess. evaluation function in
    case BuiltInTessLevelInner:
    case BuiltInTessLevelOuter:
        return false;
    // Fragment function in
    case BuiltInSamplePosition:
    case BuiltInHelperInvocation:
    case BuiltInBaryCoordKHR:
    case BuiltInBaryCoordNoPerspKHR:
        return false;
    case BuiltInViewIndex:
        return get_execution_model() == ExecutionModelFragment && msl_options.multiview &&
               msl_options.multiview_layered_rendering;
    // Compute function in
    case BuiltInSubgroupId:
    case BuiltInNumSubgroups:
        return !msl_options.emulate_subgroups;
    // Any stage function in
    case BuiltInDeviceIndex:
    case BuiltInSubgroupEqMask:
    case BuiltInSubgroupGeMask:
    case BuiltInSubgroupGtMask:
    case BuiltInSubgroupLeMask:
    case BuiltInSubgroupLtMask:
        return false;
    case BuiltInSubgroupSize:
        if (msl_options.fixed_subgroup_size != 0)
            return false;
        /* fallthrough */
    case BuiltInSubgroupLocalInvocationId:
        return !msl_options.emulate_subgroups;
    default:
        return true;
    }
}
// Returns true if this is a fragment shader that runs per sample, and false otherwise.
bool CompilerMSL::is_sample_rate() const
{
    auto &caps = get_declared_capabilities();
    return get_execution_model() == ExecutionModelFragment &&
           (msl_options.force_sample_rate_shading ||
            std::find(caps.begin(), caps.end(), CapabilitySampleRateShading) != caps.end() ||
            (msl_options.use_framebuffer_fetch_subpasses && need_subpass_input_ms));
}

bool CompilerMSL::is_intersection_query() const
{
    auto &caps = get_declared_capabilities();
    return std::find(caps.begin(), caps.end(), CapabilityRayQueryKHR) != caps.end();
}
void CompilerMSL::entry_point_args_builtin(string &ep_args)
{
    // Builtin variables
    SmallVector<pair<SPIRVariable *, BuiltIn>, 8> active_builtins;
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
        if (var.storage != StorageClassInput)
            return;

        auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));

        // Don't emit SamplePosition as a separate parameter. In the entry
        // point, we get that by calling get_sample_position() on the sample ID.
        if (is_builtin_variable(var) &&
            get_variable_data_type(var).basetype != SPIRType::Struct &&
            get_variable_data_type(var).basetype != SPIRType::ControlPointArray)
        {
            // If the builtin is not part of the active input builtin set, don't emit it.
            // Relevant for multiple entry-point modules which might declare unused builtins.
            if (!active_input_builtins.get(bi_type) || !interface_variable_exists_in_entry_point(var_id))
                return;

            // Remember this variable. We may need to correct its type.
            active_builtins.push_back(make_pair(&var, bi_type));

            if (is_direct_input_builtin(bi_type))
            {
                if (!ep_args.empty())
                    ep_args += ", ";

                // Handle HLSL-style 0-based vertex/instance index.
                builtin_declaration = true;

                // Handle different MSL gl_TessCoord types. (float2, float3)
                if (bi_type == BuiltInTessCoord && get_entry_point().flags.get(ExecutionModeQuads))
                    ep_args += "float2 " + to_expression(var_id) + "In";
                else
                    ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id);

                ep_args += string(" [[") + builtin_qualifier(bi_type);
                if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage))
                {
                    if (!msl_options.supports_msl_version(2))
                        SPIRV_CROSS_THROW("Post-depth coverage requires MSL 2.0.");
                    if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
                        SPIRV_CROSS_THROW("Post-depth coverage on Mac requires MSL 2.3.");
                    ep_args += ", post_depth_coverage";
                }
                ep_args += "]]";
                builtin_declaration = false;
            }
        }

        if (has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase))
        {
            // This is a special implicit builtin, not corresponding to any SPIR-V builtin,
            // which holds the base that was passed to vkCmdDispatchBase() or vkCmdDrawIndexed(). If it's present,
            // assume we emitted it for a good reason.
            assert(msl_options.supports_msl_version(1, 2));
            if (!ep_args.empty())
                ep_args += ", ";

            ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_origin]]";
        }

        if (has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInStageInputSize))
        {
            // This is another special implicit builtin, not corresponding to any SPIR-V builtin,
            // which holds the number of vertices and instances to draw. If it's present,
            // assume we emitted it for a good reason.
            assert(msl_options.supports_msl_version(1, 2));
            if (!ep_args.empty())
                ep_args += ", ";

            ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_size]]";
        }
    });

    // Correct the types of all encountered active builtins. We couldn't do this before
    // because ensure_correct_builtin_type() may increase the bound, which isn't allowed
    // while iterating over IDs.
    for (auto &var : active_builtins)
        var.first->basetype = ensure_correct_builtin_type(var.first->basetype, var.second);

    // Handle HLSL-style 0-based vertex/instance index.
    if (needs_base_vertex_arg == TriState::Yes)
        ep_args += built_in_func_arg(BuiltInBaseVertex, !ep_args.empty());

    if (needs_base_instance_arg == TriState::Yes)
        ep_args += built_in_func_arg(BuiltInBaseInstance, !ep_args.empty());

    if (capture_output_to_buffer)
    {
        // Add parameters to hold the indirect draw parameters and the shader output. This has to be handled
        // specially because it needs to be a pointer, not a reference.
        if (stage_out_var_id)
        {
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args += join("device ", type_to_glsl(get_stage_out_struct_type()), "* ", output_buffer_var_name,
                            " [[buffer(", msl_options.shader_output_buffer_index, ")]]");
        }

        if (is_tesc_shader())
        {
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args +=
                join("constant uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]");
        }
        else if (stage_out_var_id &&
                 !(get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation))
        {
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args +=
                join("device uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]");
        }

        if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation &&
            (active_input_builtins.get(BuiltInVertexIndex) || active_input_builtins.get(BuiltInVertexId)) &&
            msl_options.vertex_index_type != Options::IndexType::None)
        {
            // Add the index buffer so we can set gl_VertexIndex correctly.
            if (!ep_args.empty())
                ep_args += ", ";
            switch (msl_options.vertex_index_type)
            {
            case Options::IndexType::None:
                break;
            case Options::IndexType::UInt16:
                ep_args += join("const device ushort* ", index_buffer_var_name, " [[buffer(",
                                msl_options.shader_index_buffer_index, ")]]");
                break;
            case Options::IndexType::UInt32:
                ep_args += join("const device uint* ", index_buffer_var_name, " [[buffer(",
                                msl_options.shader_index_buffer_index, ")]]");
                break;
            }
        }

        // Tessellation control shaders get three additional parameters:
        // a buffer to hold the per-patch data, a buffer to hold the per-patch
        // tessellation levels, and a block of workgroup memory to hold the
        // input control point data.
        if (is_tesc_shader())
        {
            if (patch_stage_out_var_id)
            {
                if (!ep_args.empty())
                    ep_args += ", ";
                ep_args +=
                    join("device ", type_to_glsl(get_patch_stage_out_struct_type()), "* ", patch_output_buffer_var_name,
                         " [[buffer(", convert_to_string(msl_options.shader_patch_output_buffer_index), ")]]");
            }
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args += join("device ", get_tess_factor_struct_name(), "* ", tess_factor_buffer_var_name, " [[buffer(",
                            convert_to_string(msl_options.shader_tess_factor_buffer_index), ")]]");

            // Initializer for tess factors must be handled specially since it's never declared as a normal variable.
            uint32_t outer_factor_initializer_id = 0;
            uint32_t inner_factor_initializer_id = 0;
            ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
                if (!has_decoration(var.self, DecorationBuiltIn) || var.storage != StorageClassOutput || !var.initializer)
                    return;

                BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
                if (builtin == BuiltInTessLevelInner)
                    inner_factor_initializer_id = var.initializer;
                else if (builtin == BuiltInTessLevelOuter)
                    outer_factor_initializer_id = var.initializer;
            });

            const SPIRConstant *c = nullptr;

            if (outer_factor_initializer_id && (c = maybe_get<SPIRConstant>(outer_factor_initializer_id)))
            {
                auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
                entry_func.fixup_hooks_in.push_back(
                    [=]()
                    {
                        uint32_t components = is_tessellating_triangles() ? 3 : 4;
                        for (uint32_t i = 0; i < components; i++)
                        {
                            statement(builtin_to_glsl(BuiltInTessLevelOuter, StorageClassOutput), "[", i,
                                      "] = ", "half(", to_expression(c->subconstants[i]), ");");
                        }
                    });
            }

            if (inner_factor_initializer_id && (c = maybe_get<SPIRConstant>(inner_factor_initializer_id)))
            {
                auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
                if (is_tessellating_triangles())
                {
                    entry_func.fixup_hooks_in.push_back([=]() {
                        statement(builtin_to_glsl(BuiltInTessLevelInner, StorageClassOutput), " = ", "half(",
                                  to_expression(c->subconstants[0]), ");");
                    });
                }
                else
                {
                    entry_func.fixup_hooks_in.push_back([=]() {
                        for (uint32_t i = 0; i < 2; i++)
                        {
                            statement(builtin_to_glsl(BuiltInTessLevelInner, StorageClassOutput), "[", i, "] = ",
                                      "half(", to_expression(c->subconstants[i]), ");");
                        }
                    });
                }
            }

            if (stage_in_var_id)
            {
                if (!ep_args.empty())
                    ep_args += ", ";
                if (msl_options.multi_patch_workgroup)
                {
                    ep_args += join("device ", type_to_glsl(get_stage_in_struct_type()), "* ", input_buffer_var_name,
                                    " [[buffer(", convert_to_string(msl_options.shader_input_buffer_index), ")]]");
                }
                else
                {
                    ep_args += join("threadgroup ", type_to_glsl(get_stage_in_struct_type()), "* ", input_wg_var_name,
                                    " [[threadgroup(", convert_to_string(msl_options.shader_input_wg_index), ")]]");
                }
            }
        }
    }

    // Tessellation evaluation shaders get three additional parameters:
    // a buffer for the per-patch data, a buffer for the per-patch
    // tessellation levels, and a buffer for the control point data.
    if (is_tese_shader() && msl_options.raw_buffer_tese_input)
    {
        if (patch_stage_in_var_id)
        {
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args +=
                join("const device ", type_to_glsl(get_patch_stage_in_struct_type()), "* ", patch_input_buffer_var_name,
                     " [[buffer(", convert_to_string(msl_options.shader_patch_input_buffer_index), ")]]");
        }

        if (tess_level_inner_var_id || tess_level_outer_var_id)
        {
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args += join("const device ", get_tess_factor_struct_name(), "* ", tess_factor_buffer_var_name,
                            " [[buffer(", convert_to_string(msl_options.shader_tess_factor_buffer_index), ")]]");
        }

        if (stage_in_var_id)
        {
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args += join("const device ", type_to_glsl(get_stage_in_struct_type()), "* ", input_buffer_var_name,
                            " [[buffer(", convert_to_string(msl_options.shader_input_buffer_index), ")]]");
        }
    }
}
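// Example (illustrative): a plain fragment shader using gl_FragCoord contributes an
// argument such as
//   "float4 gl_FragCoord [[position]]"
// while a tess. control shader with captured output typically also gains, e.g.,
//   "constant uint* spvIndirectParams [[buffer(N)]]" and
//   "device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(M)]]"
// where N/M come from msl_options and the names follow the usual SPIRV-Cross
// conventions; shown here only as a sketch.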
string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
{
    string ep_args = entry_point_arg_stage_in();
    Bitset claimed_bindings;

    for (uint32_t i = 0; i < kMaxArgumentBuffers; i++)
    {
        uint32_t id = argument_buffer_ids[i];
        if (id == 0)
            continue;

        add_resource_name(id);
        auto &var = get<SPIRVariable>(id);
        auto &type = get_variable_data_type(var);

        if (!ep_args.empty())
            ep_args += ", ";

        // Check if the argument buffer binding itself has been remapped.
        uint32_t buffer_binding;
        auto itr = resource_bindings.find({ get_entry_point().model, i, kArgumentBufferBinding });
        if (itr != end(resource_bindings))
        {
            buffer_binding = itr->second.first.msl_buffer;
            itr->second.second = true;
        }
        else
        {
            // As a fallback, directly map desc set <-> binding.
            // If that was taken, take the next buffer binding.
            if (claimed_bindings.get(i))
                buffer_binding = next_metal_resource_index_buffer;
            else
                buffer_binding = i;
        }

        claimed_bindings.set(buffer_binding);

        ep_args += get_argument_address_space(var) + " ";
        if (recursive_inputs.count(type.self))
            ep_args += string("void* ") + to_restrict(id, true) + to_name(id) + "_vp";
        else
            ep_args += type_to_glsl(type) + "& " + to_restrict(id, true) + to_name(id);
        ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";

        next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
    }

    entry_point_args_discrete_descriptors(ep_args);
    entry_point_args_builtin(ep_args);

    if (!ep_args.empty() && append_comma)
        ep_args += ", ";

    return ep_args;
}
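// Example (illustrative): with descriptor set 0 compiled as an argument buffer and
// no explicit remap, the fallback maps set 0 to [[buffer(0)]], producing e.g.
//   "device spvDescriptorSet0& spvDescriptorSet0 [[buffer(0)]]"
// ("spvDescriptorSet0" is the conventional SPIRV-Cross name for the generated
// argument-buffer struct; an explicit resource_bindings entry overrides the index).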
const MSLConstexprSampler *CompilerMSL::find_constexpr_sampler(uint32_t id) const
{
    // Try by ID.
    {
        auto itr = constexpr_samplers_by_id.find(id);
        if (itr != end(constexpr_samplers_by_id))
            return &itr->second;
    }

    // Try by binding.
    {
        uint32_t desc_set = get_decoration(id, DecorationDescriptorSet);
        uint32_t binding = get_decoration(id, DecorationBinding);
        auto itr = constexpr_samplers_by_binding.find({ desc_set, binding });
        if (itr != end(constexpr_samplers_by_binding))
            return &itr->second;
    }

    return nullptr;
}
void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
{
    // Output resources, sorted by resource index & type.
    // We need to sort to work around a bug on macOS 10.13 with NVidia drivers where switching between shaders
    // with different order of buffers can result in issues with buffer assignments inside the driver.
    struct Resource
    {
        SPIRVariable *var;
        SPIRVariable *discrete_descriptor_alias;
        string name;
        SPIRType::BaseType basetype;
        uint32_t index;
        uint32_t plane;
        uint32_t secondary_index;
    };

    SmallVector<Resource> resources;

    entry_point_bindings.clear();
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
        if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
             var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) &&
            !is_hidden_variable(var))
        {
            auto &type = get_variable_data_type(var);
            uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);

            if (is_supported_argument_buffer_type(type) && var.storage != StorageClassPushConstant)
            {
                if (descriptor_set_is_argument_buffer(desc_set))
                {
                    if (is_var_runtime_size_array(var))
                    {
                        // Runtime arrays need to be wrapped in spvDescriptorArray from argument buffer payload.
                        entry_point_bindings.push_back(&var);
                        // We'll wrap this, so to_name() will always use the non-qualified name.
                        // We'll need the qualified name to create a temporary variable instead.
                        ir.meta[var_id].decoration.qualified_alias_explicit_override = true;
                    }
                    return;
                }
            }

            // Handle descriptor aliasing of simple discrete cases.
            // We can handle aliasing of buffers by casting pointers.
            // The amount of aliasing we can perform for discrete descriptors is very limited.
            // For fully mutable-style aliasing, we need argument buffers where we can exploit the fact
            // that descriptors are all 8 bytes.
            SPIRVariable *discrete_descriptor_alias = nullptr;
            if (var.storage == StorageClassUniform || var.storage == StorageClassStorageBuffer)
            {
                for (auto &resource : resources)
                {
                    if (get_decoration(resource.var->self, DecorationDescriptorSet) ==
                            get_decoration(var_id, DecorationDescriptorSet) &&
                        get_decoration(resource.var->self, DecorationBinding) ==
                            get_decoration(var_id, DecorationBinding) &&
                        resource.basetype == SPIRType::Struct && type.basetype == SPIRType::Struct &&
                        (resource.var->storage == StorageClassUniform ||
                         resource.var->storage == StorageClassStorageBuffer))
                    {
                        discrete_descriptor_alias = resource.var;
                        // Self-reference marks that we should declare the resource,
                        // and it's being used as an alias (so we can emit void* instead).
                        resource.discrete_descriptor_alias = resource.var;
                        // Need to promote interlocked usage so that the primary declaration is correct.
                        if (interlocked_resources.count(var_id))
                            interlocked_resources.insert(resource.var->self);
                        break;
                    }
                }
            }

            const MSLConstexprSampler *constexpr_sampler = nullptr;
            if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler)
            {
                constexpr_sampler = find_constexpr_sampler(var_id);
                if (constexpr_sampler)
                {
                    // Mark this ID as a constexpr sampler for later in case it came from set/bindings.
                    constexpr_samplers_by_id[var_id] = *constexpr_sampler;
                }
            }

            // Emulate texture2D atomic operations
            uint32_t secondary_index = 0;
            if (atomic_image_vars_emulated.count(var.self))
            {
                secondary_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0);
            }

            if (type.basetype == SPIRType::SampledImage)
            {
                add_resource_name(var_id);

                uint32_t plane_count = 1;
                if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
                    plane_count = constexpr_sampler->planes;

                entry_point_bindings.push_back(&var);
                for (uint32_t i = 0; i < plane_count; i++)
                    resources.push_back({ &var, discrete_descriptor_alias, to_name(var_id), SPIRType::Image,
                                          get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index });

                if (type.image.dim != DimBuffer && !constexpr_sampler)
                {
                    resources.push_back({ &var, discrete_descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler,
                                          get_metal_resource_index(var, SPIRType::Sampler), 0, 0 });
                }
            }
            else if (!constexpr_sampler)
            {
                // constexpr samplers are not declared as resources.
                add_resource_name(var_id);

                // Don't allocate resource indices for aliases.
                uint32_t resource_index = ~0u;
                if (!discrete_descriptor_alias)
                    resource_index = get_metal_resource_index(var, type.basetype);

                entry_point_bindings.push_back(&var);
                resources.push_back({ &var, discrete_descriptor_alias, to_name(var_id), type.basetype,
                                      resource_index, 0, secondary_index });
            }
        }
    });

    stable_sort(resources.begin(), resources.end(),
                [](const Resource &lhs, const Resource &rhs)
                { return tie(lhs.basetype, lhs.index) < tie(rhs.basetype, rhs.index); });

    for (auto &r : resources)
    {
        auto &var = *r.var;
        auto &type = get_variable_data_type(var);

        uint32_t var_id = var.self;

        switch (r.basetype)
        {
        case SPIRType::Struct:
        {
            auto &m = ir.meta[type.self];
            if (m.members.size() == 0)
                break;

            if (r.discrete_descriptor_alias)
            {
                if (r.var == r.discrete_descriptor_alias)
                {
                    auto primary_name = join("spvBufferAliasSet",
                                             get_decoration(var_id, DecorationDescriptorSet),
                                             "Binding",
                                             get_decoration(var_id, DecorationBinding));

                    // Declare the primary alias as void*
                    if (!ep_args.empty())
                        ep_args += ", ";
                    ep_args += get_argument_address_space(var) + " void* " + primary_name;
                    ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
                    if (interlocked_resources.count(var_id))
                        ep_args += ", raster_order_group(0)";
                    ep_args += "]]";
                }

                buffer_aliases_discrete.push_back(r.var->self);
            }
            else if (!type.array.empty())
            {
                if (type.array.size() > 1)
                    SPIRV_CROSS_THROW("Arrays of arrays of buffers are not supported.");

                is_using_builtin_array = true;
                if (is_var_runtime_size_array(var))
                {
                    add_spv_func_and_recompile(SPVFuncImplVariableDescriptorArray);
                    if (!ep_args.empty())
                        ep_args += ", ";
                    const bool ssbo = has_decoration(type.self, DecorationBufferBlock);
                    if ((var.storage == spv::StorageClassStorageBuffer || ssbo) &&
                        msl_options.runtime_array_rich_descriptor)
                    {
                        add_spv_func_and_recompile(SPVFuncImplVariableSizedDescriptor);
                        ep_args += "const device spvBufferDescriptor<" + get_argument_address_space(var) + " " +
                                   type_to_glsl(type) + "*>* ";
                    }
                    else
                    {
                        ep_args += "const device spvDescriptor<" + get_argument_address_space(var) + " " +
                                   type_to_glsl(type) + "*>* ";
                    }
                    ep_args += to_restrict(var_id, true) + r.name + "_";
                    ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
                    if (interlocked_resources.count(var_id))
                        ep_args += ", raster_order_group(0)";
                    ep_args += "]]";
                }
                else
                {
                    uint32_t array_size = get_resource_array_size(type, var_id);
                    for (uint32_t i = 0; i < array_size; ++i)
                    {
                        if (!ep_args.empty())
                            ep_args += ", ";
                        ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " +
                                   to_restrict(var_id, true) + r.name + "_" + convert_to_string(i);
                        ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")";
                        if (interlocked_resources.count(var_id))
                            ep_args += ", raster_order_group(0)";
                        ep_args += "]]";
                    }
                }
                is_using_builtin_array = false;
            }
            else
            {
                if (!ep_args.empty())
                    ep_args += ", ";
                ep_args += get_argument_address_space(var) + " ";

                if (recursive_inputs.count(type.self))
                    ep_args += string("void* ") + to_restrict(var_id, true) + r.name + "_vp";
                else
                    ep_args += type_to_glsl(type) + "& " + to_restrict(var_id, true) + r.name;

                ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
                if (interlocked_resources.count(var_id))
                    ep_args += ", raster_order_group(0)";
                ep_args += "]]";
            }
            break;
        }
        case SPIRType::Sampler:
            if (!ep_args.empty())
                ep_args += ", ";
            ep_args += sampler_type(type, var_id, false) + " " + r.name;
            if (is_var_runtime_size_array(var))
                ep_args += "_ [[buffer(" + convert_to_string(r.index) + ")]]";
            else
                ep_args += " [[sampler(" + convert_to_string(r.index) + ")]]";
            break;
        case SPIRType::Image:
        {
            if (!ep_args.empty())
                ep_args += ", ";

            // Use Metal's native frame-buffer fetch API for subpass inputs.
            const auto &basetype = get<SPIRType>(var.basetype);
            if (!type_is_msl_framebuffer_fetch(basetype))
            {
                ep_args += image_type_glsl(type, var_id, false) + " " + r.name;
                if (r.plane > 0)
                    ep_args += join(plane_name_suffix, r.plane);

                if (is_var_runtime_size_array(var))
                    ep_args += "_ [[buffer(" + convert_to_string(r.index) + ")";
                else
                    ep_args += " [[texture(" + convert_to_string(r.index) + ")";

                if (interlocked_resources.count(var_id))
                    ep_args += ", raster_order_group(0)";
                ep_args += "]]";
            }
            else
            {
                if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3))
                    SPIRV_CROSS_THROW("Framebuffer fetch on Mac is not supported before MSL 2.3.");
                ep_args += image_type_glsl(type, var_id, false) + " " + r.name;
                ep_args += " [[color(" + convert_to_string(r.index) + ")]]";
            }

            // Emulate texture2D atomic operations
            if (atomic_image_vars_emulated.count(var.self))
            {
                auto &flags = ir.get_decoration_bitset(var.self);
                const char *cv_flags = decoration_flags_signal_volatile(flags) ? "volatile " : "";
                ep_args += join(", ", cv_flags, "device atomic_", type_to_glsl(get<SPIRType>(basetype.image.type), 0));
                ep_args += "* " + r.name + "_atomic";
                ep_args += " [[buffer(" + convert_to_string(r.secondary_index) + ")";
                if (interlocked_resources.count(var_id))
                    ep_args += ", raster_order_group(0)";
                ep_args += "]]";
            }
            break;
        }
        case SPIRType::AccelerationStructure:
        {
            if (is_var_runtime_size_array(var))
            {
                add_spv_func_and_recompile(SPVFuncImplVariableDescriptor);
                const auto &parent_type = get<SPIRType>(type.parent_type);
                if (!ep_args.empty())
                    ep_args += ", ";
                ep_args += "const device spvDescriptor<" + type_to_glsl(parent_type) + ">* " +
                           to_restrict(var_id, true) + r.name + "_";
                ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
            }
            else
            {
                if (!ep_args.empty())
                    ep_args += ", ";
                ep_args += type_to_glsl(type, var_id) + " " + r.name;
                ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
            }
            break;
        }
        default:
            if (!ep_args.empty())
                ep_args += ", ";
            if (!type.pointer)
                ep_args += get_type_address_space(get<SPIRType>(var.basetype), var_id) + " " +
                           type_to_glsl(type, var_id) + "& " + r.name;
            else
                ep_args += type_to_glsl(type, var_id) + " " + r.name;
            ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
            if (interlocked_resources.count(var_id))
                ep_args += ", raster_order_group(0)";
            ep_args += "]]";
            break;
        }
    }
}
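// Example (illustrative) of the discrete-descriptor output, assuming a UBO at
// binding 0, a combined image/sampler at binding 1, and default resource indices:
//   "constant UBO& ubo [[buffer(0)]], texture2d<float> tex [[texture(0)]],
//    sampler texSmplr [[sampler(0)]]"
// Samplers for combined images take the "<name>Smplr" suffix via
// to_sampler_expression(); the stable_sort above keeps buffer/texture/sampler
// indices in a deterministic order across compiles.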
// Returns a string containing a comma-delimited list of args for the entry point function.
// This is the "classic" method of MSL 1 when we don't have argument buffer support.
string CompilerMSL::entry_point_args_classic(bool append_comma)
{
    string ep_args = entry_point_arg_stage_in();
    entry_point_args_discrete_descriptors(ep_args);
    entry_point_args_builtin(ep_args);

    if (!ep_args.empty() && append_comma)
        ep_args += ", ";

    return ep_args;
}
void CompilerMSL::fix_up_shader_inputs_outputs()
{
    auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);

    // Emit a guard to ensure we don't execute beyond the last vertex.
    // Vertex shaders shouldn't have the problems with barriers in non-uniform control flow that
    // tessellation control shaders do, so early returns should be OK. We may need to revisit this
    // if it ever becomes possible to use barriers from a vertex shader.
    if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)
    {
        entry_func.fixup_hooks_in.push_back([this]() {
            statement("if (any(", to_expression(builtin_invocation_id_id),
                      " >= ", to_expression(builtin_stage_input_size_id), "))");
            statement(" return;");
        });
    }
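    // Illustrative result of the guard above (names follow the usual SPIRV-Cross
    // builtin mapping, e.g. gl_GlobalInvocationID and spvStageInputSize):
    //   if (any(gl_GlobalInvocationID >= spvStageInputSize))
    //    return;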
    // Look for sampled images and buffers. Add hooks to set up the swizzle constants or array lengths.
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
        auto &type = get_variable_data_type(var);
        uint32_t var_id = var.self;
        bool ssbo = has_decoration(type.self, DecorationBufferBlock);

        if (var.storage == StorageClassUniformConstant && !is_hidden_variable(var))
        {
            if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
            {
                entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() {
                    bool is_array_type = !type.array.empty();

                    uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
                    if (descriptor_set_is_argument_buffer(desc_set))
                    {
                        statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id),
                                  is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]),
                                  ".spvSwizzleConstants", "[",
                                  convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
                    }
                    else
                    {
                        // If we have an array of images, we need to be able to index into it, so take a pointer instead.
                        statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id),
                                  is_array_type ? " = &" : " = ", to_name(swizzle_buffer_id), "[",
                                  convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];");
                    }
                });
            }
        }
        else if ((var.storage == StorageClassStorageBuffer || (var.storage == StorageClassUniform && ssbo)) &&
                 !is_hidden_variable(var))
        {
            if (buffer_requires_array_length(var.self))
            {
                entry_func.fixup_hooks_in.push_back(
                    [this, &type, &var, var_id]()
                    {
                        bool is_array_type = !type.array.empty() && !is_var_runtime_size_array(var);

                        uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
                        if (descriptor_set_is_argument_buffer(desc_set))
                        {
                            statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id),
                                      is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]),
                                      ".spvBufferSizeConstants", "[",
                                      convert_to_string(get_metal_resource_index(var, SPIRType::UInt)), "];");
                        }
                        else
                        {
                            // If we have an array of images, we need to be able to index into it, so take a pointer instead.
                            statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id),
                                      is_array_type ? " = &" : " = ", to_name(buffer_size_buffer_id), "[",
                                      convert_to_string(get_metal_resource_index(var, type.basetype)), "];");
                        }
                    });
            }
        }

        if (!msl_options.argument_buffers &&
            msl_options.replace_recursive_inputs && type_contains_recursion(type) &&
            (var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
             var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer))
        {
            recursive_inputs.insert(type.self);
            entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() {
                auto addr_space = get_argument_address_space(var);
                auto var_name = to_name(var_id);
                statement(addr_space, " auto& ", to_restrict(var_id, true), var_name,
                          " = *(", addr_space, " ", type_to_glsl(type), "*)", var_name, "_vp;");
            });
        }
    });

    // Builtin variables
    ir.for_each_typed_id<SPIRVariable>([this, &entry_func](uint32_t, SPIRVariable &var) {
        uint32_t var_id = var.self;
        BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type;

        if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
            return;
        if (!interface_variable_exists_in_entry_point(var.self))
            return;

        if (var.storage == StorageClassInput && is_builtin_variable(var) && active_input_builtins.get(bi_type))
        {
            switch (bi_type)
            {
            case BuiltInSamplePosition:
                entry_func.fixup_hooks_in.push_back([=]() {
                    statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = get_sample_position(",
                              to_expression(builtin_sample_id_id), ");");
                });
                break;
            case BuiltInFragCoord:
                if (is_sample_rate())
                {
                    entry_func.fixup_hooks_in.push_back([=]() {
                        statement(to_expression(var_id), ".xy += get_sample_position(",
                                  to_expression(builtin_sample_id_id), ") - 0.5;");
                    });
                }
                break;
            case BuiltInInvocationId:
                // This is direct-mapped without multi-patch workgroups.
                if (!is_tesc_shader() || !msl_options.multi_patch_workgroup)
                    break;

                entry_func.fixup_hooks_in.push_back([=]() {
                    statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
                              to_expression(builtin_invocation_id_id), ".x % ", this->get_entry_point().output_vertices,
                              ";");
                });
                break;
            case BuiltInPrimitiveId:
                // This is natively supported by fragment and tessellation evaluation shaders.
                // In tessellation control shaders, this is direct-mapped without multi-patch workgroups.
                if (!is_tesc_shader() || !msl_options.multi_patch_workgroup)
                    break;

                entry_func.fixup_hooks_in.push_back([=]() {
                    statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = min(",
                              to_expression(builtin_invocation_id_id), ".x / ", this->get_entry_point().output_vertices,
                              ", spvIndirectParams[1] - 1);");
                });
                break;
            case BuiltInPatchVertices:
                if (is_tese_shader())
                {
                    if (msl_options.raw_buffer_tese_input)
                    {
                        entry_func.fixup_hooks_in.push_back(
                            [=]() {
                                statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
                                          get_entry_point().output_vertices, ";");
                            });
                    }
                    else
                    {
                        entry_func.fixup_hooks_in.push_back(
                            [=]()
                            {
                                statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
                                          to_expression(patch_stage_in_var_id), ".gl_in.size();");
                            });
                    }
                }
                else
                {
                    entry_func.fixup_hooks_in.push_back([=]() {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = spvIndirectParams[0];");
                    });
                }
                break;
            case BuiltInTessCoord:
                if (get_entry_point().flags.get(ExecutionModeQuads))
                {
                    // The entry point will only have a float2 TessCoord variable.
                    // Pad to float3.
                    entry_func.fixup_hooks_in.push_back([=]() {
                        auto name = builtin_to_glsl(BuiltInTessCoord, StorageClassInput);
                        statement("float3 " + name + " = float3(" + name + "In.x, " + name + "In.y, 0.0);");
                    });
                }

                // Emit a fixup to account for the shifted domain. Don't do this for triangles;
                // MoltenVK will just reverse the winding order instead.
                if (msl_options.tess_domain_origin_lower_left && !is_tessellating_triangles())
                {
                    string tc = to_expression(var_id);
                    entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); });
                }
                break;
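            // Illustrative TessCoord fixups emitted above for a quad domain with a
            // lower-left origin (using the conventional gl_TessCoord naming):
            //   float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0);
            //   gl_TessCoord.y = 1.0 - gl_TessCoord.y;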
            case BuiltInSubgroupId:
                if (!msl_options.emulate_subgroups)
                    break;

                // For subgroup emulation, this is the same as the local invocation index.
                entry_func.fixup_hooks_in.push_back([=]() {
                    statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
                              to_expression(builtin_local_invocation_index_id), ";");
                });
                break;
            case BuiltInNumSubgroups:
                if (!msl_options.emulate_subgroups)
                    break;

                // For subgroup emulation, this is the same as the workgroup size.
                entry_func.fixup_hooks_in.push_back([=]() {
                    auto &type = expression_type(builtin_workgroup_size_id);
                    string size_expr = to_expression(builtin_workgroup_size_id);
                    if (type.vecsize >= 3)
                        size_expr = join(size_expr, ".x * ", size_expr, ".y * ", size_expr, ".z");
                    else if (type.vecsize == 2)
                        size_expr = join(size_expr, ".x * ", size_expr, ".y");
                    statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", size_expr, ";");
                });
                break;
            case BuiltInSubgroupLocalInvocationId:
                if (!msl_options.emulate_subgroups)
                    break;

                // For subgroup emulation, assume subgroups of size 1.
                entry_func.fixup_hooks_in.push_back(
                    [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); });
                break;
            case BuiltInSubgroupSize:
                if (msl_options.emulate_subgroups)
                {
                    // For subgroup emulation, assume subgroups of size 1.
                    entry_func.fixup_hooks_in.push_back(
                        [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 1;"); });
                }
                else if (msl_options.fixed_subgroup_size != 0)
                {
                    entry_func.fixup_hooks_in.push_back([=]() {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
                                  msl_options.fixed_subgroup_size, ";");
                    });
                }
                break;
            case BuiltInSubgroupEqMask:
                if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
                    SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
                if (!msl_options.supports_msl_version(2, 1))
                    SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
                entry_func.fixup_hooks_in.push_back([=]() {
                    if (msl_options.is_ios())
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", "uint4(1 << ",
                                  to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
                    }
                    else
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
                                  to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? uint4(0, (1 << (",
                                  to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
                                  to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
                    }
                });
                break;
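            // Illustrative EqMask fixup emitted above on iOS (SIMD-group size <= 32),
            // using the conventional gl_Subgroup* naming:
            //   uint4 gl_SubgroupEqMask = uint4(1 << gl_SubgroupInvocationID, uint3(0));
            // On macOS the 64-lane case selects between the low and high 32-bit word.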
            case BuiltInSubgroupGeMask:
                if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
                    SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
                if (!msl_options.supports_msl_version(2, 1))
                    SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
                if (msl_options.fixed_subgroup_size != 0)
                    add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
                entry_func.fixup_hooks_in.push_back([=]() {
                    // Case where index < 32, size < 32:
                    // mask0 = bfi(0, 0xFFFFFFFF, index, size - index);
                    // mask1 = bfi(0, 0xFFFFFFFF, 0, 0); // Gives 0
                    // Case where index < 32 but size >= 32:
                    // mask0 = bfi(0, 0xFFFFFFFF, index, 32 - index);
                    // mask1 = bfi(0, 0xFFFFFFFF, 0, size - 32);
                    // Case where index >= 32:
                    // mask0 = bfi(0, 0xFFFFFFFF, 32, 0); // Gives 0
                    // mask1 = bfi(0, 0xFFFFFFFF, index - 32, size - index);
                    // This is expressed without branches to avoid divergent
                    // control flow--hence the complicated min/max expressions.
                    // This is further complicated by the fact that if you attempt
                    // to bfi/bfe out-of-bounds on Metal, undefined behavior is the
                    // result.
                    if (msl_options.fixed_subgroup_size > 32)
                    {
                        // Don't use the subgroup size variable with fixed subgroup sizes,
                        // since the variables could be defined in the wrong order.
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
                                  to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(32 - (int)",
                                  to_expression(builtin_subgroup_invocation_id_id),
                                  ", 0)), insert_bits(0u, 0xFFFFFFFF,"
                                  " (uint)max((int)",
                                  to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), ",
                                  msl_options.fixed_subgroup_size, " - max(",
                                  to_expression(builtin_subgroup_invocation_id_id),
                                  ", 32u)), uint2(0));");
                    }
                    else if (msl_options.fixed_subgroup_size != 0)
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(insert_bits(0u, 0xFFFFFFFF, ",
                                  to_expression(builtin_subgroup_invocation_id_id), ", ",
                                  msl_options.fixed_subgroup_size, " - ",
                                  to_expression(builtin_subgroup_invocation_id_id),
                                  "), uint3(0));");
                    }
                    else if (msl_options.is_ios())
                    {
                        // On iOS, the SIMD-group size will currently never exceed 32.
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(insert_bits(0u, 0xFFFFFFFF, ",
                                  to_expression(builtin_subgroup_invocation_id_id), ", ",
                                  to_expression(builtin_subgroup_size_id), " - ",
                                  to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));");
                    }
                    else
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
                                  to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)",
                                  to_expression(builtin_subgroup_size_id), ", 32) - (int)",
                                  to_expression(builtin_subgroup_invocation_id_id),
                                  ", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
                                  to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)",
                                  to_expression(builtin_subgroup_size_id), " - (int)max(",
                                  to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));");
                    }
                });
                break;
            case BuiltInSubgroupGtMask:
                if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
                    SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
                if (!msl_options.supports_msl_version(2, 1))
                    SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
                add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
                entry_func.fixup_hooks_in.push_back([=]() {
                    // The same logic applies here, except now the index is one
                    // more than the subgroup invocation ID.
                    if (msl_options.fixed_subgroup_size > 32)
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
                                  to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(32 - (int)",
                                  to_expression(builtin_subgroup_invocation_id_id),
                                  " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
                                  to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), ",
                                  msl_options.fixed_subgroup_size, " - max(",
                                  to_expression(builtin_subgroup_invocation_id_id),
                                  " + 1, 32u)), uint2(0));");
                    }
                    else if (msl_options.fixed_subgroup_size != 0)
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(insert_bits(0u, 0xFFFFFFFF, ",
                                  to_expression(builtin_subgroup_invocation_id_id), " + 1, ",
                                  msl_options.fixed_subgroup_size, " - ",
                                  to_expression(builtin_subgroup_invocation_id_id),
                                  " - 1), uint3(0));");
                    }
                    else if (msl_options.is_ios())
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(insert_bits(0u, 0xFFFFFFFF, ",
                                  to_expression(builtin_subgroup_invocation_id_id), " + 1, ",
                                  to_expression(builtin_subgroup_size_id), " - ",
                                  to_expression(builtin_subgroup_invocation_id_id), " - 1), uint3(0));");
                    }
                    else
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(insert_bits(0u, 0xFFFFFFFF, min(",
                                  to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)",
                                  to_expression(builtin_subgroup_size_id), ", 32) - (int)",
                                  to_expression(builtin_subgroup_invocation_id_id),
                                  " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)",
                                  to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)",
                                  to_expression(builtin_subgroup_size_id), " - (int)max(",
                                  to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));");
                    }
                });
                break;
            case BuiltInSubgroupLeMask:
                if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
                    SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
                if (!msl_options.supports_msl_version(2, 1))
                    SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
                add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
                entry_func.fixup_hooks_in.push_back([=]() {
                    if (msl_options.is_ios())
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(extract_bits(0xFFFFFFFF, 0, ",
                                  to_expression(builtin_subgroup_invocation_id_id), " + 1), uint3(0));");
                    }
                    else
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(extract_bits(0xFFFFFFFF, 0, min(",
                                  to_expression(builtin_subgroup_invocation_id_id),
                                  " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
                                  to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));");
                    }
                });
                break;
            case BuiltInSubgroupLtMask:
                if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2))
                    SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS.");
                if (!msl_options.supports_msl_version(2, 1))
                    SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
                add_spv_func_and_recompile(SPVFuncImplSubgroupBallot);
                entry_func.fixup_hooks_in.push_back([=]() {
                    if (msl_options.is_ios())
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(extract_bits(0xFFFFFFFF, 0, ",
                                  to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));");
                    }
                    else
                    {
                        statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
                                  " = uint4(extract_bits(0xFFFFFFFF, 0, min(",
                                  to_expression(builtin_subgroup_invocation_id_id),
                                  ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)",
                                  to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));");
                    }
                });
                break;
            case BuiltInViewIndex:
                if (!msl_options.multiview)
                {
                    // According to the Vulkan spec, when not running under a multiview
                    // render pass, ViewIndex is 0.
                    entry_func.fixup_hooks_in.push_back([=]() {
                        statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;");
                    });
                }
                else if (msl_options.view_index_from_device_index)
                {
                    // In this case, we take the view index from that of the device we're running on.
                    entry_func.fixup_hooks_in.push_back([=]() {
  12961. statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
  12962. msl_options.device_index, ";");
  12963. });
  12964. // We actually don't want to set the render_target_array_index here.
  12965. // Since every physical device is rendering a different view,
  12966. // there's no need for layered rendering here.
  12967. }
  12968. else if (!msl_options.multiview_layered_rendering)
  12969. {
  12970. // In this case, the views are rendered one at a time. The view index, then,
  12971. // is just the first part of the "view mask".
  12972. entry_func.fixup_hooks_in.push_back([=]() {
  12973. statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
  12974. to_expression(view_mask_buffer_id), "[0];");
  12975. });
  12976. }
  12977. else if (get_execution_model() == ExecutionModelFragment)
  12978. {
  12979. // Because we adjusted the view index in the vertex shader, we have to
  12980. // adjust it back here.
  12981. entry_func.fixup_hooks_in.push_back([=]() {
  12982. statement(to_expression(var_id), " += ", to_expression(view_mask_buffer_id), "[0];");
  12983. });
  12984. }
  12985. else if (get_execution_model() == ExecutionModelVertex)
  12986. {
  12987. // Metal provides no special support for multiview, so we smuggle
  12988. // the view index in the instance index.
  12989. entry_func.fixup_hooks_in.push_back([=]() {
  12990. statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
  12991. to_expression(view_mask_buffer_id), "[0] + (", to_expression(builtin_instance_idx_id),
  12992. " - ", to_expression(builtin_base_instance_id), ") % ",
  12993. to_expression(view_mask_buffer_id), "[1];");
  12994. statement(to_expression(builtin_instance_idx_id), " = (",
  12995. to_expression(builtin_instance_idx_id), " - ",
  12996. to_expression(builtin_base_instance_id), ") / ", to_expression(view_mask_buffer_id),
  12997. "[1] + ", to_expression(builtin_base_instance_id), ";");
  12998. });
  12999. // In addition to setting the variable itself, we also need to
  13000. // set the render_target_array_index with it on output. We have to
  13001. // offset this by the base view index, because Metal isn't in on
  13002. // our little game here.
  13003. entry_func.fixup_hooks_out.push_back([=]() {
  13004. statement(to_expression(builtin_layer_id), " = ", to_expression(var_id), " - ",
  13005. to_expression(view_mask_buffer_id), "[0];");
  13006. });
  13007. }
  13008. break;
  13009. case BuiltInDeviceIndex:
  13010. // Metal pipelines belong to the devices which create them, so we'll
  13011. // need to create a MTLPipelineState for every MTLDevice in a grouped
  13012. // VkDevice. We can assume, then, that the device index is constant.
  13013. entry_func.fixup_hooks_in.push_back([=]() {
  13014. statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
  13015. msl_options.device_index, ";");
  13016. });
  13017. break;
  13018. case BuiltInWorkgroupId:
  13019. if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInWorkgroupId))
  13020. break;
  13021. // The vkCmdDispatchBase() command lets the client set the base value
  13022. // of WorkgroupId. Metal has no direct equivalent; we must make this
  13023. // adjustment ourselves.
  13024. entry_func.fixup_hooks_in.push_back([=]() {
  13025. statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), ";");
  13026. });
  13027. break;
  13028. case BuiltInGlobalInvocationId:
  13029. if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInGlobalInvocationId))
  13030. break;
  13031. // GlobalInvocationId is defined as LocalInvocationId + WorkgroupId * WorkgroupSize.
  13032. // This needs to be adjusted too.
  13033. entry_func.fixup_hooks_in.push_back([=]() {
  13034. auto &execution = this->get_entry_point();
  13035. uint32_t workgroup_size_id = execution.workgroup_size.constant;
  13036. if (workgroup_size_id)
  13037. statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id),
  13038. " * ", to_expression(workgroup_size_id), ";");
  13039. else
  13040. statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id),
  13041. " * uint3(", execution.workgroup_size.x, ", ", execution.workgroup_size.y, ", ",
  13042. execution.workgroup_size.z, ");");
  13043. });
  13044. break;
  13045. case BuiltInVertexId:
  13046. case BuiltInVertexIndex:
  13047. // This is direct-mapped normally.
  13048. if (!msl_options.vertex_for_tessellation)
  13049. break;
  13050. entry_func.fixup_hooks_in.push_back([=]() {
  13051. builtin_declaration = true;
  13052. switch (msl_options.vertex_index_type)
  13053. {
  13054. case Options::IndexType::None:
  13055. statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
  13056. to_expression(builtin_invocation_id_id), ".x + ",
  13057. to_expression(builtin_dispatch_base_id), ".x;");
  13058. break;
  13059. case Options::IndexType::UInt16:
  13060. case Options::IndexType::UInt32:
  13061. statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", index_buffer_var_name,
  13062. "[", to_expression(builtin_invocation_id_id), ".x] + ",
  13063. to_expression(builtin_dispatch_base_id), ".x;");
  13064. break;
  13065. }
  13066. builtin_declaration = false;
  13067. });
  13068. break;
  13069. case BuiltInBaseVertex:
  13070. // This is direct-mapped normally.
  13071. if (!msl_options.vertex_for_tessellation)
  13072. break;
  13073. entry_func.fixup_hooks_in.push_back([=]() {
  13074. statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
  13075. to_expression(builtin_dispatch_base_id), ".x;");
  13076. });
  13077. break;
  13078. case BuiltInInstanceId:
  13079. case BuiltInInstanceIndex:
  13080. // This is direct-mapped normally.
  13081. if (!msl_options.vertex_for_tessellation)
  13082. break;
  13083. entry_func.fixup_hooks_in.push_back([=]() {
  13084. builtin_declaration = true;
  13085. statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
  13086. to_expression(builtin_invocation_id_id), ".y + ", to_expression(builtin_dispatch_base_id),
  13087. ".y;");
  13088. builtin_declaration = false;
  13089. });
  13090. break;
  13091. case BuiltInBaseInstance:
  13092. // This is direct-mapped normally.
  13093. if (!msl_options.vertex_for_tessellation)
  13094. break;
  13095. entry_func.fixup_hooks_in.push_back([=]() {
  13096. statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
  13097. to_expression(builtin_dispatch_base_id), ".y;");
  13098. });
  13099. break;
  13100. default:
  13101. break;
  13102. }
  13103. }
  13104. else if (var.storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment &&
  13105. is_builtin_variable(var) && active_output_builtins.get(bi_type))
  13106. {
  13107. switch (bi_type)
  13108. {
  13109. case BuiltInSampleMask:
  13110. if (has_additional_fixed_sample_mask())
  13111. {
  13112. // If the additional fixed sample mask was set, we need to adjust the sample_mask
  13113. // output to reflect that. If the shader outputs the sample_mask itself too, we need
  13114. // to AND the two masks to get the final one.
  13115. string op_str = does_shader_write_sample_mask ? " &= " : " = ";
  13116. entry_func.fixup_hooks_out.push_back([=]() {
  13117. statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";");
  13118. });
  13119. }
  13120. break;
  13121. case BuiltInFragDepth:
  13122. if (msl_options.input_attachment_is_ds_attachment && !writes_to_depth)
  13123. {
  13124. entry_func.fixup_hooks_out.push_back([=]() {
  13125. statement(to_expression(builtin_frag_depth_id), " = ", to_expression(builtin_frag_coord_id), ".z;");
  13126. });
  13127. }
  13128. break;
  13129. default:
  13130. break;
  13131. }
  13132. }
  13133. });
  13134. }
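
// Worked example for the ballot masks above (illustrative numbers): with
// fixed_subgroup_size = 64 and a subgroup invocation ID of 5, the GE mask
// packs bits [5, 63] into the low two words:
//     insert_bits(0u, 0xFFFFFFFF, 5, 27) -> 0xFFFFFFE0 (word 0)
//     insert_bits(0u, 0xFFFFFFFF, 0, 32) -> 0xFFFFFFFF (word 1)
// yielding uint4(0xFFFFFFE0, 0xFFFFFFFF, 0, 0). The GT mask shifts the split
// point by one (hence the "+ 1" terms), while LE/LT build the complementary
// low-bits masks with extract_bits instead.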

// Returns the Metal index of the resource of the specified type as used by the specified variable.
uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane)
{
	auto &execution = get_entry_point();
	auto &var_dec = ir.meta[var.self].decoration;
	auto &var_type = get<SPIRType>(var.basetype);
	uint32_t var_desc_set = (var.storage == StorageClassPushConstant) ? kPushConstDescSet : var_dec.set;
	uint32_t var_binding = (var.storage == StorageClassPushConstant) ? kPushConstBinding : var_dec.binding;

	// If a matching binding has been specified, find and use it.
	auto itr = resource_bindings.find({ execution.model, var_desc_set, var_binding });

	// Atomic helper buffers for image atomics need to use secondary bindings as well.
	bool use_secondary_binding = (var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler) ||
	                             basetype == SPIRType::AtomicCounter;

	auto resource_decoration =
	    use_secondary_binding ? SPIRVCrossDecorationResourceIndexSecondary : SPIRVCrossDecorationResourceIndexPrimary;

	if (plane == 1)
		resource_decoration = SPIRVCrossDecorationResourceIndexTertiary;
	if (plane == 2)
		resource_decoration = SPIRVCrossDecorationResourceIndexQuaternary;

	if (itr != end(resource_bindings))
	{
		auto &remap = itr->second;
		remap.second = true;
		switch (basetype)
		{
		case SPIRType::Image:
			set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture + plane);
			return remap.first.msl_texture + plane;
		case SPIRType::Sampler:
			set_extended_decoration(var.self, resource_decoration, remap.first.msl_sampler);
			return remap.first.msl_sampler;
		default:
			set_extended_decoration(var.self, resource_decoration, remap.first.msl_buffer);
			return remap.first.msl_buffer;
		}
	}

	// If we have already allocated an index, keep using it.
	if (has_extended_decoration(var.self, resource_decoration))
		return get_extended_decoration(var.self, resource_decoration);

	auto &type = get<SPIRType>(var.basetype);

	if (type_is_msl_framebuffer_fetch(type))
	{
		// Frame-buffer fetch gets its fallback resource index from the input attachment index,
		// which is then treated as color index.
		return get_decoration(var.self, DecorationInputAttachmentIndex);
	}
	else if (msl_options.enable_decoration_binding)
	{
		// Allow user to enable decoration binding.
		// If there is no explicit mapping of bindings to MSL, use the declared binding as a fallback.
		if (has_decoration(var.self, DecorationBinding))
		{
			var_binding = get_decoration(var.self, DecorationBinding);
			// Avoid emitting sentinel bindings.
			if (var_binding < 0x80000000u)
				return var_binding;
		}
	}

	// If we did not explicitly remap, allocate bindings on demand.
	// We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different.

	bool allocate_argument_buffer_ids = false;

	if (var.storage != StorageClassPushConstant)
		allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(var_desc_set);

	uint32_t binding_stride = 1;
	for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
		binding_stride *= to_array_size_literal(type, i);

	// If a binding has not been specified, revert to incrementing resource indices.
	uint32_t resource_index;

	if (allocate_argument_buffer_ids)
	{
		// Allocate from a flat ID binding space.
		resource_index = next_metal_resource_ids[var_desc_set];
		next_metal_resource_ids[var_desc_set] += binding_stride;
	}
	else
	{
		if (is_var_runtime_size_array(var))
		{
			basetype = SPIRType::Struct;
			binding_stride = 1;
		}
		// Allocate from plain bindings which are allocated per resource type.
		switch (basetype)
		{
		case SPIRType::Image:
			resource_index = next_metal_resource_index_texture;
			next_metal_resource_index_texture += binding_stride;
			break;
		case SPIRType::Sampler:
			resource_index = next_metal_resource_index_sampler;
			next_metal_resource_index_sampler += binding_stride;
			break;
		default:
			resource_index = next_metal_resource_index_buffer;
			next_metal_resource_index_buffer += binding_stride;
			break;
		}
	}

	set_extended_decoration(var.self, resource_decoration, resource_index);
	return resource_index;
}
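
// A minimal usage sketch (not part of this file; names and indices are
// illustrative): callers seed resource_bindings through the public
// add_msl_resource_binding() API before compiling, which is what the
// explicit-binding lookup above consumes.
//
//     spirv_cross::MSLResourceBinding b = {};
//     b.stage = spv::ExecutionModelFragment;
//     b.desc_set = 0;
//     b.binding = 1;
//     b.msl_texture = 2; // becomes [[texture(2)]]
//     b.msl_sampler = 2; // becomes [[sampler(2)]]
//     compiler.add_msl_resource_binding(b);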

bool CompilerMSL::type_is_msl_framebuffer_fetch(const SPIRType &type) const
{
	return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
	       msl_options.use_framebuffer_fetch_subpasses;
}

const char *CompilerMSL::descriptor_address_space(uint32_t id, StorageClass storage, const char *plain_address_space) const
{
	if (msl_options.argument_buffers)
	{
		bool storage_class_is_descriptor = storage == StorageClassUniform ||
		                                   storage == StorageClassStorageBuffer ||
		                                   storage == StorageClassUniformConstant;

		uint32_t desc_set = get_decoration(id, DecorationDescriptorSet);
		if (storage_class_is_descriptor && descriptor_set_is_argument_buffer(desc_set))
		{
			// An awkward case where we need to emit *more* address space declarations (yay!).
			// An example is where we pass down an array of buffer pointers to leaf functions.
			// It's a constant array containing pointers to constants.
			// The pointer array is always constant however. E.g.
			// device SSBO * constant (&array)[N].
			// const device SSBO * constant (&array)[N].
			// constant SSBO * constant (&array)[N].
			// However, this only matters for argument buffers, since for MSL 1.0 style codegen,
			// we emit the buffer array on stack instead, and that seems to work just fine apparently.

			// If the argument was marked as being in device address space, any pointer to member would
			// be const device, not constant.
			if (argument_buffer_device_storage_mask & (1u << desc_set))
				return "const device";
			else
				return "constant";
		}
	}

	return plain_address_space;
}
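
// For intuition, an SSBO array member reached through an argument buffer may
// be declared as (illustrative declaration, not output from a real shader):
//     const device SSBO* constant (&ssbos)[4];
// where the choice between "const device" and "constant" comes from
// argument_buffer_device_storage_mask above; all other paths fall through to
// the caller-provided plain_address_space.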

string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
{
	auto &var = get<SPIRVariable>(arg.id);
	auto &type = get_variable_data_type(var);
	auto &var_type = get<SPIRType>(arg.type);
	StorageClass type_storage = var_type.storage;

	// If we need to modify the name of the variable, make sure we use the original variable.
	// Our alias is just a shadow variable.
	uint32_t name_id = var.self;
	if (arg.alias_global_variable && var.basevariable)
		name_id = var.basevariable;

	bool constref = !arg.alias_global_variable && is_pointer(var_type) && arg.write_count == 0;
	// Framebuffer fetch is plain value, const looks out of place, but it is not wrong.
	if (type_is_msl_framebuffer_fetch(type))
		constref = false;
	else if (type_storage == StorageClassUniformConstant)
		constref = true;

	bool type_is_image = type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
	                     type.basetype == SPIRType::Sampler;
	bool type_is_tlas = type.basetype == SPIRType::AccelerationStructure;

	// For opaque types we handle const later due to descriptor address spaces.
	const char *cv_qualifier = (constref && !type_is_image) ? "const " : "";
	string decl;

	// If this is a combined image-sampler for a 2D image with floating-point type,
	// we emitted the 'spvDynamicImageSampler' type, and this is *not* an alias parameter
	// for a global, then we need to emit a "dynamic" combined image-sampler.
	// Unfortunately, this is necessary to properly support passing around
	// combined image-samplers with Y'CbCr conversions on them.
	bool is_dynamic_img_sampler = !arg.alias_global_variable && type.basetype == SPIRType::SampledImage &&
	                              type.image.dim == Dim2D && type_is_floating_point(get<SPIRType>(type.image.type)) &&
	                              spv_function_implementations.count(SPVFuncImplDynamicImageSampler);

	// Allow Metal to use the array<T> template to make arrays a value type
	string address_space = get_argument_address_space(var);
	bool builtin = has_decoration(var.self, DecorationBuiltIn);
	auto builtin_type = BuiltIn(get_decoration(arg.id, DecorationBuiltIn));

	if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id))
		decl = join(cv_qualifier, type_to_glsl(type, arg.id));
	else if (builtin)
	{
		// Only use templated array for Clip/Cull distance when feasible.
		// In other scenarios, we need to override array length for tess levels (if used as outputs),
		// or we need to emit the expected type for builtins (uint vs int).
		auto storage = get<SPIRType>(var.basetype).storage;

		if (storage == StorageClassInput &&
		    (builtin_type == BuiltInTessLevelInner || builtin_type == BuiltInTessLevelOuter))
		{
			is_using_builtin_array = false;
		}
		else if (builtin_type != BuiltInClipDistance && builtin_type != BuiltInCullDistance)
		{
			is_using_builtin_array = true;
		}

		if (storage == StorageClassOutput && variable_storage_requires_stage_io(storage) &&
		    !is_stage_output_builtin_masked(builtin_type))
			is_using_builtin_array = true;

		if (is_using_builtin_array)
			decl = join(cv_qualifier, builtin_type_decl(builtin_type, arg.id));
		else
			decl = join(cv_qualifier, type_to_glsl(type, arg.id));
	}
	else if (is_var_runtime_size_array(var))
	{
		const auto *parent_type = &get<SPIRType>(type.parent_type);
		auto type_name = type_to_glsl(*parent_type, arg.id);
		if (type.basetype == SPIRType::AccelerationStructure)
			decl = join("spvDescriptorArray<", type_name, ">");
		else if (type_is_image)
			decl = join("spvDescriptorArray<", cv_qualifier, type_name, ">");
		else
			decl = join("spvDescriptorArray<", address_space, " ", type_name, "*>");
		address_space = "const";
	}
	else if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) && is_array(type))
	{
		is_using_builtin_array = true;
		decl += join(cv_qualifier, type_to_glsl(type, arg.id), "*");
	}
	else if (is_dynamic_img_sampler)
	{
		decl = join(cv_qualifier, "spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">");
		// Mark the variable so that we can handle passing it to another function.
		set_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
	}
	else
	{
		// If the type is a pointer type, we need to emit the cv_qualifier late.
		if (is_pointer(type))
		{
			decl = type_to_glsl(type, arg.id);
			if (*cv_qualifier != '\0')
				decl += join(" ", cv_qualifier);
		}
		else
		{
			decl = join(cv_qualifier, type_to_glsl(type, arg.id));
		}
	}

	if (!builtin && !is_pointer(var_type) &&
	    (type_storage == StorageClassFunction || type_storage == StorageClassGeneric))
	{
		// If the argument is a pure value and not an opaque type, we will pass by value.
		if (msl_options.force_native_arrays && is_array(type))
		{
			// We are receiving an array by value. This is problematic.
			// We cannot be sure of the target address space since we are supposed to receive a copy,
			// but this is not possible with MSL without some extra work.
			// We will have to assume we're getting a reference in thread address space.
			// If we happen to get a reference in constant address space, the caller must emit a copy and pass that.
			// Thread const therefore becomes the only logical choice, since we cannot "create" a constant array from
			// non-constant arrays, but we can create thread const from constant.
			decl = string("thread const ") + decl;
			decl += " (&";
			const char *restrict_kw = to_restrict(name_id, true);
			if (*restrict_kw)
			{
				decl += " ";
				decl += restrict_kw;
			}
			decl += to_expression(name_id);
			decl += ")";
			decl += type_to_array_glsl(type, name_id);
		}
		else
		{
			if (!address_space.empty())
				decl = join(address_space, " ", decl);
			decl += " ";
			decl += to_expression(name_id);
		}
	}
	else if (is_array(type) && !type_is_image)
	{
		// Arrays of opaque types are special cased.
		if (!address_space.empty())
			decl = join(address_space, " ", decl);

		// spvDescriptorArray absorbs the address space inside the template.
		if (!is_var_runtime_size_array(var))
		{
			const char *argument_buffer_space = descriptor_address_space(name_id, type_storage, nullptr);
			if (argument_buffer_space)
			{
				decl += " ";
				decl += argument_buffer_space;
			}
		}

		// Special case, need to override the array size here if we're using tess level as an argument.
		if (is_tesc_shader() && builtin &&
		    (builtin_type == BuiltInTessLevelInner || builtin_type == BuiltInTessLevelOuter))
		{
			uint32_t array_size = get_physical_tess_level_array_size(builtin_type);
			if (array_size == 1)
			{
				decl += " &";
				decl += to_expression(name_id);
			}
			else
			{
				decl += " (&";
				decl += to_expression(name_id);
				decl += ")";
				decl += join("[", array_size, "]");
			}
		}
		else if (is_var_runtime_size_array(var))
		{
			decl += " " + to_expression(name_id);
		}
		else
		{
			auto array_size_decl = type_to_array_glsl(type, name_id);
			if (array_size_decl.empty())
				decl += "& ";
			else
				decl += " (&";

			const char *restrict_kw = to_restrict(name_id, true);
			if (*restrict_kw)
			{
				decl += " ";
				decl += restrict_kw;
			}
			decl += to_expression(name_id);

			if (!array_size_decl.empty())
			{
				decl += ")";
				decl += array_size_decl;
			}
		}
	}
	else if (!type_is_image && !type_is_tlas &&
	         (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct))
	{
		// If this is going to be a reference to a variable pointer, the address space
		// for the reference has to go before the '&', but after the '*'.
		if (!address_space.empty())
		{
			if (is_pointer(type))
			{
				if (*cv_qualifier == '\0')
					decl += ' ';
				decl += join(address_space, " ");
			}
			else
				decl = join(address_space, " ", decl);
		}
		decl += "&";
		decl += " ";
		decl += to_restrict(name_id, true);
		decl += to_expression(name_id);
	}
	else if (type_is_image || type_is_tlas)
	{
		if (is_var_runtime_size_array(var))
		{
			decl = address_space + " " + decl + " " + to_expression(name_id);
		}
		else if (type.array.empty())
		{
			// For non-arrayed types we can just pass opaque descriptors by value.
			// This fixes problems if descriptors are passed by value from argument buffers and plain descriptors
			// in same shader.
			// There is no address space we can actually use, but value will work.
			// This will break if applications attempt to pass down descriptor arrays as arguments, but
			// fortunately that is extremely unlikely ...
			decl += " ";
			decl += to_expression(name_id);
		}
		else
		{
			const char *img_address_space = descriptor_address_space(name_id, type_storage, "thread const");
			decl = join(img_address_space, " ", decl);
			decl += "& ";
			decl += to_expression(name_id);
		}
	}
	else
	{
		if (!address_space.empty())
			decl = join(address_space, " ", decl);
		decl += " ";
		decl += to_expression(name_id);
	}

	// Emulate texture2D atomic operations
	auto *backing_var = maybe_get_backing_variable(name_id);
	if (backing_var && atomic_image_vars_emulated.count(backing_var->self))
	{
		auto &flags = ir.get_decoration_bitset(backing_var->self);
		const char *cv_flags = decoration_flags_signal_volatile(flags) ? "volatile " : "";
		decl += join(", ", cv_flags, "device atomic_", type_to_glsl(get<SPIRType>(var_type.image.type), 0));
		decl += "* " + to_expression(name_id) + "_atomic";
	}

	is_using_builtin_array = false;
	return decl;
}
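
// A few representative parameter declarations this function can produce, using
// hypothetical names (not taken from any real shader output):
//     const device SSBO& v_14              // block passed by reference
//     thread const float (&coeffs)[4]      // native array received "by value"
//     ..., device atomic_uint* img_atomic  // appended emulated image-atomic buffer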

// If we're currently in the entry point function, and the object
// has a qualified name, use it, otherwise use the standard name.
string CompilerMSL::to_name(uint32_t id, bool allow_alias) const
{
	if (current_function && (current_function->self == ir.default_entry_point))
	{
		auto *m = ir.find_meta(id);
		if (m && !m->decoration.qualified_alias_explicit_override && !m->decoration.qualified_alias.empty())
			return m->decoration.qualified_alias;
	}
	return Compiler::to_name(id, allow_alias);
}

// Appends the name of the member to the variable qualifier string, except for Builtins.
string CompilerMSL::append_member_name(const string &qualifier, const SPIRType &type, uint32_t index)
{
	// Don't qualify Builtin names because they are unique and are treated as such when building expressions
	BuiltIn builtin = BuiltInMax;
	if (is_member_builtin(type, index, &builtin))
		return builtin_to_glsl(builtin, type.storage);

	// Strip any underscore prefix from member name
	string mbr_name = to_member_name(type, index);
	size_t startPos = mbr_name.find_first_not_of("_");
	mbr_name = (startPos != string::npos) ? mbr_name.substr(startPos) : "";
	return join(qualifier, "_", mbr_name);
}

// Ensures that the specified name is permanently usable by prepending a prefix
// if the first chars are _ and a digit, which indicate a transient name.
string CompilerMSL::ensure_valid_name(string name, string pfx)
{
	return (name.size() >= 2 && name[0] == '_' && isdigit(name[1])) ? (pfx + name) : name;
}
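
// Example: ensure_valid_name("_53", "m") returns "m_53", since "_<digit>..."
// names denote transient SPIR-V IDs, while names like "_foo" or "bar" pass
// through unchanged.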

const std::unordered_set<std::string> &CompilerMSL::get_reserved_keyword_set()
{
	static const unordered_set<string> keywords = {
		"kernel",
		"vertex",
		"fragment",
		"compute",
		"constant",
		"device",
		"bias",
		"level",
		"gradient2d",
		"gradientcube",
		"gradient3d",
		"min_lod_clamp",
		"assert",
		"VARIABLE_TRACEPOINT",
		"STATIC_DATA_TRACEPOINT",
		"STATIC_DATA_TRACEPOINT_V",
		"METAL_ALIGN",
		"METAL_ASM",
		"METAL_CONST",
		"METAL_DEPRECATED",
		"METAL_ENABLE_IF",
		"METAL_FUNC",
		"METAL_INTERNAL",
		"METAL_NON_NULL_RETURN",
		"METAL_NORETURN",
		"METAL_NOTHROW",
		"METAL_PURE",
		"METAL_UNAVAILABLE",
		"METAL_IMPLICIT",
		"METAL_EXPLICIT",
		"METAL_CONST_ARG",
		"METAL_ARG_UNIFORM",
		"METAL_ZERO_ARG",
		"METAL_VALID_LOD_ARG",
		"METAL_VALID_LEVEL_ARG",
		"METAL_VALID_STORE_ORDER",
		"METAL_VALID_LOAD_ORDER",
		"METAL_VALID_COMPARE_EXCHANGE_FAILURE_ORDER",
		"METAL_COMPATIBLE_COMPARE_EXCHANGE_ORDERS",
		"METAL_VALID_RENDER_TARGET",
		"is_function_constant_defined",
		"CHAR_BIT",
		"SCHAR_MAX",
		"SCHAR_MIN",
		"UCHAR_MAX",
		"CHAR_MAX",
		"CHAR_MIN",
		"USHRT_MAX",
		"SHRT_MAX",
		"SHRT_MIN",
		"UINT_MAX",
		"INT_MAX",
		"INT_MIN",
		"FLT_DIG",
		"FLT_MANT_DIG",
		"FLT_MAX_10_EXP",
		"FLT_MAX_EXP",
		"FLT_MIN_10_EXP",
		"FLT_MIN_EXP",
		"FLT_RADIX",
		"FLT_MAX",
		"FLT_MIN",
		"FLT_EPSILON",
		"FP_ILOGB0",
		"FP_ILOGBNAN",
		"MAXFLOAT",
		"HUGE_VALF",
		"INFINITY",
		"NAN",
		"M_E_F",
		"M_LOG2E_F",
		"M_LOG10E_F",
		"M_LN2_F",
		"M_LN10_F",
		"M_PI_F",
		"M_PI_2_F",
		"M_PI_4_F",
		"M_1_PI_F",
		"M_2_PI_F",
		"M_2_SQRTPI_F",
		"M_SQRT2_F",
		"M_SQRT1_2_F",
		"HALF_DIG",
		"HALF_MANT_DIG",
		"HALF_MAX_10_EXP",
		"HALF_MAX_EXP",
		"HALF_MIN_10_EXP",
		"HALF_MIN_EXP",
		"HALF_RADIX",
		"HALF_MAX",
		"HALF_MIN",
		"HALF_EPSILON",
		"MAXHALF",
		"HUGE_VALH",
		"M_E_H",
		"M_LOG2E_H",
		"M_LOG10E_H",
		"M_LN2_H",
		"M_LN10_H",
		"M_PI_H",
		"M_PI_2_H",
		"M_PI_4_H",
		"M_1_PI_H",
		"M_2_PI_H",
		"M_2_SQRTPI_H",
		"M_SQRT2_H",
		"M_SQRT1_2_H",
		"DBL_DIG",
		"DBL_MANT_DIG",
		"DBL_MAX_10_EXP",
		"DBL_MAX_EXP",
		"DBL_MIN_10_EXP",
		"DBL_MIN_EXP",
		"DBL_RADIX",
		"DBL_MAX",
		"DBL_MIN",
		"DBL_EPSILON",
		"HUGE_VAL",
		"M_E",
		"M_LOG2E",
		"M_LOG10E",
		"M_LN2",
		"M_LN10",
		"M_PI",
		"M_PI_2",
		"M_PI_4",
		"M_1_PI",
		"M_2_PI",
		"M_2_SQRTPI",
		"M_SQRT2",
		"M_SQRT1_2",
		"quad_broadcast",
		"thread",
		"threadgroup",
	};

	return keywords;
}

const std::unordered_set<std::string> &CompilerMSL::get_illegal_func_names()
{
	static const unordered_set<string> illegal_func_names = {
		"main",
		"saturate",
		"assert",
		"fmin3",
		"fmax3",
		"divide",
		"median3",
		"VARIABLE_TRACEPOINT",
		"STATIC_DATA_TRACEPOINT",
		"STATIC_DATA_TRACEPOINT_V",
		"METAL_ALIGN",
		"METAL_ASM",
		"METAL_CONST",
		"METAL_DEPRECATED",
		"METAL_ENABLE_IF",
		"METAL_FUNC",
		"METAL_INTERNAL",
		"METAL_NON_NULL_RETURN",
		"METAL_NORETURN",
		"METAL_NOTHROW",
		"METAL_PURE",
		"METAL_UNAVAILABLE",
		"METAL_IMPLICIT",
		"METAL_EXPLICIT",
		"METAL_CONST_ARG",
		"METAL_ARG_UNIFORM",
		"METAL_ZERO_ARG",
		"METAL_VALID_LOD_ARG",
		"METAL_VALID_LEVEL_ARG",
		"METAL_VALID_STORE_ORDER",
		"METAL_VALID_LOAD_ORDER",
		"METAL_VALID_COMPARE_EXCHANGE_FAILURE_ORDER",
		"METAL_COMPATIBLE_COMPARE_EXCHANGE_ORDERS",
		"METAL_VALID_RENDER_TARGET",
		"is_function_constant_defined",
		"CHAR_BIT",
		"SCHAR_MAX",
		"SCHAR_MIN",
		"UCHAR_MAX",
		"CHAR_MAX",
		"CHAR_MIN",
		"USHRT_MAX",
		"SHRT_MAX",
		"SHRT_MIN",
		"UINT_MAX",
		"INT_MAX",
		"INT_MIN",
		"FLT_DIG",
		"FLT_MANT_DIG",
		"FLT_MAX_10_EXP",
		"FLT_MAX_EXP",
		"FLT_MIN_10_EXP",
		"FLT_MIN_EXP",
		"FLT_RADIX",
		"FLT_MAX",
		"FLT_MIN",
		"FLT_EPSILON",
		"FP_ILOGB0",
		"FP_ILOGBNAN",
		"MAXFLOAT",
		"HUGE_VALF",
		"INFINITY",
		"NAN",
		"M_E_F",
		"M_LOG2E_F",
		"M_LOG10E_F",
		"M_LN2_F",
		"M_LN10_F",
		"M_PI_F",
		"M_PI_2_F",
		"M_PI_4_F",
		"M_1_PI_F",
		"M_2_PI_F",
		"M_2_SQRTPI_F",
		"M_SQRT2_F",
		"M_SQRT1_2_F",
		"HALF_DIG",
		"HALF_MANT_DIG",
		"HALF_MAX_10_EXP",
		"HALF_MAX_EXP",
		"HALF_MIN_10_EXP",
		"HALF_MIN_EXP",
		"HALF_RADIX",
		"HALF_MAX",
		"HALF_MIN",
		"HALF_EPSILON",
		"MAXHALF",
		"HUGE_VALH",
		"M_E_H",
		"M_LOG2E_H",
		"M_LOG10E_H",
		"M_LN2_H",
		"M_LN10_H",
		"M_PI_H",
		"M_PI_2_H",
		"M_PI_4_H",
		"M_1_PI_H",
		"M_2_PI_H",
		"M_2_SQRTPI_H",
		"M_SQRT2_H",
		"M_SQRT1_2_H",
		"DBL_DIG",
		"DBL_MANT_DIG",
		"DBL_MAX_10_EXP",
		"DBL_MAX_EXP",
		"DBL_MIN_10_EXP",
		"DBL_MIN_EXP",
		"DBL_RADIX",
		"DBL_MAX",
		"DBL_MIN",
		"DBL_EPSILON",
		"HUGE_VAL",
		"M_E",
		"M_LOG2E",
		"M_LOG10E",
		"M_LN2",
		"M_LN10",
		"M_PI",
		"M_PI_2",
		"M_PI_4",
		"M_1_PI",
		"M_2_PI",
		"M_2_SQRTPI",
		"M_SQRT2",
		"M_SQRT1_2",
	};

	return illegal_func_names;
}

// Replace all names that match MSL keywords or Metal Standard Library functions.
void CompilerMSL::replace_illegal_names()
{
	// FIXME: MSL and GLSL are doing two different things here.
	// Agree on convention and remove this override.
	auto &keywords = get_reserved_keyword_set();
	auto &illegal_func_names = get_illegal_func_names();

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, SPIRVariable &) {
		auto *meta = ir.find_meta(self);
		if (!meta)
			return;

		auto &dec = meta->decoration;
		if (keywords.find(dec.alias) != end(keywords))
			dec.alias += "0";
	});

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t self, SPIRFunction &) {
		auto *meta = ir.find_meta(self);
		if (!meta)
			return;

		auto &dec = meta->decoration;
		if (illegal_func_names.find(dec.alias) != end(illegal_func_names))
			dec.alias += "0";
	});

	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &) {
		auto *meta = ir.find_meta(self);
		if (!meta)
			return;

		for (auto &mbr_dec : meta->members)
			if (keywords.find(mbr_dec.alias) != end(keywords))
				mbr_dec.alias += "0";
	});

	CompilerGLSL::replace_illegal_names();
}
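
// Net effect (illustrative): a shader variable named "constant" is emitted as
// "constant0", a struct member named "kernel" becomes "kernel0", and a user
// function named "saturate" becomes "saturate0", avoiding collisions with MSL
// keywords and Metal Standard Library functions.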

void CompilerMSL::replace_illegal_entry_point_names()
{
	auto &illegal_func_names = get_illegal_func_names();

	// It is important to do this before we fix up identifiers,
	// since if ep_name is reserved, we will need to fix that up,
	// and then copy alias back into entry.name after the fixup.
	for (auto &entry : ir.entry_points)
	{
		// Change both the entry point name and the alias, to keep them synced.
		string &ep_name = entry.second.name;
		if (illegal_func_names.find(ep_name) != end(illegal_func_names))
			ep_name += "0";

		ir.meta[entry.first].decoration.alias = ep_name;
	}
}

void CompilerMSL::sync_entry_point_aliases_and_names()
{
	for (auto &entry : ir.entry_points)
		entry.second.name = ir.meta[entry.first].decoration.alias;
}

string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved)
{
	auto *var = maybe_get_backing_variable(base);
	// If this is a buffer array, we have to dereference the buffer pointers.
	// Otherwise, if this is a pointer expression, dereference it.

	bool declared_as_pointer = false;

	if (var)
	{
		// Only allow -> dereference for block types. This is so we get expressions like
		// buffer[i]->first_member.second_member, rather than buffer[i]->first->second.
		const bool is_block =
		    has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);

		bool is_buffer_variable =
		    is_block && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer);
		declared_as_pointer = is_buffer_variable && is_array(get_pointee_type(var->basetype));
	}

	if (declared_as_pointer || (!ptr_chain_is_resolved && should_dereference(base)))
		return join("->", to_member_name(type, index));
	else
		return join(".", to_member_name(type, index));
}

string CompilerMSL::to_qualifiers_glsl(uint32_t id)
{
	string quals;

	auto *var = maybe_get<SPIRVariable>(id);
	auto &type = expression_type(id);

	if (type.storage == StorageClassWorkgroup || (var && variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)))
		quals += "threadgroup ";

	return quals;
}
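
// Example (hypothetical variable): a compute shader's Workgroup-storage array
// flowing through this path picks up the qualifier, e.g.
//     threadgroup float shared_data[64];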

// The optional id parameter indicates the object whose type we are trying
// to find the description for. It is optional. Most type descriptions do not
// depend on a specific object's use of that type.
string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member)
{
	string type_name;

	// Pointer?
	if (is_pointer(type) || type_is_array_of_pointers(type))
	{
		assert(type.pointer_depth > 0);

		const char *restrict_kw;

		auto type_address_space = get_type_address_space(type, id);
		const auto *p_parent_type = &get<SPIRType>(type.parent_type);

		// If we're wrapping buffer descriptors in a spvDescriptorArray, we'll have to handle it as a special case.
		if (member && id)
		{
			auto &var = get<SPIRVariable>(id);
			if (is_var_runtime_size_array(var) && is_runtime_size_array(*p_parent_type))
			{
				const bool ssbo = has_decoration(p_parent_type->self, DecorationBufferBlock);
				bool buffer_desc =
				    (var.storage == StorageClassStorageBuffer || ssbo) &&
				    msl_options.runtime_array_rich_descriptor;

				const char *wrapper_type = buffer_desc ? "spvBufferDescriptor" : "spvDescriptor";

				add_spv_func_and_recompile(SPVFuncImplVariableDescriptorArray);
				add_spv_func_and_recompile(buffer_desc ? SPVFuncImplVariableSizedDescriptor : SPVFuncImplVariableDescriptor);

				type_name = join(wrapper_type, "<", type_address_space, " ", type_to_glsl(*p_parent_type, id), " *>");
				return type_name;
			}
		}

		// Work around C pointer qualifier rules. If glsl_type is a pointer type as well
		// we'll need to emit the address space to the right.
		// We could always go this route, but it makes the code unnatural.
		// Prefer emitting thread T *foo over T thread* foo since it's more readable,
		// but we'll have to emit thread T * thread * T constant bar; for example.
		if (is_pointer(type) && is_pointer(*p_parent_type))
			type_name = join(type_to_glsl(*p_parent_type, id), " ", type_address_space, " ");
		else
		{
			// Since this is not a pointer-to-pointer, ensure we've dug down to the base type.
			// Some situations chain pointers even though they are not formally pointers-of-pointers.
			while (is_pointer(*p_parent_type))
				p_parent_type = &get<SPIRType>(p_parent_type->parent_type);

			// If we're emitting BDA, just use the templated type.
			// Emitting builtin arrays needs a lot of cooperation with other code to ensure
			// the C-style nesting works right.
			// FIXME: This is somewhat of a hack.
			bool old_is_using_builtin_array = is_using_builtin_array;
			if (is_physical_pointer(type))
				is_using_builtin_array = false;
			type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id));
			is_using_builtin_array = old_is_using_builtin_array;
		}

		switch (type.basetype)
		{
		case SPIRType::Image:
		case SPIRType::SampledImage:
		case SPIRType::Sampler:
			// These are handles.
			break;
		default:
			// Anything else can be a raw pointer.
			type_name += "*";
			restrict_kw = to_restrict(id, false);
			if (*restrict_kw)
			{
				type_name += " ";
				type_name += restrict_kw;
			}
			break;
		}
		return type_name;
	}

	switch (type.basetype)
	{
	case SPIRType::Struct:
		// Need OpName lookup here to get a "sensible" name for a struct.
		// Allow Metal to use the array<T> template to make arrays a value type
		type_name = to_name(type.self);
		break;

	case SPIRType::Image:
	case SPIRType::SampledImage:
		return image_type_glsl(type, id, member);

	case SPIRType::Sampler:
		return sampler_type(type, id, member);

	case SPIRType::Void:
		return "void";

	case SPIRType::AtomicCounter:
		return "atomic_uint";

	case SPIRType::ControlPointArray:
		return join("patch_control_point<", type_to_glsl(get<SPIRType>(type.parent_type), id), ">");

	case SPIRType::Interpolant:
		return join("interpolant<", type_to_glsl(get<SPIRType>(type.parent_type), id), ", interpolation::",
		            has_decoration(type.self, DecorationNoPerspective) ? "no_perspective" : "perspective", ">");

	// Scalars
	case SPIRType::Boolean:
	{
		auto *var = maybe_get_backing_variable(id);
		if (var && var->basevariable)
			var = &get<SPIRVariable>(var->basevariable);

		// Need to special-case threadgroup booleans. They are supposed to be logical
		// storage, but MSL compilers will sometimes crash if you use threadgroup bool.
		// Work around this by using 16-bit types instead and fix up on load-store to this data.
		if ((var && var->storage == StorageClassWorkgroup) || type.storage == StorageClassWorkgroup || member)
			type_name = "short";
		else
			type_name = "bool";
		break;
	}

	case SPIRType::Char:
	case SPIRType::SByte:
		type_name = "char";
		break;
	case SPIRType::UByte:
		type_name = "uchar";
		break;
	case SPIRType::Short:
		type_name = "short";
		break;
	case SPIRType::UShort:
		type_name = "ushort";
		break;
	case SPIRType::Int:
		type_name = "int";
		break;
	case SPIRType::UInt:
		type_name = "uint";
		break;
	case SPIRType::Int64:
		if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above.");
		type_name = "long";
		break;
	case SPIRType::UInt64:
		if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above.");
		type_name = "ulong";
		break;
	case SPIRType::Half:
		type_name = "half";
		break;
	case SPIRType::Float:
		type_name = "float";
		break;
	case SPIRType::Double:
		type_name = "double"; // Currently unsupported
		break;
	case SPIRType::AccelerationStructure:
		if (msl_options.supports_msl_version(2, 4))
			type_name = "raytracing::acceleration_structure<raytracing::instancing>";
		else if (msl_options.supports_msl_version(2, 3))
			type_name = "raytracing::instance_acceleration_structure";
		else
			SPIRV_CROSS_THROW("Acceleration Structure Type is supported in MSL 2.3 and above.");
		break;
	case SPIRType::RayQuery:
		return "raytracing::intersection_query<raytracing::instancing, raytracing::triangle_data>";

	default:
		return "unknown_type";
	}

	// Matrix?
	if (type.columns > 1)
	{
		auto *var = maybe_get_backing_variable(id);
		if (var && var->basevariable)
			var = &get<SPIRVariable>(var->basevariable);

		// Need to special-case threadgroup matrices. Due to an oversight, Metal's
		// matrix struct prior to Metal 3 lacks constructors in the threadgroup AS,
		// preventing us from default-constructing or initializing matrices in threadgroup storage.
		// Work around this by using our own type as storage.
		if (((var && var->storage == StorageClassWorkgroup) || type.storage == StorageClassWorkgroup) &&
		    !msl_options.supports_msl_version(3, 0))
		{
			add_spv_func_and_recompile(SPVFuncImplStorageMatrix);
			type_name = "spvStorage_" + type_name;
		}

		type_name += to_string(type.columns) + "x";
	}

	// Vector or Matrix?
	if (type.vecsize > 1)
		type_name += to_string(type.vecsize);

	if (type.array.empty() || using_builtin_array())
	{
		return type_name;
	}
	else
	{
		// Allow Metal to use the array<T> template to make arrays a value type
		add_spv_func_and_recompile(SPVFuncImplUnsafeArray);
		string res;
		string sizes;

		for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
		{
			res += "spvUnsafeArray<";
			sizes += ", ";
			sizes += to_array_size(type, i);
			sizes += ">";
		}

		res += type_name + sizes;
		return res;
	}
}

string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
{
	return type_to_glsl(type, id, false);
}

string CompilerMSL::type_to_array_glsl(const SPIRType &type, uint32_t variable_id)
{
	// Allow Metal to use the array<T> template to make arrays a value type
	switch (type.basetype)
	{
	case SPIRType::AtomicCounter:
	case SPIRType::ControlPointArray:
	case SPIRType::RayQuery:
		return CompilerGLSL::type_to_array_glsl(type, variable_id);

	default:
		if (type_is_array_of_pointers(type) || using_builtin_array())
		{
			const SPIRVariable *var = variable_id ? &get<SPIRVariable>(variable_id) : nullptr;
			if (var && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer) &&
			    is_array(get_variable_data_type(*var)))
			{
				return join("[", get_resource_array_size(type, variable_id), "]");
			}
			else
				return CompilerGLSL::type_to_array_glsl(type, variable_id);
		}
		else
			return "";
	}
}
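
// Sample mappings implied by the cases above: a 4x4 FP32 matrix maps to
// "float4x4" (or "spvStorage_float4x4" in threadgroup storage before MSL 3.0),
// a boolean in Workgroup storage maps to "short", and a sized float[4] array
// in value context maps to "spvUnsafeArray<float, 4>".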

string CompilerMSL::constant_op_expression(const SPIRConstantOp &cop)
{
	switch (cop.opcode)
	{
	case OpQuantizeToF16:
		add_spv_func_and_recompile(SPVFuncImplQuantizeToF16);
		return join("spvQuantizeToF16(", to_expression(cop.arguments[0]), ")");
	default:
		return CompilerGLSL::constant_op_expression(cop);
	}
}
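
// OpQuantizeToF16 rounds a 32-bit float to half precision while keeping the
// 32-bit type, i.e. roughly float(half(x)); the emitted spvQuantizeToF16()
// helper performs that round trip, so e.g. spvQuantizeToF16(0.1f) yields the
// nearest half-representable value rather than 0.1f exactly.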

bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const
{
	if (variable.storage == storage)
		return true;

	if (storage == StorageClassWorkgroup)
	{
		// Specially masked IO block variable.
		// Normally, we will never access IO blocks directly here.
		// The only scenario in which that should occur is with a masked IO block.
		if (is_tesc_shader() && variable.storage == StorageClassOutput &&
		    has_decoration(get<SPIRType>(variable.basetype).self, DecorationBlock))
		{
			return true;
		}

		return variable.storage == StorageClassOutput && is_tesc_shader() && is_stage_output_variable_masked(variable);
	}
	else if (storage == StorageClassStorageBuffer)
	{
		// These builtins are passed directly; we don't want to use remapping
		// for them.
		auto builtin = (BuiltIn)get_decoration(variable.self, DecorationBuiltIn);
		if (is_tese_shader() && is_builtin_variable(variable) && (builtin == BuiltInTessCoord || builtin == BuiltInPrimitiveId))
			return false;

		// We won't be able to catch writes to control point outputs here since variable
		// refers to a function local pointer.
		// This is fine, as there cannot be concurrent writers to that memory anyways,
		// so we just ignore that case.

		return (variable.storage == StorageClassOutput || variable.storage == StorageClassInput) &&
		       !variable_storage_requires_stage_io(variable.storage) &&
		       (variable.storage != StorageClassOutput || !is_stage_output_variable_masked(variable));
	}
	else
	{
		return false;
	}
}
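
// Illustrative consequence: for a masked tessellation-control output variable,
// variable_decl_is_remapped_storage(var, StorageClassWorkgroup) returns true,
// which is what makes to_qualifiers_glsl() above emit "threadgroup" for a
// variable whose SPIR-V storage class is Output.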

// GCC workaround of lambdas calling protected funcs
std::string CompilerMSL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id)
{
	return CompilerGLSL::variable_decl(type, name, id);
}

std::string CompilerMSL::sampler_type(const SPIRType &type, uint32_t id, bool member)
{
	auto *var = maybe_get<SPIRVariable>(id);
	if (var && var->basevariable)
	{
		// Check against the base variable, and not a fake ID which might have been generated for this variable.
		id = var->basevariable;
	}

	if (!type.array.empty())
	{
		if (!msl_options.supports_msl_version(2))
			SPIRV_CROSS_THROW("MSL 2.0 or greater is required for arrays of samplers.");
		if (type.array.size() > 1)
			SPIRV_CROSS_THROW("Arrays of arrays of samplers are not supported in MSL.");

		// Arrays of samplers in MSL must be declared with a special array<T, N> syntax ala C++11 std::array.
		// If we have a runtime array, it could be a variable-count descriptor set binding.
		auto &parent = get<SPIRType>(get_pointee_type(type).parent_type);
		uint32_t array_size = get_resource_array_size(type, id);

		if (array_size == 0)
		{
			add_spv_func_and_recompile(SPVFuncImplVariableDescriptor);
			add_spv_func_and_recompile(SPVFuncImplVariableDescriptorArray);

			const char *descriptor_wrapper = processing_entry_point ? "const device spvDescriptor" : "const spvDescriptorArray";
			if (member)
				descriptor_wrapper = "spvDescriptor";
			return join(descriptor_wrapper, "<", sampler_type(parent, id, false), ">",
			            processing_entry_point ? "*" : "");
		}
		else
		{
			return join("array<", sampler_type(parent, id, false), ", ", array_size, ">");
		}
	}
	else
		return "sampler";
}
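
// Examples from the cases above: a sampler array of size 4 is declared as
// "array<sampler, 4>", while a runtime-sized (variable-count) sampler array
// becomes "const device spvDescriptor<sampler>*" when declared at the entry
// point.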

// Returns an MSL string describing the SPIR-V image type
string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool member)
{
	auto *var = maybe_get<SPIRVariable>(id);
	if (var && var->basevariable)
	{
		// For comparison images, check against the base variable,
		// and not the fake ID which might have been generated for this variable.
		id = var->basevariable;
	}

	if (!type.array.empty())
	{
		uint32_t major = 2, minor = 0;
		if (msl_options.is_ios())
		{
			major = 1;
			minor = 2;
		}
		if (!msl_options.supports_msl_version(major, minor))
		{
			if (msl_options.is_ios())
				SPIRV_CROSS_THROW("MSL 1.2 or greater is required for arrays of textures.");
			else
				SPIRV_CROSS_THROW("MSL 2.0 or greater is required for arrays of textures.");
		}

		if (type.array.size() > 1)
			SPIRV_CROSS_THROW("Arrays of arrays of textures are not supported in MSL.");

		// Arrays of images in MSL must be declared with a special array<T, N> syntax à la C++11 std::array.
		// If we have a runtime array, it could be a variable-count descriptor set binding.
		auto &parent = get<SPIRType>(get_pointee_type(type).parent_type);
		uint32_t array_size = get_resource_array_size(type, id);

		if (array_size == 0)
		{
			add_spv_func_and_recompile(SPVFuncImplVariableDescriptor);
			add_spv_func_and_recompile(SPVFuncImplVariableDescriptorArray);

			const char *descriptor_wrapper = processing_entry_point ? "const device spvDescriptor" : "const spvDescriptorArray";
			if (member)
			{
				descriptor_wrapper = "spvDescriptor";
				// This requires a specialized wrapper type that packs image and sampler side by side.
				// It is possible in theory.
				if (type.basetype == SPIRType::SampledImage)
					SPIRV_CROSS_THROW("Argument buffer runtime array currently not supported for combined image sampler.");
			}
			return join(descriptor_wrapper, "<", image_type_glsl(parent, id, false), ">",
			            processing_entry_point ? "*" : "");
		}
		else
		{
			return join("array<", image_type_glsl(parent, id, false), ", ", array_size, ">");
		}
	}

	string img_type_name;
	auto &img_type = type.image;

	if (is_depth_image(type, id))
	{
		switch (img_type.dim)
		{
		case Dim1D:
		case Dim2D:
			if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D)
			{
				// Use a native Metal 1D texture
				img_type_name += "depth1d_unsupported_by_metal";
				break;
			}

			if (img_type.ms && img_type.arrayed)
			{
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Multisampled array textures are supported from 2.1.");
				img_type_name += "depth2d_ms_array";
			}
			else if (img_type.ms)
				img_type_name += "depth2d_ms";
			else if (img_type.arrayed)
				img_type_name += "depth2d_array";
			else
				img_type_name += "depth2d";
			break;
		case Dim3D:
			img_type_name += "depth3d_unsupported_by_metal";
			break;
		case DimCube:
			if (!msl_options.emulate_cube_array)
				img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube");
			else
				img_type_name += (img_type.arrayed ? "depth2d_array" : "depthcube");
			break;
		default:
			img_type_name += "unknown_depth_texture_type";
			break;
		}
	}
	else
	{
		switch (img_type.dim)
		{
		case DimBuffer:
			if (img_type.ms || img_type.arrayed)
				SPIRV_CROSS_THROW("Cannot use texel buffers with multisampling or array layers.");

			if (msl_options.texture_buffer_native)
			{
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Native texture_buffer type is only supported in MSL 2.1 and above.");
				img_type_name = "texture_buffer";
			}
			else
				img_type_name += "texture2d";
			break;
		case Dim1D:
		case Dim2D:
		case DimSubpassData:
		{
			bool subpass_array =
			    img_type.dim == DimSubpassData && (msl_options.multiview || msl_options.arrayed_subpass_input);
			if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D)
			{
				// Use a native Metal 1D texture
				img_type_name += (img_type.arrayed ? "texture1d_array" : "texture1d");
				break;
			}

			// Use Metal's native frame-buffer fetch API for subpass inputs.
			if (type_is_msl_framebuffer_fetch(type))
			{
				auto img_type_4 = get<SPIRType>(img_type.type);
				img_type_4.vecsize = 4;
				return type_to_glsl(img_type_4);
			}
			if (img_type.ms && (img_type.arrayed || subpass_array))
			{
				if (!msl_options.supports_msl_version(2, 1))
					SPIRV_CROSS_THROW("Multisampled array textures are supported from 2.1.");
				img_type_name += "texture2d_ms_array";
			}
			else if (img_type.ms)
				img_type_name += "texture2d_ms";
			else if (img_type.arrayed || subpass_array)
				img_type_name += "texture2d_array";
			else
				img_type_name += "texture2d";
			break;
		}
		case Dim3D:
			img_type_name += "texture3d";
			break;
		case DimCube:
			if (!msl_options.emulate_cube_array)
				img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube");
			else
				img_type_name += (img_type.arrayed ? "texture2d_array" : "texturecube");
			break;
		default:
			img_type_name += "unknown_texture_type";
			break;
		}
	}

	// Append the pixel type
	img_type_name += "<";
	img_type_name += type_to_glsl(get<SPIRType>(img_type.type));

	// For unsampled images, append the sample/read/write access qualifier.
	// For kernel images, the access qualifier may be supplied directly by SPIR-V.
	// Otherwise it may be set based on whether the image is read from or written to within the shader.
	if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
	{
		switch (img_type.access)
		{
		case AccessQualifierReadOnly:
			img_type_name += ", access::read";
			break;
		case AccessQualifierWriteOnly:
			img_type_name += ", access::write";
			break;
		case AccessQualifierReadWrite:
			img_type_name += ", access::read_write";
			break;
		default:
		{
			auto *p_var = maybe_get_backing_variable(id);
			if (p_var && p_var->basevariable)
				p_var = maybe_get<SPIRVariable>(p_var->basevariable);
			if (p_var && !has_decoration(p_var->self, DecorationNonWritable))
			{
				img_type_name += ", access::";

				if (!has_decoration(p_var->self, DecorationNonReadable))
					img_type_name += "read_";

				img_type_name += "write";
			}
			break;
		}
		}
	}

	img_type_name += ">";

	return img_type_name;
}
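
// Illustrative examples of image_type_glsl() output above: a float4 storage image that is both
// read and written emits "texture2d<float, access::read_write>", while an arrayed depth texture
// (no access qualifier, since it is sampled) emits "depth2d_array<float>".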

void CompilerMSL::emit_subgroup_op(const Instruction &i)
{
	const uint32_t *ops = stream(i);
	auto op = static_cast<Op>(i.op);

	if (msl_options.emulate_subgroups)
	{
		// In this mode, only the GroupNonUniform cap is supported. The only op
		// we need to handle, then, is OpGroupNonUniformElect.
		if (op != OpGroupNonUniformElect)
			SPIRV_CROSS_THROW("Subgroup emulation does not support operations other than Elect.");
		// In this mode, the subgroup size is assumed to be one, so every invocation
		// is elected.
		emit_op(ops[0], ops[1], "true", true);
		return;
	}

	// Metal 2.0 is required. iOS only supports quad ops on 11.0 (2.0), with
	// full support in 13.0 (2.2). macOS only supports broadcast and shuffle on
	// 10.13 (2.0), with full support in 10.14 (2.1).
	// Note that Apple GPUs before A13 make no distinction between a quad-group
	// and a SIMD-group; all SIMD-groups are quad-groups on those.
	if (!msl_options.supports_msl_version(2))
		SPIRV_CROSS_THROW("Subgroups are only supported in Metal 2.0 and up.");

	// If we need to do implicit bitcasts, make sure we do it with the correct type.
	uint32_t integer_width = get_integer_width_for_instruction(i);
	auto int_type = to_signed_basetype(integer_width);
	auto uint_type = to_unsigned_basetype(integer_width);

	if (msl_options.is_ios() && (!msl_options.supports_msl_version(2, 3) || !msl_options.ios_use_simdgroup_functions))
	{
		switch (op)
		{
		default:
			SPIRV_CROSS_THROW("Subgroup ops beyond broadcast, ballot, and shuffle on iOS require Metal 2.3 and up.");
		case OpGroupNonUniformBroadcastFirst:
			if (!msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("BroadcastFirst on iOS requires Metal 2.2 and up.");
			break;
		case OpGroupNonUniformElect:
			if (!msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("Elect on iOS requires Metal 2.2 and up.");
			break;
		case OpGroupNonUniformAny:
		case OpGroupNonUniformAll:
		case OpGroupNonUniformAllEqual:
		case OpGroupNonUniformBallot:
		case OpGroupNonUniformInverseBallot:
		case OpGroupNonUniformBallotBitExtract:
		case OpGroupNonUniformBallotFindLSB:
		case OpGroupNonUniformBallotFindMSB:
		case OpGroupNonUniformBallotBitCount:
		case OpSubgroupBallotKHR:
		case OpSubgroupAllKHR:
		case OpSubgroupAnyKHR:
		case OpSubgroupAllEqualKHR:
			if (!msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("Ballot ops on iOS require Metal 2.2 and up.");
			break;
		case OpGroupNonUniformBroadcast:
		case OpGroupNonUniformShuffle:
		case OpGroupNonUniformShuffleXor:
		case OpGroupNonUniformShuffleUp:
		case OpGroupNonUniformShuffleDown:
		case OpGroupNonUniformQuadSwap:
		case OpGroupNonUniformQuadBroadcast:
		case OpSubgroupReadInvocationKHR:
			break;
		}
	}

	if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
	{
		switch (op)
		{
		default:
			SPIRV_CROSS_THROW("Subgroup ops beyond broadcast and shuffle on macOS require Metal 2.1 and up.");
		case OpGroupNonUniformBroadcast:
		case OpGroupNonUniformShuffle:
		case OpGroupNonUniformShuffleXor:
		case OpGroupNonUniformShuffleUp:
		case OpGroupNonUniformShuffleDown:
		case OpSubgroupReadInvocationKHR:
			break;
		}
	}

	uint32_t op_idx = 0;
	uint32_t result_type = ops[op_idx++];
	uint32_t id = ops[op_idx++];

	Scope scope;
	switch (op)
	{
	case OpSubgroupBallotKHR:
	case OpSubgroupFirstInvocationKHR:
	case OpSubgroupReadInvocationKHR:
	case OpSubgroupAllKHR:
	case OpSubgroupAnyKHR:
	case OpSubgroupAllEqualKHR:
		// These earlier instructions don't have the scope operand.
		scope = ScopeSubgroup;
		break;
	default:
		scope = static_cast<Scope>(evaluate_constant_u32(ops[op_idx++]));
		break;
	}
	if (scope != ScopeSubgroup)
		SPIRV_CROSS_THROW("Only subgroup scope is supported.");

	switch (op)
	{
	case OpGroupNonUniformElect:
		if (msl_options.use_quadgroup_operation())
			emit_op(result_type, id, "quad_is_first()", false);
		else
			emit_op(result_type, id, "simd_is_first()", false);
		break;

	case OpGroupNonUniformBroadcast:
	case OpSubgroupReadInvocationKHR:
		emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvSubgroupBroadcast");
		break;

	case OpGroupNonUniformBroadcastFirst:
	case OpSubgroupFirstInvocationKHR:
		emit_unary_func_op(result_type, id, ops[op_idx], "spvSubgroupBroadcastFirst");
		break;

	case OpGroupNonUniformBallot:
	case OpSubgroupBallotKHR:
		emit_unary_func_op(result_type, id, ops[op_idx], "spvSubgroupBallot");
		break;

	case OpGroupNonUniformInverseBallot:
		emit_binary_func_op(result_type, id, ops[op_idx], builtin_subgroup_invocation_id_id, "spvSubgroupBallotBitExtract");
		break;

	case OpGroupNonUniformBallotBitExtract:
		emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvSubgroupBallotBitExtract");
		break;

	case OpGroupNonUniformBallotFindLSB:
		emit_binary_func_op(result_type, id, ops[op_idx], builtin_subgroup_size_id, "spvSubgroupBallotFindLSB");
		break;

	case OpGroupNonUniformBallotFindMSB:
		emit_binary_func_op(result_type, id, ops[op_idx], builtin_subgroup_size_id, "spvSubgroupBallotFindMSB");
		break;

	case OpGroupNonUniformBallotBitCount:
	{
		auto operation = static_cast<GroupOperation>(ops[op_idx++]);
		switch (operation)
		{
		case GroupOperationReduce:
			emit_binary_func_op(result_type, id, ops[op_idx], builtin_subgroup_size_id, "spvSubgroupBallotBitCount");
			break;
		case GroupOperationInclusiveScan:
			emit_binary_func_op(result_type, id, ops[op_idx], builtin_subgroup_invocation_id_id,
			                    "spvSubgroupBallotInclusiveBitCount");
			break;
		case GroupOperationExclusiveScan:
			emit_binary_func_op(result_type, id, ops[op_idx], builtin_subgroup_invocation_id_id,
			                    "spvSubgroupBallotExclusiveBitCount");
			break;
		default:
			SPIRV_CROSS_THROW("Invalid BitCount operation.");
		}
		break;
	}

	case OpGroupNonUniformShuffle:
		emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvSubgroupShuffle");
		break;

	case OpGroupNonUniformShuffleXor:
		emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvSubgroupShuffleXor");
		break;

	case OpGroupNonUniformShuffleUp:
		emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvSubgroupShuffleUp");
		break;

	case OpGroupNonUniformShuffleDown:
		emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvSubgroupShuffleDown");
		break;

	case OpGroupNonUniformAll:
	case OpSubgroupAllKHR:
		if (msl_options.use_quadgroup_operation())
			emit_unary_func_op(result_type, id, ops[op_idx], "quad_all");
		else
			emit_unary_func_op(result_type, id, ops[op_idx], "simd_all");
		break;

	case OpGroupNonUniformAny:
	case OpSubgroupAnyKHR:
		if (msl_options.use_quadgroup_operation())
			emit_unary_func_op(result_type, id, ops[op_idx], "quad_any");
		else
			emit_unary_func_op(result_type, id, ops[op_idx], "simd_any");
		break;

	case OpGroupNonUniformAllEqual:
	case OpSubgroupAllEqualKHR:
		emit_unary_func_op(result_type, id, ops[op_idx], "spvSubgroupAllEqual");
		break;

	// clang-format off
#define MSL_GROUP_OP(op, msl_op) \
case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[op_idx++]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op(result_type, id, ops[op_idx], "simd_" #msl_op); \
		else if (operation == GroupOperationInclusiveScan) \
			emit_unary_func_op(result_type, id, ops[op_idx], "simd_prefix_inclusive_" #msl_op); \
		else if (operation == GroupOperationExclusiveScan) \
			emit_unary_func_op(result_type, id, ops[op_idx], "simd_prefix_exclusive_" #msl_op); \
		else if (operation == GroupOperationClusteredReduce) \
		{ \
			/* Only cluster sizes of 4 are supported. */ \
			uint32_t cluster_size = evaluate_constant_u32(ops[op_idx + 1]); \
			if (cluster_size != 4) \
				SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \
			emit_unary_func_op(result_type, id, ops[op_idx], "quad_" #msl_op); \
		} \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}
	MSL_GROUP_OP(FAdd, sum)
	MSL_GROUP_OP(FMul, product)
	MSL_GROUP_OP(IAdd, sum)
	MSL_GROUP_OP(IMul, product)
#undef MSL_GROUP_OP
	// The others, unfortunately, don't support InclusiveScan or ExclusiveScan.

#define MSL_GROUP_OP(op, msl_op) \
case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[op_idx++]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op(result_type, id, ops[op_idx], "simd_" #msl_op); \
		else if (operation == GroupOperationInclusiveScan) \
			SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \
		else if (operation == GroupOperationExclusiveScan) \
			SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \
		else if (operation == GroupOperationClusteredReduce) \
		{ \
			/* Only cluster sizes of 4 are supported. */ \
			uint32_t cluster_size = evaluate_constant_u32(ops[op_idx + 1]); \
			if (cluster_size != 4) \
				SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \
			emit_unary_func_op(result_type, id, ops[op_idx], "quad_" #msl_op); \
		} \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}

#define MSL_GROUP_OP_CAST(op, msl_op, type) \
case OpGroupNonUniform##op: \
	{ \
		auto operation = static_cast<GroupOperation>(ops[op_idx++]); \
		if (operation == GroupOperationReduce) \
			emit_unary_func_op_cast(result_type, id, ops[op_idx], "simd_" #msl_op, type, type); \
		else if (operation == GroupOperationInclusiveScan) \
			SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \
		else if (operation == GroupOperationExclusiveScan) \
			SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \
		else if (operation == GroupOperationClusteredReduce) \
		{ \
			/* Only cluster sizes of 4 are supported. */ \
			uint32_t cluster_size = evaluate_constant_u32(ops[op_idx + 1]); \
			if (cluster_size != 4) \
				SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \
			emit_unary_func_op_cast(result_type, id, ops[op_idx], "quad_" #msl_op, type, type); \
		} \
		else \
			SPIRV_CROSS_THROW("Invalid group operation."); \
		break; \
	}

	MSL_GROUP_OP(FMin, min)
	MSL_GROUP_OP(FMax, max)
	MSL_GROUP_OP_CAST(SMin, min, int_type)
	MSL_GROUP_OP_CAST(SMax, max, int_type)
	MSL_GROUP_OP_CAST(UMin, min, uint_type)
	MSL_GROUP_OP_CAST(UMax, max, uint_type)
	MSL_GROUP_OP(BitwiseAnd, and)
	MSL_GROUP_OP(BitwiseOr, or)
	MSL_GROUP_OP(BitwiseXor, xor)
	MSL_GROUP_OP(LogicalAnd, and)
	MSL_GROUP_OP(LogicalOr, or)
	MSL_GROUP_OP(LogicalXor, xor)
	// clang-format on
#undef MSL_GROUP_OP
#undef MSL_GROUP_OP_CAST

	case OpGroupNonUniformQuadSwap:
		emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvQuadSwap");
		break;

	case OpGroupNonUniformQuadBroadcast:
		emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvQuadBroadcast");
		break;

	default:
		SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
	}

	register_control_dependent_expression(id);
}
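
// Worked example of the MSL_GROUP_OP expansion above (illustrative): OpGroupNonUniformFAdd with
// GroupOperationReduce emits simd_sum(x), InclusiveScan emits simd_prefix_inclusive_sum(x), and
// a ClusteredReduce with cluster size 4 emits quad_sum(x).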

string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
	if (out_type.basetype == in_type.basetype)
		return "";

	assert(out_type.basetype != SPIRType::Boolean);
	assert(in_type.basetype != SPIRType::Boolean);

	bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type) && (out_type.vecsize == in_type.vecsize);
	bool same_size_cast = (out_type.width * out_type.vecsize) == (in_type.width * in_type.vecsize);

	// Bitcasting can only be used between types of the same overall size.
	// And always formally cast between integers, because it's trivial, and also
	// because Metal can internally cast the results of some integer ops to a larger
	// size (e.g. short shift right becomes int), which means chaining integer ops
	// together may introduce size variations that SPIR-V doesn't know about.
	if (same_size_cast && !integral_cast)
		return "as_type<" + type_to_glsl(out_type) + ">";
	else
		return type_to_glsl(out_type);
}
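
// For example (illustrative): a uint -> float bitcast yields "as_type<float>" (reinterpreting
// the bits), while an int <-> uint conversion of equal vector size is an integral cast and
// falls through to a plain functional cast such as "uint".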

bool CompilerMSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t)
{
	// This is handled from the outside where we deal with PtrToU/UToPtr and friends.
	return false;
}

// Returns an MSL string identifying the name of a SPIR-V builtin.
// Output builtins are qualified with the name of the stage out structure.
string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
{
	switch (builtin)
	{
	// Handle HLSL-style 0-based vertex/instance index.
	// Override GLSL compiler strictness
	case BuiltInVertexId:
		ensure_builtin(StorageClassInput, BuiltInVertexId);
		if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			if (builtin_declaration)
			{
				if (needs_base_vertex_arg != TriState::No)
					needs_base_vertex_arg = TriState::Yes;
				return "gl_VertexID";
			}
			else
			{
				ensure_builtin(StorageClassInput, BuiltInBaseVertex);
				return "(gl_VertexID - gl_BaseVertex)";
			}
		}
		else
		{
			return "gl_VertexID";
		}
	case BuiltInInstanceId:
		ensure_builtin(StorageClassInput, BuiltInInstanceId);
		if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			if (builtin_declaration)
			{
				if (needs_base_instance_arg != TriState::No)
					needs_base_instance_arg = TriState::Yes;
				return "gl_InstanceID";
			}
			else
			{
				ensure_builtin(StorageClassInput, BuiltInBaseInstance);
				return "(gl_InstanceID - gl_BaseInstance)";
			}
		}
		else
		{
			return "gl_InstanceID";
		}
	case BuiltInVertexIndex:
		ensure_builtin(StorageClassInput, BuiltInVertexIndex);
		if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			if (builtin_declaration)
			{
				if (needs_base_vertex_arg != TriState::No)
					needs_base_vertex_arg = TriState::Yes;
				return "gl_VertexIndex";
			}
			else
			{
				ensure_builtin(StorageClassInput, BuiltInBaseVertex);
				return "(gl_VertexIndex - gl_BaseVertex)";
			}
		}
		else
		{
			return "gl_VertexIndex";
		}
	case BuiltInInstanceIndex:
		ensure_builtin(StorageClassInput, BuiltInInstanceIndex);
		if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			if (builtin_declaration)
			{
				if (needs_base_instance_arg != TriState::No)
					needs_base_instance_arg = TriState::Yes;
				return "gl_InstanceIndex";
			}
			else
			{
				ensure_builtin(StorageClassInput, BuiltInBaseInstance);
				return "(gl_InstanceIndex - gl_BaseInstance)";
			}
		}
		else
		{
			return "gl_InstanceIndex";
		}
	case BuiltInBaseVertex:
		if (msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			needs_base_vertex_arg = TriState::No;
			return "gl_BaseVertex";
		}
		else
		{
			SPIRV_CROSS_THROW("BaseVertex requires Metal 1.1 and Mac or Apple A9+ hardware.");
		}
	case BuiltInBaseInstance:
		if (msl_options.supports_msl_version(1, 1) &&
		    (msl_options.ios_support_base_vertex_instance || msl_options.is_macos()))
		{
			needs_base_instance_arg = TriState::No;
			return "gl_BaseInstance";
		}
		else
		{
			SPIRV_CROSS_THROW("BaseInstance requires Metal 1.1 and Mac or Apple A9+ hardware.");
		}
	case BuiltInDrawIndex:
		SPIRV_CROSS_THROW("DrawIndex is not supported in MSL.");

	// When used in the entry function, output builtins are qualified with output struct name.
	// Test storage class as NOT Input, as output builtins might be part of generic type.
	// Also don't do this for tessellation control shaders.
	case BuiltInViewportIndex:
		if (!msl_options.supports_msl_version(2, 0))
			SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0.");
		/* fallthrough */
	case BuiltInFragDepth:
	case BuiltInFragStencilRefEXT:
		if ((builtin == BuiltInFragDepth && !msl_options.enable_frag_depth_builtin) ||
		    (builtin == BuiltInFragStencilRefEXT && !msl_options.enable_frag_stencil_ref_builtin))
			break;
		/* fallthrough */
	case BuiltInPosition:
	case BuiltInPointSize:
	case BuiltInClipDistance:
	case BuiltInCullDistance:
	case BuiltInLayer:
		if (is_tesc_shader())
			break;
		if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) &&
		    !is_stage_output_builtin_masked(builtin))
			return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage);
		break;

	case BuiltInSampleMask:
		if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point) &&
		    (has_additional_fixed_sample_mask() || needs_sample_id))
		{
			string samp_mask_in;
			samp_mask_in += "(" + CompilerGLSL::builtin_to_glsl(builtin, storage);
			if (has_additional_fixed_sample_mask())
				samp_mask_in += " & " + additional_fixed_sample_mask_str();
			if (needs_sample_id)
				samp_mask_in += " & (1 << gl_SampleID)";
			samp_mask_in += ")";
			return samp_mask_in;
		}
		if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) &&
		    !is_stage_output_builtin_masked(builtin))
			return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage);
		break;

	case BuiltInBaryCoordKHR:
	case BuiltInBaryCoordNoPerspKHR:
		if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point))
			return stage_in_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage);
		break;

	case BuiltInTessLevelOuter:
		if (is_tesc_shader() && storage != StorageClassInput && current_function &&
		    (current_function->self == ir.default_entry_point))
		{
			return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id),
			            "].edgeTessellationFactor");
		}
		break;

	case BuiltInTessLevelInner:
		if (is_tesc_shader() && storage != StorageClassInput && current_function &&
		    (current_function->self == ir.default_entry_point))
		{
			return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id),
			            "].insideTessellationFactor");
		}
		break;

	case BuiltInHelperInvocation:
		if (needs_manual_helper_invocation_updates())
			break;
		if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.3 on iOS.");
		else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1))
			SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS.");
		// In SPIR-V 1.6 with Volatile HelperInvocation, we cannot emit a fixup early.
		return "simd_is_helper_thread()";

	default:
		break;
	}

	return CompilerGLSL::builtin_to_glsl(builtin, storage);
}
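
// Example (illustrative): with enable_base_index_zero active, a non-declaration use of
// BuiltInVertexIndex expands to "(gl_VertexIndex - gl_BaseVertex)", preserving HLSL-style
// 0-based indexing in the generated MSL.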

// Returns an MSL string attribute qualifier for a SPIR-V builtin
string CompilerMSL::builtin_qualifier(BuiltIn builtin)
{
	auto &execution = get_entry_point();

	switch (builtin)
	{
	// Vertex function in
	case BuiltInVertexId:
		return "vertex_id";
	case BuiltInVertexIndex:
		return "vertex_id";
	case BuiltInBaseVertex:
		return "base_vertex";
	case BuiltInInstanceId:
		return "instance_id";
	case BuiltInInstanceIndex:
		return "instance_id";
	case BuiltInBaseInstance:
		return "base_instance";
	case BuiltInDrawIndex:
		SPIRV_CROSS_THROW("DrawIndex is not supported in MSL.");

	// Vertex function out
	case BuiltInClipDistance:
		return "clip_distance";
	case BuiltInPointSize:
		return "point_size";
	case BuiltInPosition:
		if (position_invariant)
		{
			if (!msl_options.supports_msl_version(2, 1))
				SPIRV_CROSS_THROW("Invariant position is only supported on MSL 2.1 and up.");
			return "position, invariant";
		}
		else
			return "position";
	case BuiltInLayer:
		return "render_target_array_index";
	case BuiltInViewportIndex:
		if (!msl_options.supports_msl_version(2, 0))
			SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0.");
		return "viewport_array_index";

	// Tess. control function in
	case BuiltInInvocationId:
		if (msl_options.multi_patch_workgroup)
		{
			// Shouldn't be reached.
			SPIRV_CROSS_THROW("InvocationId is computed manually with multi-patch workgroups in MSL.");
		}
		return "thread_index_in_threadgroup";
	case BuiltInPatchVertices:
		// Shouldn't be reached.
		SPIRV_CROSS_THROW("PatchVertices is derived from the auxiliary buffer in MSL.");
	case BuiltInPrimitiveId:
		switch (execution.model)
		{
		case ExecutionModelTessellationControl:
			if (msl_options.multi_patch_workgroup)
			{
				// Shouldn't be reached.
				SPIRV_CROSS_THROW("PrimitiveId is computed manually with multi-patch workgroups in MSL.");
			}
			return "threadgroup_position_in_grid";
		case ExecutionModelTessellationEvaluation:
			return "patch_id";
		case ExecutionModelFragment:
			if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
				SPIRV_CROSS_THROW("PrimitiveId on iOS requires MSL 2.3.");
			else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("PrimitiveId on macOS requires MSL 2.2.");
			return "primitive_id";
		default:
			SPIRV_CROSS_THROW("PrimitiveId is not supported in this execution model.");
		}

	// Tess. control function out
	case BuiltInTessLevelOuter:
	case BuiltInTessLevelInner:
		// Shouldn't be reached.
		SPIRV_CROSS_THROW("Tessellation levels are handled specially in MSL.");

	// Tess. evaluation function in
	case BuiltInTessCoord:
		return "position_in_patch";

	// Fragment function in
	case BuiltInFrontFacing:
		return "front_facing";
	case BuiltInPointCoord:
		return "point_coord";
	case BuiltInFragCoord:
		return "position";
	case BuiltInSampleId:
		return "sample_id";
	case BuiltInSampleMask:
		return "sample_mask";
	case BuiltInSamplePosition:
		// Shouldn't be reached.
		SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL.");
	case BuiltInViewIndex:
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders.");
		// The ViewIndex was implicitly used in the prior stages to set the render_target_array_index,
		// so we can get it from there.
		return "render_target_array_index";

	// Fragment function out
	case BuiltInFragDepth:
		if (execution.flags.get(ExecutionModeDepthGreater))
			return "depth(greater)";
		else if (execution.flags.get(ExecutionModeDepthLess))
			return "depth(less)";
		else
			return "depth(any)";
	case BuiltInFragStencilRefEXT:
		return "stencil";

	// Compute function in
	case BuiltInGlobalInvocationId:
		return "thread_position_in_grid";
	case BuiltInWorkgroupId:
		return "threadgroup_position_in_grid";
	case BuiltInNumWorkgroups:
		return "threadgroups_per_grid";
	case BuiltInLocalInvocationId:
		return "thread_position_in_threadgroup";
	case BuiltInLocalInvocationIndex:
		return "thread_index_in_threadgroup";
	case BuiltInSubgroupSize:
		if (msl_options.emulate_subgroups || msl_options.fixed_subgroup_size != 0)
			// Shouldn't be reached.
			SPIRV_CROSS_THROW("Emitting threads_per_simdgroup attribute with fixed subgroup size?");
		if (execution.model == ExecutionModelFragment)
		{
			if (!msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("threads_per_simdgroup requires Metal 2.2 in fragment shaders.");
			return "threads_per_simdgroup";
		}
		else
		{
			// thread_execution_width is an alias for threads_per_simdgroup; it has been available
			// since MSL 1.0, but not in fragment functions.
			return "thread_execution_width";
		}
	case BuiltInNumSubgroups:
		if (msl_options.emulate_subgroups)
			// Shouldn't be reached.
			SPIRV_CROSS_THROW("NumSubgroups is handled specially with emulation.");
		if (!msl_options.supports_msl_version(2))
			SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
		return msl_options.use_quadgroup_operation() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup";
	case BuiltInSubgroupId:
		if (msl_options.emulate_subgroups)
			// Shouldn't be reached.
			SPIRV_CROSS_THROW("SubgroupId is handled specially with emulation.");
		if (!msl_options.supports_msl_version(2))
			SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
		return msl_options.use_quadgroup_operation() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup";
	case BuiltInSubgroupLocalInvocationId:
		if (msl_options.emulate_subgroups)
			// Shouldn't be reached.
			SPIRV_CROSS_THROW("SubgroupLocalInvocationId is handled specially with emulation.");
		if (execution.model == ExecutionModelFragment)
		{
			if (!msl_options.supports_msl_version(2, 2))
				SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders.");
			return "thread_index_in_simdgroup";
		}
		else if (execution.model == ExecutionModelKernel || execution.model == ExecutionModelGLCompute ||
		         execution.model == ExecutionModelTessellationControl ||
		         (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation))
		{
			// We are generating a Metal kernel function.
			if (!msl_options.supports_msl_version(2))
				SPIRV_CROSS_THROW("Subgroup builtins in kernel functions require Metal 2.0.");
			return msl_options.use_quadgroup_operation() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup";
		}
		else
			SPIRV_CROSS_THROW("Subgroup builtins are not available in this type of function.");
	case BuiltInSubgroupEqMask:
	case BuiltInSubgroupGeMask:
	case BuiltInSubgroupGtMask:
	case BuiltInSubgroupLeMask:
	case BuiltInSubgroupLtMask:
		// Shouldn't be reached.
		SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL.");
	case BuiltInBaryCoordKHR:
		if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS.");
		else if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS.");
		return "barycentric_coord, center_perspective";
	case BuiltInBaryCoordNoPerspKHR:
		if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS.");
		else if (!msl_options.supports_msl_version(2, 2))
			SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS.");
		return "barycentric_coord, center_no_perspective";
	default:
		return "unsupported-built-in";
	}
}

// Returns an MSL string type declaration for a SPIR-V builtin
string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
{
	switch (builtin)
	{
	// Vertex function in
	case BuiltInVertexId:
		return "uint";
	case BuiltInVertexIndex:
		return "uint";
	case BuiltInBaseVertex:
		return "uint";
	case BuiltInInstanceId:
		return "uint";
	case BuiltInInstanceIndex:
		return "uint";
	case BuiltInBaseInstance:
		return "uint";
	case BuiltInDrawIndex:
		SPIRV_CROSS_THROW("DrawIndex is not supported in MSL.");

	// Vertex function out
	case BuiltInClipDistance:
	case BuiltInCullDistance:
		return "float";
	case BuiltInPointSize:
		return "float";
	case BuiltInPosition:
		return "float4";
	case BuiltInLayer:
		return "uint";
	case BuiltInViewportIndex:
		if (!msl_options.supports_msl_version(2, 0))
			SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0.");
		return "uint";

	// Tess. control function in
	case BuiltInInvocationId:
		return "uint";
	case BuiltInPatchVertices:
		return "uint";
	case BuiltInPrimitiveId:
		return "uint";

	// Tess. control function out
	case BuiltInTessLevelInner:
		if (is_tese_shader())
			return (msl_options.raw_buffer_tese_input || is_tessellating_triangles()) ? "float" : "float2";
		return "half";
	case BuiltInTessLevelOuter:
		if (is_tese_shader())
			return (msl_options.raw_buffer_tese_input || is_tessellating_triangles()) ? "float" : "float4";
		return "half";

	// Tess. evaluation function in
	case BuiltInTessCoord:
		return "float3";

	// Fragment function in
	case BuiltInFrontFacing:
		return "bool";
	case BuiltInPointCoord:
		return "float2";
	case BuiltInFragCoord:
		return "float4";
	case BuiltInSampleId:
		return "uint";
	case BuiltInSampleMask:
		return "uint";
	case BuiltInSamplePosition:
		return "float2";
	case BuiltInViewIndex:
		return "uint";
	case BuiltInHelperInvocation:
		return "bool";
	case BuiltInBaryCoordKHR:
	case BuiltInBaryCoordNoPerspKHR:
		// Use the type as declared, can be 1, 2 or 3 components.
		return type_to_glsl(get_variable_data_type(get<SPIRVariable>(id)));

	// Fragment function out
	case BuiltInFragDepth:
		return "float";
	case BuiltInFragStencilRefEXT:
		return "uint";

	// Compute function in
	case BuiltInGlobalInvocationId:
	case BuiltInLocalInvocationId:
	case BuiltInNumWorkgroups:
	case BuiltInWorkgroupId:
		return "uint3";
	case BuiltInLocalInvocationIndex:
	case BuiltInNumSubgroups:
	case BuiltInSubgroupId:
	case BuiltInSubgroupSize:
	case BuiltInSubgroupLocalInvocationId:
		return "uint";
	case BuiltInSubgroupEqMask:
	case BuiltInSubgroupGeMask:
	case BuiltInSubgroupGtMask:
	case BuiltInSubgroupLeMask:
	case BuiltInSubgroupLtMask:
		return "uint4";
	case BuiltInDeviceIndex:
		return "int";
	default:
		return "unsupported-built-in-type";
	}
}

// Returns the declaration of a built-in argument to a function
string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
{
	string bi_arg;
	if (prefix_comma)
		bi_arg += ", ";

	// Handle HLSL-style 0-based vertex/instance index.
	builtin_declaration = true;
	bi_arg += builtin_type_decl(builtin);
	bi_arg += string(" ") + builtin_to_glsl(builtin, StorageClassInput);
	bi_arg += string(" [[") + builtin_qualifier(builtin) + string("]]");
	builtin_declaration = false;

	return bi_arg;
}
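
// Example (illustrative): built_in_func_arg(BuiltInSampleId, true) concatenates the type,
// name, and qualifier above into ", uint gl_SampleID [[sample_id]]".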

const SPIRType &CompilerMSL::get_physical_member_type(const SPIRType &type, uint32_t index) const
{
	if (member_is_remapped_physical_type(type, index))
		return get<SPIRType>(get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID));
	else
		return get<SPIRType>(type.member_types[index]);
}

SPIRType CompilerMSL::get_presumed_input_type(const SPIRType &ib_type, uint32_t index) const
{
	SPIRType type = get_physical_member_type(ib_type, index);
	uint32_t loc = get_member_decoration(ib_type.self, index, DecorationLocation);
	uint32_t cmp = get_member_decoration(ib_type.self, index, DecorationComponent);
	auto p_va = inputs_by_location.find({ loc, cmp });
	if (p_va != end(inputs_by_location) && p_va->second.vecsize > type.vecsize)
		type.vecsize = p_va->second.vecsize;
	return type;
}
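
// Example (illustrative): if the shader declares a float2 input at a location whose vertex
// attribute was described to the API with 4 components, the presumed type is widened to float4
// so offsets and strides match the actual attribute layout.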

uint32_t CompilerMSL::get_declared_type_array_stride_msl(const SPIRType &type, bool is_packed, bool row_major) const
{
	// Array stride in MSL is always size * array_size. sizeof(float3) == 16,
	// unlike GLSL and HLSL where array stride would be 16 and size 12.

	// We could use parent type here and recurse, but that makes creating physical type remappings
	// far more complicated. We'd rather just create the final type, and ignore having to create the entire type
	// hierarchy in order to compute this value, so make a temporary type on the stack.
	auto basic_type = type;
	basic_type.array.clear();
	basic_type.array_size_literal.clear();
	uint32_t value_size = get_declared_type_size_msl(basic_type, is_packed, row_major);

	uint32_t dimensions = uint32_t(type.array.size());
	assert(dimensions > 0);
	dimensions--;

	// Multiply together every dimension, except the last one.
	for (uint32_t dim = 0; dim < dimensions; dim++)
	{
		uint32_t array_size = to_array_size_literal(type, dim);
		value_size *= max<uint32_t>(array_size, 1u);
	}

	return value_size;
}
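
// Worked example (illustrative, assuming type.array stores the innermost dimension first):
// for "float3 v[3][2]", sizeof(float3) == 16 in MSL and the inner dimension is 2, so the
// stride between elements of the outermost dimension is 16 * 2 = 32 bytes.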

uint32_t CompilerMSL::get_declared_struct_member_array_stride_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_array_stride_msl(get_physical_member_type(type, index),
	                                          member_is_packed_physical_type(type, index),
	                                          has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_input_array_stride_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_array_stride_msl(get_presumed_input_type(type, index), false,
	                                          has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const
{
	// For packed matrices, we just use the size of the vector type.
	// Otherwise, MatrixStride == alignment, which is the size of the underlying vector type.
	if (packed)
		return (type.width / 8) * ((row_major && type.columns > 1) ? type.columns : type.vecsize);
	else
		return get_declared_type_alignment_msl(type, false, row_major);
}
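
// Example (illustrative): a packed float3x3 column has a stride of 3 * 4 = 12 bytes, while the
// unpacked type's column stride equals its alignment, 16 bytes.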

uint32_t CompilerMSL::get_declared_struct_member_matrix_stride_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_matrix_stride_msl(get_physical_member_type(type, index),
	                                           member_is_packed_physical_type(type, index),
	                                           has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_input_matrix_stride_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_matrix_stride_msl(get_presumed_input_type(type, index), false,
	                                           has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment,
                                                   bool ignore_padding) const
{
	// If we have a target size, that is the declared size as well.
	if (!ignore_padding && has_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget))
		return get_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget);

	if (struct_type.member_types.empty())
		return 0;

	uint32_t mbr_cnt = uint32_t(struct_type.member_types.size());

	// In MSL, a struct's alignment is equal to the maximum alignment of any of its members.
	uint32_t alignment = 1;

	if (!ignore_alignment)
	{
		for (uint32_t i = 0; i < mbr_cnt; i++)
		{
			uint32_t mbr_alignment = get_declared_struct_member_alignment_msl(struct_type, i);
			alignment = max(alignment, mbr_alignment);
		}
	}

	// Last member will always be matched to the final Offset decoration, but size of struct in MSL now depends
	// on physical size in MSL, and the size of the struct itself is then aligned to struct alignment.
	uint32_t spirv_offset = type_struct_member_offset(struct_type, mbr_cnt - 1);
	uint32_t msl_size = spirv_offset + get_declared_struct_member_size_msl(struct_type, mbr_cnt - 1);
	msl_size = (msl_size + alignment - 1) & ~(alignment - 1);
	return msl_size;
}
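
// Worked example (illustrative): if the last member is a float at SPIR-V offset 12, then
// msl_size = 12 + 4 = 16, which is then rounded up to the struct's maximum member alignment
// by the mask expression above.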

uint32_t CompilerMSL::get_physical_type_stride(const SPIRType &type) const
{
	// This should only be relevant for plain types such as scalars and vectors?
	// If we're pointing to a struct, it will recursively pick up packed/row-major state.
	return get_declared_type_size_msl(type, false, false);
}

// Returns the byte size of a struct member.
uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const
{
	// Pointers take 8 bytes each.
	// Match both pointer and array-of-pointer here.
	if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
	{
		uint32_t type_size = 8;

		// Work our way through potentially layered arrays,
		// stopping when we hit a pointer that is not also an array.
		int32_t dim_idx = (int32_t)type.array.size() - 1;
		auto *p_type = &type;
		while (!is_pointer(*p_type) && dim_idx >= 0)
		{
			type_size *= to_array_size_literal(*p_type, dim_idx);
			p_type = &get<SPIRType>(p_type->parent_type);
			dim_idx--;
		}

		return type_size;
	}

	switch (type.basetype)
	{
	case SPIRType::Unknown:
	case SPIRType::Void:
	case SPIRType::AtomicCounter:
	case SPIRType::Image:
	case SPIRType::SampledImage:
	case SPIRType::Sampler:
		SPIRV_CROSS_THROW("Querying size of opaque object.");

	default:
	{
		if (!type.array.empty())
		{
			uint32_t array_size = to_array_size_literal(type);
			return get_declared_type_array_stride_msl(type, is_packed, row_major) * max<uint32_t>(array_size, 1u);
		}

		if (type.basetype == SPIRType::Struct)
			return get_declared_struct_size_msl(type);

		if (is_packed)
		{
			return type.vecsize * type.columns * (type.width / 8);
		}
		else
		{
			// An unpacked 3-element vector or matrix column is the same memory size as a 4-element one.
			uint32_t vecsize = type.vecsize;
			uint32_t columns = type.columns;

			if (row_major && columns > 1)
				swap(vecsize, columns);

			if (vecsize == 3)
				vecsize = 4;

			return vecsize * columns * (type.width / 8);
		}
	}
	}
}
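
// Example (illustrative): an unpacked float3 occupies 4 * 4 = 16 bytes here, while packed_float3
// occupies 3 * 4 = 12; for row-major matrices, vecsize and columns are swapped before the
// 3 -> 4 padding rule is applied.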

uint32_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_size_msl(get_physical_member_type(type, index),
	                                  member_is_packed_physical_type(type, index),
	                                  has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_input_size_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_size_msl(get_presumed_input_type(type, index), false,
	                                  has_member_decoration(type.self, index, DecorationRowMajor));
}

// Returns the byte alignment of a type.
uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const
{
	// Pointers align on multiples of 8 bytes.
	// Deliberately ignore array-ness here. It's not relevant for alignment.
	if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
		return 8;

	switch (type.basetype)
	{
	case SPIRType::Unknown:
	case SPIRType::Void:
	case SPIRType::AtomicCounter:
	case SPIRType::Image:
	case SPIRType::SampledImage:
	case SPIRType::Sampler:
		SPIRV_CROSS_THROW("Querying alignment of opaque object.");

	case SPIRType::Double:
		SPIRV_CROSS_THROW("double types are not supported in buffers in MSL.");

	case SPIRType::Struct:
	{
		// In MSL, a struct's alignment is equal to the maximum alignment of any of its members.
		uint32_t alignment = 1;
		for (uint32_t i = 0; i < type.member_types.size(); i++)
			alignment = max(alignment, uint32_t(get_declared_struct_member_alignment_msl(type, i)));
		return alignment;
	}

	default:
	{
		if (type.basetype == SPIRType::Int64 && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("long types in buffers are only supported in MSL 2.3 and above.");
		if (type.basetype == SPIRType::UInt64 && !msl_options.supports_msl_version(2, 3))
			SPIRV_CROSS_THROW("ulong types in buffers are only supported in MSL 2.3 and above.");

		// Alignment of a packed type is the same as the underlying component or column size.
		// Alignment of an unpacked type is the same as the vector size.
		// Alignment of a 3-element vector is the same as 4-element (including packed using column).
		if (is_packed)
		{
			// If we have packed_T and friends, the alignment is always scalar.
			return type.width / 8;
		}
		else
		{
			// This is the general rule for MSL. Size == alignment.
			uint32_t vecsize = (row_major && type.columns > 1) ? type.columns : type.vecsize;
			return (type.width / 8) * (vecsize == 3 ? 4 : vecsize);
		}
	}
	}
}
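
// Example (illustrative): packed_float3 aligns to 4 bytes (scalar alignment), while an unpacked
// float3 aligns to 16 bytes, the same as float4, per the vecsize == 3 rule above.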

uint32_t CompilerMSL::get_declared_struct_member_alignment_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_alignment_msl(get_physical_member_type(type, index),
	                                       member_is_packed_physical_type(type, index),
	                                       has_member_decoration(type.self, index, DecorationRowMajor));
}

uint32_t CompilerMSL::get_declared_input_alignment_msl(const SPIRType &type, uint32_t index) const
{
	return get_declared_type_alignment_msl(get_presumed_input_type(type, index), false,
	                                       has_member_decoration(type.self, index, DecorationRowMajor));
}

bool CompilerMSL::skip_argument(uint32_t) const
{
	return false;
}

void CompilerMSL::analyze_sampled_image_usage()
{
	if (msl_options.swizzle_texture_samples)
	{
		SampledImageScanner scanner(*this);
		traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), scanner);
	}
}

bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
{
	switch (opcode)
	{
	case OpLoad:
	case OpImage:
	case OpSampledImage:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];
		auto &type = compiler.get<SPIRType>(result_type);
		if ((type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage) || type.image.sampled != 1)
			return true;

		uint32_t id = args[1];
		compiler.set<SPIRExpression>(id, "", result_type, true);
		break;
	}
	case OpImageSampleExplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSampleDrefExplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageSampleImplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleProjDrefImplicitLod:
	case OpImageFetch:
	case OpImageGather:
	case OpImageDrefGather:
		compiler.has_sampled_images =
		    compiler.has_sampled_images || compiler.is_sampled_image_type(compiler.expression_type(args[2]));
		compiler.needs_swizzle_buffer_def = compiler.needs_swizzle_buffer_def || compiler.has_sampled_images;
		break;
	default:
		break;
	}
	return true;
}

// If a needed custom function wasn't added before, add it and force a recompile.
void CompilerMSL::add_spv_func_and_recompile(SPVFuncImpl spv_func)
{
	if (spv_function_implementations.count(spv_func) == 0)
	{
		spv_function_implementations.insert(spv_func);
		suppress_missing_prototypes = true;
		force_recompile();
	}
}
bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	// Since MSL exists in a single execution scope, function prototype declarations are not
	// needed, and clutter the output. If secondary functions are output (either as a SPIR-V
	// function implementation or as indicated by the presence of OpFunctionCall), then set
	// suppress_missing_prototypes to suppress compiler warnings of missing function prototypes.

	// Mark if the input requires the implementation of a SPIR-V function that does not exist in Metal.
	SPVFuncImpl spv_func = get_spv_func_impl(opcode, args);
	if (spv_func != SPVFuncImplNone)
	{
		compiler.spv_function_implementations.insert(spv_func);
		suppress_missing_prototypes = true;
	}

	switch (opcode)
	{
	case OpFunctionCall:
		suppress_missing_prototypes = true;
		break;

	case OpDemoteToHelperInvocationEXT:
		uses_discard = true;
		break;

	// Emulate texture2D atomic operations
	case OpImageTexelPointer:
	{
		if (!compiler.msl_options.supports_msl_version(3, 1))
		{
			auto *var = compiler.maybe_get_backing_variable(args[2]);
			image_pointers_emulated[args[1]] = var ? var->self : ID(0);
		}
		break;
	}

	case OpImageWrite:
		uses_image_write = true;
		break;

	case OpStore:
		check_resource_write(args[0]);
		break;

	// Emulate texture2D atomic operations
	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicCompareExchangeWeak:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicFAddEXT:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	{
		uses_atomics = true;
		auto it = image_pointers_emulated.find(args[2]);
		if (it != image_pointers_emulated.end())
		{
			uses_image_write = true;
			compiler.atomic_image_vars_emulated.insert(it->second);
		}
		else
			check_resource_write(args[2]);
		break;
	}

	case OpAtomicStore:
	{
		uses_atomics = true;
		auto it = image_pointers_emulated.find(args[0]);
		if (it != image_pointers_emulated.end())
		{
			compiler.atomic_image_vars_emulated.insert(it->second);
			uses_image_write = true;
		}
		else
			check_resource_write(args[0]);
		break;
	}

	case OpAtomicLoad:
	{
		uses_atomics = true;
		auto it = image_pointers_emulated.find(args[2]);
		if (it != image_pointers_emulated.end())
		{
			compiler.atomic_image_vars_emulated.insert(it->second);
		}
		break;
	}

	case OpGroupNonUniformInverseBallot:
		needs_subgroup_invocation_id = true;
		break;

	case OpGroupNonUniformBallotFindLSB:
	case OpGroupNonUniformBallotFindMSB:
		needs_subgroup_size = true;
		break;

	case OpGroupNonUniformBallotBitCount:
		if (args[3] == GroupOperationReduce)
			needs_subgroup_size = true;
		else
			needs_subgroup_invocation_id = true;
		break;

	case OpArrayLength:
	{
		auto *var = compiler.maybe_get_backing_variable(args[2]);
		if (var != nullptr)
		{
			if (!compiler.is_var_runtime_size_array(*var))
				compiler.buffers_requiring_array_length.insert(var->self);
		}
		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		// OpArrayLength might want to know if taking ArrayLength of an array of SSBOs.
		uint32_t result_type = args[0];
		uint32_t id = args[1];
		uint32_t ptr = args[2];

		compiler.set<SPIRExpression>(id, "", result_type, true);
		compiler.register_read(id, ptr, true);
		compiler.ir.ids[id].set_allow_type_rewrite();
		break;
	}

	case OpExtInst:
	{
		uint32_t extension_set = args[2];
		if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
		{
			auto op_450 = static_cast<GLSLstd450>(args[3]);
			switch (op_450)
			{
			case GLSLstd450InterpolateAtCentroid:
			case GLSLstd450InterpolateAtSample:
			case GLSLstd450InterpolateAtOffset:
			{
				if (!compiler.msl_options.supports_msl_version(2, 3))
					SPIRV_CROSS_THROW("Pull-model interpolation requires MSL 2.3.");
				// Fragment varyings used with pull-model interpolation need special handling,
				// due to the way pull-model interpolation works in Metal.
				auto *var = compiler.maybe_get_backing_variable(args[4]);
				if (var)
				{
					compiler.pull_model_inputs.insert(var->self);
					auto &var_type = compiler.get_variable_element_type(*var);
					// In addition, if this variable has a 'Sample' decoration, we need the sample ID
					// in order to do default interpolation.
					if (compiler.has_decoration(var->self, DecorationSample))
					{
						needs_sample_id = true;
					}
					else if (var_type.basetype == SPIRType::Struct)
					{
						// Now we need to check each member and see if it has this decoration.
						for (uint32_t i = 0; i < var_type.member_types.size(); ++i)
						{
							if (compiler.has_member_decoration(var_type.self, i, DecorationSample))
							{
								needs_sample_id = true;
								break;
							}
						}
					}
				}
				break;
			}
			default:
				break;
			}
		}
		break;
	}

	case OpIsHelperInvocationEXT:
		if (compiler.needs_manual_helper_invocation_updates())
			needs_helper_invocation = true;
		break;

	default:
		break;
	}

	// If it has one, keep track of the instruction's result type, mapped by ID
	uint32_t result_type, result_id;
	if (compiler.instruction_to_result_type(result_type, result_id, opcode, args, length))
		result_types[result_id] = result_type;

	return true;
}

// If the variable is a Uniform or StorageBuffer, mark that a resource has been written to.
void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id)
{
	auto *p_var = compiler.maybe_get_backing_variable(var_id);
	StorageClass sc = p_var ? p_var->storage : StorageClassMax;
	if (sc == StorageClassUniform || sc == StorageClassStorageBuffer)
		uses_buffer_write = true;
}

// Returns which SPIR-V helper function implementation, if any, needs to be output for the given opcode.
CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op opcode, const uint32_t *args)
{
	switch (opcode)
	{
	case OpFMod:
		return SPVFuncImplMod;

	case OpFAdd:
	case OpFSub:
		if (compiler.msl_options.invariant_float_math ||
		    compiler.has_decoration(args[1], DecorationNoContraction))
		{
			return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub;
		}
		break;

	case OpFMul:
	case OpOuterProduct:
	case OpMatrixTimesVector:
	case OpVectorTimesMatrix:
	case OpMatrixTimesMatrix:
		if (compiler.msl_options.invariant_float_math ||
		    compiler.has_decoration(args[1], DecorationNoContraction))
		{
			return SPVFuncImplFMul;
		}
		break;

	case OpQuantizeToF16:
		return SPVFuncImplQuantizeToF16;

	case OpTypeArray:
	{
		// Allow Metal to use the array<T> template to make arrays a value type
		return SPVFuncImplUnsafeArray;
	}

	// Emulate texture2D atomic operations
	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicCompareExchangeWeak:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicFAddEXT:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	case OpAtomicLoad:
	case OpAtomicStore:
	{
		auto it = image_pointers_emulated.find(args[opcode == OpAtomicStore ? 0 : 2]);
		if (it != image_pointers_emulated.end())
		{
			uint32_t tid = compiler.get<SPIRVariable>(it->second).basetype;
			if (tid && compiler.get<SPIRType>(tid).image.dim == Dim2D)
				return SPVFuncImplImage2DAtomicCoords;
		}
		break;
	}

	case OpImageFetch:
	case OpImageRead:
	case OpImageWrite:
	{
		// Retrieve the image type, and if it's a Buffer, emit a texel coordinate function
		uint32_t tid = result_types[args[opcode == OpImageWrite ? 0 : 2]];
		if (tid && compiler.get<SPIRType>(tid).image.dim == DimBuffer && !compiler.msl_options.texture_buffer_native)
			return SPVFuncImplTexelBufferCoords;
		break;
	}

	case OpExtInst:
	{
		uint32_t extension_set = args[2];
		if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
		{
			auto op_450 = static_cast<GLSLstd450>(args[3]);
			switch (op_450)
			{
			case GLSLstd450Radians:
				return SPVFuncImplRadians;
			case GLSLstd450Degrees:
				return SPVFuncImplDegrees;
			case GLSLstd450FindILsb:
				return SPVFuncImplFindILsb;
			case GLSLstd450FindSMsb:
				return SPVFuncImplFindSMsb;
			case GLSLstd450FindUMsb:
				return SPVFuncImplFindUMsb;
			case GLSLstd450SSign:
				return SPVFuncImplSSign;
			case GLSLstd450Reflect:
			{
				auto &type = compiler.get<SPIRType>(args[0]);
				if (type.vecsize == 1)
					return SPVFuncImplReflectScalar;
				break;
			}
			case GLSLstd450Refract:
			{
				auto &type = compiler.get<SPIRType>(args[0]);
				if (type.vecsize == 1)
					return SPVFuncImplRefractScalar;
				break;
			}
			case GLSLstd450FaceForward:
			{
				auto &type = compiler.get<SPIRType>(args[0]);
				if (type.vecsize == 1)
					return SPVFuncImplFaceForwardScalar;
				break;
			}
			case GLSLstd450MatrixInverse:
			{
				auto &mat_type = compiler.get<SPIRType>(args[0]);
				switch (mat_type.columns)
				{
				case 2:
					return SPVFuncImplInverse2x2;
				case 3:
					return SPVFuncImplInverse3x3;
				case 4:
					return SPVFuncImplInverse4x4;
				default:
					break;
				}
				break;
			}
			default:
				break;
			}
		}
		break;
	}

	case OpGroupNonUniformBroadcast:
	case OpSubgroupReadInvocationKHR:
		return SPVFuncImplSubgroupBroadcast;

	case OpGroupNonUniformBroadcastFirst:
	case OpSubgroupFirstInvocationKHR:
		return SPVFuncImplSubgroupBroadcastFirst;

	case OpGroupNonUniformBallot:
	case OpSubgroupBallotKHR:
		return SPVFuncImplSubgroupBallot;

	case OpGroupNonUniformInverseBallot:
	case OpGroupNonUniformBallotBitExtract:
		return SPVFuncImplSubgroupBallotBitExtract;

	case OpGroupNonUniformBallotFindLSB:
		return SPVFuncImplSubgroupBallotFindLSB;

	case OpGroupNonUniformBallotFindMSB:
		return SPVFuncImplSubgroupBallotFindMSB;

	case OpGroupNonUniformBallotBitCount:
		return SPVFuncImplSubgroupBallotBitCount;

	case OpGroupNonUniformAllEqual:
	case OpSubgroupAllEqualKHR:
		return SPVFuncImplSubgroupAllEqual;

	case OpGroupNonUniformShuffle:
		return SPVFuncImplSubgroupShuffle;

	case OpGroupNonUniformShuffleXor:
		return SPVFuncImplSubgroupShuffleXor;

	case OpGroupNonUniformShuffleUp:
		return SPVFuncImplSubgroupShuffleUp;

	case OpGroupNonUniformShuffleDown:
		return SPVFuncImplSubgroupShuffleDown;

	case OpGroupNonUniformQuadBroadcast:
		return SPVFuncImplQuadBroadcast;

	case OpGroupNonUniformQuadSwap:
		return SPVFuncImplQuadSwap;

	case OpSDot:
	case OpUDot:
	case OpSUDot:
	case OpSDotAccSat:
	case OpUDotAccSat:
	case OpSUDotAccSat:
		return SPVFuncImplReduceAdd;

	default:
		break;
	}
	return SPVFuncImplNone;
}

// Sort both type and meta member content based on builtin status (put builtins at end),
// then by the required sorting aspect.
void CompilerMSL::MemberSorter::sort()
{
	// Create a temporary array of consecutive member indices and sort it based on how
	// the members should be reordered, based on builtin and sorting aspect meta info.
	size_t mbr_cnt = type.member_types.size();
	SmallVector<uint32_t> mbr_idxs(mbr_cnt);
	std::iota(mbr_idxs.begin(), mbr_idxs.end(), 0); // Fill with consecutive indices
	std::stable_sort(mbr_idxs.begin(), mbr_idxs.end(), *this); // Sort member indices based on sorting aspect

	bool sort_is_identity = true;
	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
	{
		if (mbr_idx != mbr_idxs[mbr_idx])
		{
			sort_is_identity = false;
			break;
		}
	}

	if (sort_is_identity)
		return;

	if (meta.members.size() < type.member_types.size())
	{
		// This should never trigger in normal circumstances, but to be safe.
		meta.members.resize(type.member_types.size());
	}

	// Move type and meta member info to the order defined by the sorted member indices.
	// This is done by creating temporary copies of both member types and meta, and then
	// copying back to the original content at the sorted indices.
	auto mbr_types_cpy = type.member_types;
	auto mbr_meta_cpy = meta.members;
	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
	{
		type.member_types[mbr_idx] = mbr_types_cpy[mbr_idxs[mbr_idx]];
		meta.members[mbr_idx] = mbr_meta_cpy[mbr_idxs[mbr_idx]];
	}

	// If we're sorting by Offset, this might affect user code which accesses a buffer block.
	// We will need to redirect member indices from defined index to sorted index using reverse lookup.
	if (sort_aspect == SortAspect::Offset)
	{
		type.member_type_index_redirection.resize(mbr_cnt);
		for (uint32_t map_idx = 0; map_idx < mbr_cnt; map_idx++)
			type.member_type_index_redirection[mbr_idxs[map_idx]] = map_idx;
	}
}
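
// Comparison operator for stable_sort: returns true if the member at mbr_idx1 should precede mbr_idx2.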
bool CompilerMSL::MemberSorter::operator()(uint32_t mbr_idx1, uint32_t mbr_idx2)
{
	auto &mbr_meta1 = meta.members[mbr_idx1];
	auto &mbr_meta2 = meta.members[mbr_idx2];

	if (sort_aspect == LocationThenBuiltInType)
	{
		// Sort first by builtin status (put builtins at end), then by the sorting aspect.
		if (mbr_meta1.builtin != mbr_meta2.builtin)
			return mbr_meta2.builtin;
		else if (mbr_meta1.builtin)
			return mbr_meta1.builtin_type < mbr_meta2.builtin_type;
		else if (mbr_meta1.location == mbr_meta2.location)
			return mbr_meta1.component < mbr_meta2.component;
		else
			return mbr_meta1.location < mbr_meta2.location;
	}
	else
		return mbr_meta1.offset < mbr_meta2.offset;
}

CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa)
    : type(t)
    , meta(m)
    , sort_aspect(sa)
{
	// Ensure enough meta info is available
	meta.members.resize(max(type.member_types.size(), meta.members.size()));
}
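
// Associates a sampler or combined image sampler variable with a constexpr sampler that is
// emitted inline in the generated MSL instead of being bound as a resource.
// A minimal usage sketch from the API side (var_id is assumed to come from reflection):
//   MSLConstexprSampler samp;
//   samp.min_filter = MSL_SAMPLER_FILTER_LINEAR;
//   samp.mag_filter = MSL_SAMPLER_FILTER_LINEAR;
//   compiler.remap_constexpr_sampler(var_id, samp);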
void CompilerMSL::remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler)
{
	auto &type = get<SPIRType>(get<SPIRVariable>(id).basetype);
	if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler)
		SPIRV_CROSS_THROW("Can only remap SampledImage and Sampler type.");
	if (!type.array.empty())
		SPIRV_CROSS_THROW("Can not remap array of samplers.");
	constexpr_samplers_by_id[id] = sampler;
}

void CompilerMSL::remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding,
                                                     const MSLConstexprSampler &sampler)
{
	constexpr_samplers_by_binding[{ desc_set, binding }] = sampler;
}
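
// Fixes up the type of an expression loaded from a variable when the variable's declared type
// does not match the SPIR-V expectation, e.g. boolean workgroup variables, workgroup matrices,
// or builtins which Metal declares with a different signedness or width.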
void CompilerMSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
{
	bool is_packed = has_extended_decoration(source_id, SPIRVCrossDecorationPhysicalTypePacked);
	auto *source_expr = maybe_get<SPIRExpression>(source_id);
	auto *var = maybe_get_backing_variable(source_id);
	const SPIRType *var_type = nullptr, *phys_type = nullptr;

	if (uint32_t phys_id = get_extended_decoration(source_id, SPIRVCrossDecorationPhysicalTypeID))
		phys_type = &get<SPIRType>(phys_id);
	else
		phys_type = &expr_type;

	if (var)
	{
		source_id = var->self;
		var_type = &get_variable_data_type(*var);
	}

	bool rewrite_boolean_load =
	    expr_type.basetype == SPIRType::Boolean &&
	    (var && (var->storage == StorageClassWorkgroup || var_type->basetype == SPIRType::Struct));

	// Type fixups for workgroup variables if they are booleans.
	if (rewrite_boolean_load)
	{
		if (is_array(expr_type))
			expr = to_rerolled_array_expression(expr_type, expr, expr_type);
		else
			expr = join(type_to_glsl(expr_type), "(", expr, ")");
	}

	// Type fixups for workgroup variables if they are matrices.
	// Don't do fixup for packed types; those are handled specially.
	// FIXME: Maybe use a type like spvStorageMatrix for packed matrices?
	if (!msl_options.supports_msl_version(3, 0) && var &&
	    (var->storage == StorageClassWorkgroup ||
	     (var_type->basetype == SPIRType::Struct &&
	      has_extended_decoration(var_type->self, SPIRVCrossDecorationWorkgroupStruct) && !is_packed)) &&
	    expr_type.columns > 1)
	{
		SPIRType matrix_type = *phys_type;
		if (source_expr && source_expr->need_transpose)
			swap(matrix_type.vecsize, matrix_type.columns);
		matrix_type.array.clear();
		matrix_type.array_size_literal.clear();
		expr = join(type_to_glsl(matrix_type), "(", expr, ")");
	}

	// Only interested in standalone builtin variables in the switch below.
	if (!has_decoration(source_id, DecorationBuiltIn))
	{
		// If the backing variable does not match our expected sign, we can fix it up here.
		// See ensure_correct_input_type().
		if (var && var->storage == StorageClassInput)
		{
			auto &base_type = get<SPIRType>(var->basetype);
			if (base_type.basetype != SPIRType::Struct && expr_type.basetype != base_type.basetype)
				expr = join(type_to_glsl(expr_type), "(", expr, ")");
		}
		return;
	}

	auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;
	auto expected_width = expr_type.width;
	switch (builtin)
	{
	case BuiltInGlobalInvocationId:
	case BuiltInLocalInvocationId:
	case BuiltInWorkgroupId:
	case BuiltInLocalInvocationIndex:
	case BuiltInWorkgroupSize:
	case BuiltInNumWorkgroups:
	case BuiltInLayer:
	case BuiltInViewportIndex:
	case BuiltInFragStencilRefEXT:
	case BuiltInPrimitiveId:
	case BuiltInSubgroupSize:
	case BuiltInSubgroupLocalInvocationId:
	case BuiltInViewIndex:
	case BuiltInVertexIndex:
	case BuiltInInstanceIndex:
	case BuiltInBaseInstance:
	case BuiltInBaseVertex:
	case BuiltInSampleMask:
		expected_type = SPIRType::UInt;
		expected_width = 32;
		break;

	case BuiltInTessLevelInner:
	case BuiltInTessLevelOuter:
		if (is_tesc_shader())
		{
			expected_type = SPIRType::Half;
			expected_width = 16;
		}
		break;

	default:
		break;
	}

	if (is_array(expr_type) && builtin == BuiltInSampleMask)
	{
		// Needs special handling.
		auto wrap_expr = join(type_to_glsl(expr_type), "({ ");
		wrap_expr += join(type_to_glsl(get<SPIRType>(expr_type.parent_type)), "(", expr, ")");
		wrap_expr += " })";
		expr = std::move(wrap_expr);
	}
	else if (expected_type != expr_type.basetype)
	{
		if (is_array(expr_type) && (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter))
		{
			// Triggers when loading TessLevel directly as an array.
			// Need explicit padding + cast.
			auto wrap_expr = join(type_to_glsl(expr_type), "({ ");

			uint32_t array_size = get_physical_tess_level_array_size(builtin);
			for (uint32_t i = 0; i < array_size; i++)
			{
				if (array_size > 1)
					wrap_expr += join("float(", expr, "[", i, "])");
				else
					wrap_expr += join("float(", expr, ")");
				if (i + 1 < array_size)
					wrap_expr += ", ";
			}

			if (is_tessellating_triangles())
				wrap_expr += ", 0.0";

			wrap_expr += " })";
			expr = std::move(wrap_expr);
		}
		else
		{
			// These are of different widths, so we cannot do a straight bitcast.
			if (expected_width != expr_type.width)
				expr = join(type_to_glsl(expr_type), "(", expr, ")");
			else
				expr = bitcast_expression(expr_type, expected_type, expr);
		}
	}
}
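
// The store-side counterpart of cast_from_variable_load: fixes up the type of an expression
// before it is stored to a variable whose declared type differs from the expression's SPIR-V type.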
void CompilerMSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
{
	bool is_packed = has_extended_decoration(target_id, SPIRVCrossDecorationPhysicalTypePacked);
	auto *target_expr = maybe_get<SPIRExpression>(target_id);
	auto *var = maybe_get_backing_variable(target_id);
	const SPIRType *var_type = nullptr, *phys_type = nullptr;

	if (uint32_t phys_id = get_extended_decoration(target_id, SPIRVCrossDecorationPhysicalTypeID))
		phys_type = &get<SPIRType>(phys_id);
	else
		phys_type = &expr_type;

	if (var)
	{
		target_id = var->self;
		var_type = &get_variable_data_type(*var);
	}

	bool rewrite_boolean_store =
	    expr_type.basetype == SPIRType::Boolean &&
	    (var && (var->storage == StorageClassWorkgroup || var_type->basetype == SPIRType::Struct));

	// Type fixups for workgroup variables or struct members if they are booleans.
	if (rewrite_boolean_store)
	{
		if (is_array(expr_type))
		{
			expr = to_rerolled_array_expression(*var_type, expr, expr_type);
		}
		else
		{
			auto short_type = expr_type;
			short_type.basetype = SPIRType::Short;
			expr = join(type_to_glsl(short_type), "(", expr, ")");
		}
	}

	// Type fixups for workgroup variables if they are matrices.
	// Don't do fixup for packed types; those are handled specially.
	// FIXME: Maybe use a type like spvStorageMatrix for packed matrices?
	if (!msl_options.supports_msl_version(3, 0) && var &&
	    (var->storage == StorageClassWorkgroup ||
	     (var_type->basetype == SPIRType::Struct &&
	      has_extended_decoration(var_type->self, SPIRVCrossDecorationWorkgroupStruct) && !is_packed)) &&
	    expr_type.columns > 1)
	{
		SPIRType matrix_type = *phys_type;
		if (target_expr && target_expr->need_transpose)
			swap(matrix_type.vecsize, matrix_type.columns);
		expr = join("spvStorage_", type_to_glsl(matrix_type), "(", expr, ")");
	}

	// Only interested in standalone builtin variables.
	if (!has_decoration(target_id, DecorationBuiltIn))
		return;

	auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;
	auto expected_width = expr_type.width;
	switch (builtin)
	{
	case BuiltInLayer:
	case BuiltInViewportIndex:
	case BuiltInFragStencilRefEXT:
	case BuiltInPrimitiveId:
	case BuiltInViewIndex:
		expected_type = SPIRType::UInt;
		expected_width = 32;
		break;

	case BuiltInTessLevelInner:
	case BuiltInTessLevelOuter:
		expected_type = SPIRType::Half;
		expected_width = 16;
		break;

	default:
		break;
	}

	if (expected_type != expr_type.basetype)
	{
		if (expected_width != expr_type.width)
		{
			// These are of different widths, so we cannot do a straight bitcast.
			auto type = expr_type;
			type.basetype = expected_type;
			type.width = expected_width;
			expr = join(type_to_glsl(type), "(", expr, ")");
		}
		else
		{
			auto type = expr_type;
			type.basetype = expected_type;
			expr = bitcast_expression(type, expr_type.basetype, expr);
		}
	}
}

string CompilerMSL::to_initializer_expression(const SPIRVariable &var)
{
	// With MSL, we risk getting an array initializer here if the variable is an array.
	// FIXME: We cannot handle non-constant arrays being initialized.
	// We will need to inject spvArrayCopy here somehow ...
	auto &type = get<SPIRType>(var.basetype);
	string expr;
	if (ir.ids[var.initializer].get_type() == TypeConstant &&
	    (!type.array.empty() || type.basetype == SPIRType::Struct))
		expr = constant_expression(get<SPIRConstant>(var.initializer));
	else
		expr = CompilerGLSL::to_initializer_expression(var);

	// If the initializer has more vector components than the variable, add a swizzle.
	// FIXME: This can't handle arrays or structs.
	auto &init_type = expression_type(var.initializer);
	if (type.array.empty() && type.basetype != SPIRType::Struct && init_type.vecsize > type.vecsize)
		expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));

	return expr;
}
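
// Zero-initialization in MSL is expressed with an empty brace initializer, regardless of type.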
string CompilerMSL::to_zero_initialized_expression(uint32_t)
{
	return "{}";
}
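
// A descriptor set is emitted as an argument buffer unless argument buffers are disabled,
// the set index is out of range, or the set has been explicitly marked as discrete.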
bool CompilerMSL::descriptor_set_is_argument_buffer(uint32_t desc_set) const
{
	if (!msl_options.argument_buffers)
		return false;
	if (desc_set >= kMaxArgumentBuffers)
		return false;
	return (argument_buffer_discrete_mask & (1u << desc_set)) == 0;
}

bool CompilerMSL::is_supported_argument_buffer_type(const SPIRType &type) const
{
	// iOS Tier 1 argument buffers do not support writable images.
	// When the argument buffer is encoded, we don't know whether this image will have a
	// NonWritable decoration, so just use discrete arguments for all storage images on iOS.
	bool is_supported_type = !(type.basetype == SPIRType::Image &&
	                           type.image.sampled == 2 &&
	                           msl_options.is_ios() &&
	                           msl_options.argument_buffers_tier <= Options::ArgumentBuffersTier::Tier1);
	return is_supported_type && !type_is_msl_framebuffer_fetch(type);
}
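
// Emits a declaration for a descriptor which aliases another descriptor occupying the same
// argument buffer slot, by reinterpreting the base descriptor's memory as the aliased type.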
void CompilerMSL::emit_argument_buffer_aliased_descriptor(const SPIRVariable &aliased_var,
                                                          const SPIRVariable &base_var)
{
	// To deal with buffer <-> image aliasing, we need to perform an unholy UB ritual.
	// A texture type in Metal 3.0 is a pointer. However, we cannot simply cast a pointer to texture.
	// What we *can* do is to cast pointer-to-pointer to pointer-to-texture.

	// We need to explicitly reach into the descriptor buffer lvalue, not any spvDescriptorArray wrapper.
	auto *var_meta = ir.find_meta(base_var.self);
	bool old_explicit_qualifier = var_meta && var_meta->decoration.qualified_alias_explicit_override;
	if (var_meta)
		var_meta->decoration.qualified_alias_explicit_override = false;
	auto unqualified_name = to_name(base_var.self, false);
	if (var_meta)
		var_meta->decoration.qualified_alias_explicit_override = old_explicit_qualifier;

	// For non-arrayed buffers, we have already performed a de-reference.
	// We need a proper lvalue to cast, so strip away the de-reference.
	if (unqualified_name.size() > 2 && unqualified_name[0] == '(' && unqualified_name[1] == '*')
	{
		unqualified_name.erase(unqualified_name.begin(), unqualified_name.begin() + 2);
		unqualified_name.pop_back();
	}

	string name;

	auto &var_type = get<SPIRType>(aliased_var.basetype);
	auto &data_type = get_variable_data_type(aliased_var);
	string descriptor_storage = descriptor_address_space(aliased_var.self, aliased_var.storage, "");

	if (aliased_var.storage == StorageClassUniformConstant)
	{
		if (is_var_runtime_size_array(aliased_var))
		{
			// This becomes a plain pointer to spvDescriptor.
			name = join("reinterpret_cast<", descriptor_storage, " ",
			            type_to_glsl(get_variable_data_type(aliased_var), aliased_var.self, true), ">(&",
			            unqualified_name, ")");
		}
		else
		{
			name = join("reinterpret_cast<", descriptor_storage, " ",
			            type_to_glsl(get_variable_data_type(aliased_var), aliased_var.self, true), " &>(",
			            unqualified_name, ");");
		}
	}
	else
	{
		// Buffer types.
		bool old_is_using_builtin_array = is_using_builtin_array;
		is_using_builtin_array = true;

		bool needs_post_cast_deref = !is_array(data_type);
		string ref_type = needs_post_cast_deref ? "&" : join("(&)", type_to_array_glsl(var_type, aliased_var.self));

		if (is_var_runtime_size_array(aliased_var))
		{
			name = join("reinterpret_cast<",
			            type_to_glsl(var_type, aliased_var.self, true), " ", descriptor_storage, " *>(&",
			            unqualified_name, ")");
		}
		else
		{
			name = join(needs_post_cast_deref ? "*" : "", "reinterpret_cast<",
			            type_to_glsl(var_type, aliased_var.self, true), " ", descriptor_storage, " ",
			            ref_type,
			            ">(", unqualified_name, ");");
		}

		if (needs_post_cast_deref)
			descriptor_storage = get_type_address_space(var_type, aliased_var.self, false);

		// These kinds of ridiculous casts trigger warnings in the compiler. Just ignore them.
		if (!suppress_incompatible_pointer_types_discard_qualifiers)
		{
			suppress_incompatible_pointer_types_discard_qualifiers = true;
			force_recompile_guarantee_forward_progress();
		}

		is_using_builtin_array = old_is_using_builtin_array;
	}

	if (!is_var_runtime_size_array(aliased_var))
	{
		// Lower to temporary, so drop the qualification.
		set_qualified_name(aliased_var.self, "");
		statement(descriptor_storage, " auto &", to_name(aliased_var.self), " = ", name);
	}
	else
	{
		// This alias may have already been used to emit an entry point declaration. If there is a mismatch, we need a recompile.
		// Moving this code to be run earlier will also conflict,
		// because we need the qualified alias for the base resource,
		// so forcing recompile until things sync up is the least invasive method for now.
		if (ir.meta[aliased_var.self].decoration.qualified_alias != name)
			force_recompile();

		// This will get wrapped in a separate temporary when a spvDescriptorArray wrapper is emitted.
		set_qualified_name(aliased_var.self, name);
	}
}

void CompilerMSL::analyze_argument_buffers()
{
	// Gather all used resources and sort them out into argument buffers.
	// Each argument buffer corresponds to a descriptor set in SPIR-V.
	// The [[id(N)]] values used correspond to the resource mapping we have for MSL.
	// Otherwise, the binding number is used, but this is generally not safe for some types like
	// combined image samplers and arrays of resources. Metal needs different indices here,
	// while SPIR-V can have one descriptor set binding. To use argument buffers in practice,
	// you will need to use the remapping from the API.
	for (auto &id : argument_buffer_ids)
		id = 0;

	// Output resources, sorted by resource index & type.
	struct Resource
	{
		SPIRVariable *var;
		string name;
		SPIRType::BaseType basetype;
		uint32_t index;
		uint32_t plane_count;
		uint32_t plane;
		uint32_t overlapping_var_id;
	};
	SmallVector<Resource> resources_in_set[kMaxArgumentBuffers];
	SmallVector<uint32_t> inline_block_vars;

	bool set_needs_swizzle_buffer[kMaxArgumentBuffers] = {};
	bool set_needs_buffer_sizes[kMaxArgumentBuffers] = {};
	bool needs_buffer_sizes = false;

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, SPIRVariable &var) {
		if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
		     var.storage == StorageClassStorageBuffer) &&
		    !is_hidden_variable(var))
		{
			uint32_t desc_set = get_decoration(self, DecorationDescriptorSet);
			// Ignore if it's part of a push descriptor set.
			if (!descriptor_set_is_argument_buffer(desc_set))
				return;

			uint32_t var_id = var.self;
			auto &type = get_variable_data_type(var);

			if (desc_set >= kMaxArgumentBuffers)
				SPIRV_CROSS_THROW("Descriptor set index is out of range.");

			const MSLConstexprSampler *constexpr_sampler = nullptr;
			if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler)
			{
				constexpr_sampler = find_constexpr_sampler(var_id);
				if (constexpr_sampler)
				{
					// Mark this ID as a constexpr sampler for later in case it came from set/bindings.
					constexpr_samplers_by_id[var_id] = *constexpr_sampler;
				}
			}

			uint32_t binding = get_decoration(var_id, DecorationBinding);
			if (type.basetype == SPIRType::SampledImage)
			{
				add_resource_name(var_id);

				uint32_t plane_count = 1;
				if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
					plane_count = constexpr_sampler->planes;

				for (uint32_t i = 0; i < plane_count; i++)
				{
					uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i);
					resources_in_set[desc_set].push_back(
					    { &var, to_name(var_id), SPIRType::Image, image_resource_index, plane_count, i, 0 });
				}

				if (type.image.dim != DimBuffer && !constexpr_sampler)
				{
					uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler);
					resources_in_set[desc_set].push_back(
					    { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 1, 0, 0 });
				}
			}
			else if (inline_uniform_blocks.count(SetBindingPair{ desc_set, binding }))
			{
				inline_block_vars.push_back(var_id);
			}
			else if (!constexpr_sampler && is_supported_argument_buffer_type(type))
			{
				// constexpr samplers are not declared as resources.
				// Inline uniform blocks are always emitted at the end.
				add_resource_name(var_id);

				uint32_t resource_index = get_metal_resource_index(var, type.basetype);

				resources_in_set[desc_set].push_back(
				    { &var, to_name(var_id), type.basetype, resource_index, 1, 0, 0 });

				// Emulate texture2D atomic operations
				if (atomic_image_vars_emulated.count(var.self))
				{
					uint32_t buffer_resource_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0);
					resources_in_set[desc_set].push_back(
					    { &var, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 1, 0, 0 });
				}
			}

			// Check if this descriptor set needs a swizzle buffer.
			if (needs_swizzle_buffer_def && is_sampled_image_type(type))
				set_needs_swizzle_buffer[desc_set] = true;
			else if (buffer_requires_array_length(var_id))
			{
				set_needs_buffer_sizes[desc_set] = true;
				needs_buffer_sizes = true;
			}
		}
	});

	if (needs_swizzle_buffer_def || needs_buffer_sizes)
	{
		uint32_t uint_ptr_type_id = 0;

		// We might have to add a swizzle buffer resource to the set.
		for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++)
		{
			if (!set_needs_swizzle_buffer[desc_set] && !set_needs_buffer_sizes[desc_set])
				continue;

			if (uint_ptr_type_id == 0)
			{
				uint_ptr_type_id = ir.increase_bound_by(1);

				// Create a buffer to hold extra data, including the swizzle constants.
				SPIRType uint_type_pointer = get_uint_type();
				uint_type_pointer.op = OpTypePointer;
				uint_type_pointer.pointer = true;
				uint_type_pointer.pointer_depth++;
				uint_type_pointer.parent_type = get_uint_type_id();
				uint_type_pointer.storage = StorageClassUniform;
				set<SPIRType>(uint_ptr_type_id, uint_type_pointer);
				set_decoration(uint_ptr_type_id, DecorationArrayStride, 4);
			}

			if (set_needs_swizzle_buffer[desc_set])
			{
				uint32_t var_id = ir.increase_bound_by(1);
				auto &var = set<SPIRVariable>(var_id, uint_ptr_type_id, StorageClassUniformConstant);
				set_name(var_id, "spvSwizzleConstants");
				set_decoration(var_id, DecorationDescriptorSet, desc_set);
				set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding);
				resources_in_set[desc_set].push_back(
				    { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 1, 0, 0 });
			}

			if (set_needs_buffer_sizes[desc_set])
			{
				uint32_t var_id = ir.increase_bound_by(1);
				auto &var = set<SPIRVariable>(var_id, uint_ptr_type_id, StorageClassUniformConstant);
				set_name(var_id, "spvBufferSizeConstants");
				set_decoration(var_id, DecorationDescriptorSet, desc_set);
				set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding);
				resources_in_set[desc_set].push_back(
				    { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 1, 0, 0 });
			}
		}
	}

	// Now add inline uniform blocks.
	for (uint32_t var_id : inline_block_vars)
	{
		auto &var = get<SPIRVariable>(var_id);
		uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet);
		add_resource_name(var_id);
		resources_in_set[desc_set].push_back(
		    { &var, to_name(var_id), SPIRType::Struct, get_metal_resource_index(var, SPIRType::Struct), 1, 0, 0 });
	}

	for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++)
	{
		auto &resources = resources_in_set[desc_set];
		if (resources.empty())
			continue;

		assert(descriptor_set_is_argument_buffer(desc_set));

		uint32_t next_id = ir.increase_bound_by(3);
		uint32_t type_id = next_id + 1;
		uint32_t ptr_type_id = next_id + 2;
		argument_buffer_ids[desc_set] = next_id;

		auto &buffer_type = set<SPIRType>(type_id, OpTypeStruct);

		buffer_type.basetype = SPIRType::Struct;

		if ((argument_buffer_device_storage_mask & (1u << desc_set)) != 0)
		{
			buffer_type.storage = StorageClassStorageBuffer;

			// Make sure the argument buffer gets marked as const device.
			set_decoration(next_id, DecorationNonWritable);
			// Need to mark the type as a Block to enable this.
			set_decoration(type_id, DecorationBlock);
		}
		else
			buffer_type.storage = StorageClassUniform;

		auto buffer_type_name = join("spvDescriptorSetBuffer", desc_set);
		set_name(type_id, buffer_type_name);

		auto &ptr_type = set<SPIRType>(ptr_type_id, OpTypePointer);
		ptr_type = buffer_type;
		ptr_type.op = spv::OpTypePointer;
		ptr_type.pointer = true;
		ptr_type.pointer_depth++;
		ptr_type.parent_type = type_id;

		uint32_t buffer_variable_id = next_id;
		auto &buffer_var = set<SPIRVariable>(buffer_variable_id, ptr_type_id, StorageClassUniform);
		auto buffer_name = join("spvDescriptorSet", desc_set);
		set_name(buffer_variable_id, buffer_name);

		// Ids must be emitted in ID order.
		stable_sort(begin(resources), end(resources), [&](const Resource &lhs, const Resource &rhs) -> bool {
			return tie(lhs.index, lhs.basetype) < tie(rhs.index, rhs.basetype);
		});

		for (size_t i = 0; i < resources.size() - 1; i++)
		{
			auto &r1 = resources[i];
			auto &r2 = resources[i + 1];

			if (r1.index == r2.index)
			{
				if (r1.overlapping_var_id)
					r2.overlapping_var_id = r1.overlapping_var_id;
				else
					r2.overlapping_var_id = r1.var->self;

				set_extended_decoration(r2.var->self, SPIRVCrossDecorationOverlappingBinding, r2.overlapping_var_id);
			}
		}

		uint32_t member_index = 0;
		uint32_t next_arg_buff_index = 0;
		for (auto &resource : resources)
		{
			auto &var = *resource.var;
			auto &type = get_variable_data_type(var);

			if (is_var_runtime_size_array(var) && (argument_buffer_device_storage_mask & (1u << desc_set)) == 0)
				SPIRV_CROSS_THROW("Runtime sized variables must be in device storage argument buffers.");

			// If needed, synthesize and add padding members.
			// member_index and next_arg_buff_index are incremented when padding members are added.
			if (msl_options.pad_argument_buffer_resources && resource.plane == 0 && resource.overlapping_var_id == 0)
			{
				auto rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index);
				while (resource.index > next_arg_buff_index)
				{
					switch (rez_bind.basetype)
					{
					case SPIRType::Void:
					case SPIRType::Boolean:
					case SPIRType::SByte:
					case SPIRType::UByte:
					case SPIRType::Short:
					case SPIRType::UShort:
					case SPIRType::Int:
					case SPIRType::UInt:
					case SPIRType::Int64:
					case SPIRType::UInt64:
					case SPIRType::AtomicCounter:
					case SPIRType::Half:
					case SPIRType::Float:
					case SPIRType::Double:
						add_argument_buffer_padding_buffer_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
						break;
					case SPIRType::Image:
						add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
						break;
					case SPIRType::Sampler:
						add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
						break;
					case SPIRType::SampledImage:
						if (next_arg_buff_index == rez_bind.msl_sampler)
							add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
						else
							add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind);
						break;
					default:
						break;
					}

					// After padding, retrieve the resource again. It will either be more padding, or the actual resource.
					rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index);
				}

				// Adjust the number of slots consumed by current member itself.
				// Use the count value from the app, instead of the shader, in case the
				// shader is only accessing part, or even one element, of the array.
				next_arg_buff_index += resource.plane_count * rez_bind.count;
			}

			string mbr_name = ensure_valid_name(resource.name, "m");
			if (resource.plane > 0)
				mbr_name += join(plane_name_suffix, resource.plane);
			set_member_name(buffer_type.self, member_index, mbr_name);

			if (resource.basetype == SPIRType::Sampler && type.basetype != SPIRType::Sampler)
			{
				// Have to synthesize a sampler type here.
				bool type_is_array = !type.array.empty();
				uint32_t sampler_type_id = ir.increase_bound_by(type_is_array ? 2 : 1);
				auto &new_sampler_type = set<SPIRType>(sampler_type_id, OpTypeSampler);
				new_sampler_type.basetype = SPIRType::Sampler;
				new_sampler_type.storage = StorageClassUniformConstant;

				if (type_is_array)
				{
					uint32_t sampler_type_array_id = sampler_type_id + 1;
					auto &sampler_type_array = set<SPIRType>(sampler_type_array_id, OpTypeArray);
					sampler_type_array = new_sampler_type;
					sampler_type_array.array = type.array;
					sampler_type_array.array_size_literal = type.array_size_literal;
					sampler_type_array.parent_type = sampler_type_id;
					buffer_type.member_types.push_back(sampler_type_array_id);
				}
				else
					buffer_type.member_types.push_back(sampler_type_id);
			}
			else
			{
				uint32_t binding = get_decoration(var.self, DecorationBinding);
				SetBindingPair pair = { desc_set, binding };

				if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler ||
				    resource.basetype == SPIRType::SampledImage)
				{
					// Drop pointer information when we emit the resources into a struct.
					buffer_type.member_types.push_back(get_variable_data_type_id(var));
					if (has_extended_decoration(var.self, SPIRVCrossDecorationOverlappingBinding))
					{
						if (!msl_options.supports_msl_version(3, 0))
							SPIRV_CROSS_THROW("Full mutable aliasing of argument buffer descriptors only works on Metal 3+.");

						auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
						entry_func.fixup_hooks_in.push_back([this, resource]() {
							emit_argument_buffer_aliased_descriptor(*resource.var, this->get<SPIRVariable>(resource.overlapping_var_id));
						});
					}
					else if (resource.plane == 0)
					{
						set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
					}
				}
				else if (buffers_requiring_dynamic_offset.count(pair))
				{
					// Don't set the qualified name here; we'll define a variable holding the corrected buffer address later.
					buffer_type.member_types.push_back(var.basetype);
					buffers_requiring_dynamic_offset[pair].second = var.self;
				}
				else if (inline_uniform_blocks.count(pair))
				{
					// Put the buffer block itself into the argument buffer.
					buffer_type.member_types.push_back(get_variable_data_type_id(var));
					set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
				}
				else if (atomic_image_vars_emulated.count(var.self))
				{
					// Emulate texture2D atomic operations.
					// Don't set the qualified name: it's already set for this variable,
					// and the code that references the buffer manually appends "_atomic"
					// to the name.
					uint32_t offset = ir.increase_bound_by(2);
					uint32_t atomic_type_id = offset;
					uint32_t type_ptr_id = offset + 1;

					SPIRType atomic_type { OpTypeInt };
					atomic_type.basetype = SPIRType::AtomicCounter;
					atomic_type.width = 32;
					atomic_type.vecsize = 1;
					set<SPIRType>(atomic_type_id, atomic_type);

					atomic_type.op = OpTypePointer;
					atomic_type.pointer = true;
					atomic_type.pointer_depth++;
					atomic_type.parent_type = atomic_type_id;
					atomic_type.storage = StorageClassStorageBuffer;
					auto &atomic_ptr_type = set<SPIRType>(type_ptr_id, atomic_type);
					atomic_ptr_type.self = atomic_type_id;

					buffer_type.member_types.push_back(type_ptr_id);
				}
				else
				{
					buffer_type.member_types.push_back(var.basetype);
					if (has_extended_decoration(var.self, SPIRVCrossDecorationOverlappingBinding))
					{
						// Casting raw pointers is fine since their ABI is fixed, but anything opaque is deeply questionable on Metal 2.
						if (get<SPIRVariable>(resource.overlapping_var_id).storage == StorageClassUniformConstant &&
						    !msl_options.supports_msl_version(3, 0))
						{
							SPIRV_CROSS_THROW("Full mutable aliasing of argument buffer descriptors only works on Metal 3+.");
						}

						auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
						entry_func.fixup_hooks_in.push_back([this, resource]() {
							emit_argument_buffer_aliased_descriptor(*resource.var, this->get<SPIRVariable>(resource.overlapping_var_id));
						});
					}
					else if (type.array.empty())
						set_qualified_name(var.self, join("(*", to_name(buffer_variable_id), ".", mbr_name, ")"));
					else
						set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
				}
			}

			set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationResourceIndexPrimary,
			                               resource.index);
			set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID,
			                               var.self);
			if (has_extended_decoration(var.self, SPIRVCrossDecorationOverlappingBinding))
				set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationOverlappingBinding);
			member_index++;
		}

		if (msl_options.replace_recursive_inputs && type_contains_recursion(buffer_type))
		{
			recursive_inputs.insert(type_id);
			auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
			auto addr_space = get_argument_address_space(buffer_var);
			entry_func.fixup_hooks_in.push_back([this, addr_space, buffer_name, buffer_type_name]() {
				statement(addr_space, " auto& ", buffer_name, " = *(", addr_space, " ", buffer_type_name, "*)", buffer_name, "_vp;");
			});
		}
	}
}

// Return the resource type of the app-provided resources for the descriptor set,
// that matches the resource index of the argument buffer index.
// This is a two-step lookup: first look up the resource binding number from the argument buffer index,
// then look up the resource binding using that binding number.
const MSLResourceBinding &CompilerMSL::get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) const
{
	auto stage = get_entry_point().model;
	StageSetBinding arg_idx_tuple = { stage, desc_set, arg_idx };
	auto arg_itr = resource_arg_buff_idx_to_binding_number.find(arg_idx_tuple);
	if (arg_itr != end(resource_arg_buff_idx_to_binding_number))
	{
		StageSetBinding bind_tuple = { stage, desc_set, arg_itr->second };
		auto bind_itr = resource_bindings.find(bind_tuple);
		if (bind_itr != end(resource_bindings))
			return bind_itr->second.first;
	}
	SPIRV_CROSS_THROW("Argument buffer resource base type could not be determined. When padding argument buffer "
	                  "elements, all descriptor set resources must be supplied with a base type by the app.");
}

// Adds an argument buffer padding argument buffer type as one or more members of the struct type at the member index.
// Metal does not support arrays of buffers, so these are emitted as multiple struct members.
void CompilerMSL::add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx,
                                                          uint32_t &arg_buff_index, MSLResourceBinding &rez_bind)
{
	if (!argument_buffer_padding_buffer_type_id)
	{
		uint32_t buff_type_id = ir.increase_bound_by(2);
		auto &buff_type = set<SPIRType>(buff_type_id, OpNop);
		buff_type.basetype = rez_bind.basetype;
		buff_type.storage = StorageClassUniformConstant;

		uint32_t ptr_type_id = buff_type_id + 1;
		auto &ptr_type = set<SPIRType>(ptr_type_id, OpTypePointer);
		ptr_type = buff_type;
		ptr_type.op = spv::OpTypePointer;
		ptr_type.pointer = true;
		ptr_type.pointer_depth++;
		ptr_type.parent_type = buff_type_id;

		argument_buffer_padding_buffer_type_id = ptr_type_id;
	}

	add_argument_buffer_padding_type(argument_buffer_padding_buffer_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count);
}

// Adds an argument buffer padding argument image type as a member of the struct type at the member index.
void CompilerMSL::add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx,
                                                         uint32_t &arg_buff_index, MSLResourceBinding &rez_bind)
{
	if (!argument_buffer_padding_image_type_id)
	{
		uint32_t base_type_id = ir.increase_bound_by(2);
		auto &base_type = set<SPIRType>(base_type_id, OpTypeFloat);
		base_type.basetype = SPIRType::Float;
		base_type.width = 32;

		uint32_t img_type_id = base_type_id + 1;
		auto &img_type = set<SPIRType>(img_type_id, OpTypeImage);
		img_type.basetype = SPIRType::Image;
		img_type.storage = StorageClassUniformConstant;

		img_type.image.type = base_type_id;
		img_type.image.dim = Dim2D;
		img_type.image.depth = false;
		img_type.image.arrayed = false;
		img_type.image.ms = false;
		img_type.image.sampled = 1;
		img_type.image.format = ImageFormatUnknown;
		img_type.image.access = AccessQualifierMax;

		argument_buffer_padding_image_type_id = img_type_id;
	}

	add_argument_buffer_padding_type(argument_buffer_padding_image_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count);
}

// Adds an argument buffer padding argument sampler type as a member of the struct type at the member index.
void CompilerMSL::add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx,
                                                           uint32_t &arg_buff_index, MSLResourceBinding &rez_bind)
{
	if (!argument_buffer_padding_sampler_type_id)
	{
		uint32_t samp_type_id = ir.increase_bound_by(1);
		auto &samp_type = set<SPIRType>(samp_type_id, OpTypeSampler);
		samp_type.basetype = SPIRType::Sampler;
		samp_type.storage = StorageClassUniformConstant;

		argument_buffer_padding_sampler_type_id = samp_type_id;
	}

	add_argument_buffer_padding_type(argument_buffer_padding_sampler_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count);
}

// Adds the argument buffer padding argument type as a member of the struct type at the member index.
// Advances both arg_buff_index and mbr_idx to next argument slots.
void CompilerMSL::add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx,
                                                   uint32_t &arg_buff_index, uint32_t count)
{
	uint32_t type_id = mbr_type_id;
	if (count > 1)
	{
		uint32_t ary_type_id = ir.increase_bound_by(1);
		auto &ary_type = set<SPIRType>(ary_type_id, get<SPIRType>(type_id));
		ary_type.op = OpTypeArray;
		ary_type.array.push_back(count);
		ary_type.array_size_literal.push_back(true);
		ary_type.parent_type = type_id;
		type_id = ary_type_id;
	}

	set_member_name(struct_type.self, mbr_idx, join("_m", arg_buff_index, "_pad"));
	set_extended_member_decoration(struct_type.self, mbr_idx, SPIRVCrossDecorationResourceIndexPrimary, arg_buff_index);
	struct_type.member_types.push_back(type_id);

	arg_buff_index += count;
	mbr_idx++;
}

void CompilerMSL::activate_argument_buffer_resources()
{
	// For ABI compatibility, force-enable all resources which are part of argument buffers.
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, const SPIRVariable &) {
		if (!has_decoration(self, DecorationDescriptorSet))
			return;

		uint32_t desc_set = get_decoration(self, DecorationDescriptorSet);
		if (descriptor_set_is_argument_buffer(desc_set))
			add_active_interface_variable(self);
	});
}
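
// Returns true when arrays must be emitted as plain C arrays instead of the array<T> value-type template.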
bool CompilerMSL::using_builtin_array() const
{
	return msl_options.force_native_arrays || is_using_builtin_array;
}

void CompilerMSL::set_combined_sampler_suffix(const char *suffix)
{
	sampler_name_suffix = suffix;
}

const char *CompilerMSL::get_combined_sampler_suffix() const
{
	return sampler_name_suffix.c_str();
}
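
// Block hints are intentionally not emitted for MSL.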
void CompilerMSL::emit_block_hints(const SPIRBlock &)
{
}
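
// Returns msl_options.additional_fixed_sample_mask formatted as a hex literal,
// e.g. a mask of 0x80 yields the string "0x80".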
string CompilerMSL::additional_fixed_sample_mask_str() const
{
	char print_buffer[32];
#ifdef _MSC_VER
	// snprintf does not exist or is buggy on older MSVC versions, some of
	// them being used by MinGW. Use sprintf instead and disable
	// corresponding warning.
#pragma warning(push)
#pragma warning(disable : 4996)
#endif

#if _WIN32
	sprintf(print_buffer, "0x%x", msl_options.additional_fixed_sample_mask);
#else
	snprintf(print_buffer, sizeof(print_buffer), "0x%x", msl_options.additional_fixed_sample_mask);
#endif

#ifdef _MSC_VER
#pragma warning(pop)
#endif
	return print_buffer;
}