spirv_glsl.cpp 493 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678
  1. /*
  2. * Copyright 2015-2021 Arm Limited
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * At your option, you may choose to accept this material under either:
  18. * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
  19. * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
  20. * SPDX-License-Identifier: Apache-2.0 OR MIT.
  21. */
  22. #include "spirv_glsl.hpp"
  23. #include "GLSL.std.450.h"
  24. #include "spirv_common.hpp"
  25. #include <algorithm>
  26. #include <assert.h>
  27. #include <cmath>
  28. #include <limits>
  29. #include <locale.h>
  30. #include <utility>
  31. #ifndef _WIN32
  32. #include <langinfo.h>
  33. #endif
  34. #include <locale.h>
  35. using namespace spv;
  36. using namespace SPIRV_CROSS_NAMESPACE;
  37. using namespace std;
  38. static bool is_unsigned_opcode(Op op)
  39. {
  40. // Don't have to be exhaustive, only relevant for legacy target checking ...
  41. switch (op)
  42. {
  43. case OpShiftRightLogical:
  44. case OpUGreaterThan:
  45. case OpUGreaterThanEqual:
  46. case OpULessThan:
  47. case OpULessThanEqual:
  48. case OpUConvert:
  49. case OpUDiv:
  50. case OpUMod:
  51. case OpUMulExtended:
  52. case OpConvertUToF:
  53. case OpConvertFToU:
  54. return true;
  55. default:
  56. return false;
  57. }
  58. }
  59. static bool is_unsigned_glsl_opcode(GLSLstd450 op)
  60. {
  61. // Don't have to be exhaustive, only relevant for legacy target checking ...
  62. switch (op)
  63. {
  64. case GLSLstd450UClamp:
  65. case GLSLstd450UMin:
  66. case GLSLstd450UMax:
  67. case GLSLstd450FindUMsb:
  68. return true;
  69. default:
  70. return false;
  71. }
  72. }
  73. static bool packing_is_vec4_padded(BufferPackingStandard packing)
  74. {
  75. switch (packing)
  76. {
  77. case BufferPackingHLSLCbuffer:
  78. case BufferPackingHLSLCbufferPackOffset:
  79. case BufferPackingStd140:
  80. case BufferPackingStd140EnhancedLayout:
  81. return true;
  82. default:
  83. return false;
  84. }
  85. }
  86. static bool packing_is_hlsl(BufferPackingStandard packing)
  87. {
  88. switch (packing)
  89. {
  90. case BufferPackingHLSLCbuffer:
  91. case BufferPackingHLSLCbufferPackOffset:
  92. return true;
  93. default:
  94. return false;
  95. }
  96. }
  97. static bool packing_has_flexible_offset(BufferPackingStandard packing)
  98. {
  99. switch (packing)
  100. {
  101. case BufferPackingStd140:
  102. case BufferPackingStd430:
  103. case BufferPackingScalar:
  104. case BufferPackingHLSLCbuffer:
  105. return false;
  106. default:
  107. return true;
  108. }
  109. }
  110. static bool packing_is_scalar(BufferPackingStandard packing)
  111. {
  112. switch (packing)
  113. {
  114. case BufferPackingScalar:
  115. case BufferPackingScalarEnhancedLayout:
  116. return true;
  117. default:
  118. return false;
  119. }
  120. }
  121. static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
  122. {
  123. switch (packing)
  124. {
  125. case BufferPackingStd140EnhancedLayout:
  126. return BufferPackingStd140;
  127. case BufferPackingStd430EnhancedLayout:
  128. return BufferPackingStd430;
  129. case BufferPackingHLSLCbufferPackOffset:
  130. return BufferPackingHLSLCbuffer;
  131. case BufferPackingScalarEnhancedLayout:
  132. return BufferPackingScalar;
  133. default:
  134. return packing;
  135. }
  136. }
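// Illustrative sketch (not part of the upstream file): how the packing predicates above
// classify the common layouts. The expected values follow directly from the switch
// statements; the assert-based form is just a compact way of stating them.
#if 0
static void example_packing_predicates()
{
	assert(packing_is_vec4_padded(BufferPackingStd140));       // std140 rounds arrays and structs up to vec4
	assert(!packing_is_vec4_padded(BufferPackingStd430));
	assert(packing_is_hlsl(BufferPackingHLSLCbuffer));
	assert(!packing_has_flexible_offset(BufferPackingStd430)); // plain layouts imply rule-derived offsets
	assert(packing_has_flexible_offset(BufferPackingStd430EnhancedLayout));
	assert(packing_is_scalar(BufferPackingScalar));
	// Sub-structs drop the "enhanced layout" flavor and keep the base packing rules.
	assert(packing_to_substruct_packing(BufferPackingStd140EnhancedLayout) == BufferPackingStd140);
}
#endif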
  137. void CompilerGLSL::init()
  138. {
  139. if (ir.source.known)
  140. {
  141. options.es = ir.source.es;
  142. options.version = ir.source.version;
  143. }
  144. // Query the locale to see what the decimal point is.
  145. // We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
  146. // rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
  147. // tricky.
  148. #ifdef _WIN32
  149. // On Windows, localeconv uses thread-local storage, so it should be fine.
  150. const struct lconv *conv = localeconv();
  151. if (conv && conv->decimal_point)
  152. current_locale_radix_character = *conv->decimal_point;
  153. #elif defined(__ANDROID__) && __ANDROID_API__ < 26
  154. // nl_langinfo is not supported on this platform; fall back to the worse alternative.
  155. const struct lconv *conv = localeconv();
  156. if (conv && conv->decimal_point)
  157. current_locale_radix_character = *conv->decimal_point;
  158. #else
  159. // localeconv, the portable function, is not MT safe ...
  160. const char *decimal_point = nl_langinfo(RADIXCHAR);
  161. if (decimal_point && *decimal_point != '\0')
  162. current_locale_radix_character = *decimal_point;
  163. #endif
  164. }
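// Illustrative sketch (assumption, not part of the upstream file): why the radix character
// matters. C-runtime float formatting honors the process locale, so a comma-as-decimal
// locale would print "3,14"; the emitter later patches that back to '.' so the generated
// GLSL stays valid. A minimal version of such a fixup (needs <cstdio> and <string>):
#if 0
static std::string example_format_float(double value, char locale_radix)
{
	char buf[64];
	snprintf(buf, sizeof(buf), "%.8g", value);
	// Replace a locale-specific decimal separator with the '.' expected by GLSL.
	if (locale_radix != '.')
		for (char *p = buf; *p; p++)
			if (*p == locale_radix)
				*p = '.';
	return buf;
}
#endif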
  165. static const char *to_pls_layout(PlsFormat format)
  166. {
  167. switch (format)
  168. {
  169. case PlsR11FG11FB10F:
  170. return "layout(r11f_g11f_b10f) ";
  171. case PlsR32F:
  172. return "layout(r32f) ";
  173. case PlsRG16F:
  174. return "layout(rg16f) ";
  175. case PlsRGB10A2:
  176. return "layout(rgb10_a2) ";
  177. case PlsRGBA8:
  178. return "layout(rgba8) ";
  179. case PlsRG16:
  180. return "layout(rg16) ";
  181. case PlsRGBA8I:
  182. return "layout(rgba8i) ";
  183. case PlsRG16I:
  184. return "layout(rg16i) ";
  185. case PlsRGB10A2UI:
  186. return "layout(rgb10_a2ui) ";
  187. case PlsRGBA8UI:
  188. return "layout(rgba8ui) ";
  189. case PlsRG16UI:
  190. return "layout(rg16ui) ";
  191. case PlsR32UI:
  192. return "layout(r32ui) ";
  193. default:
  194. return "";
  195. }
  196. }
  197. static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
  198. {
  199. switch (format)
  200. {
  201. default:
  202. case PlsR11FG11FB10F:
  203. case PlsR32F:
  204. case PlsRG16F:
  205. case PlsRGB10A2:
  206. case PlsRGBA8:
  207. case PlsRG16:
  208. return SPIRType::Float;
  209. case PlsRGBA8I:
  210. case PlsRG16I:
  211. return SPIRType::Int;
  212. case PlsRGB10A2UI:
  213. case PlsRGBA8UI:
  214. case PlsRG16UI:
  215. case PlsR32UI:
  216. return SPIRType::UInt;
  217. }
  218. }
  219. static uint32_t pls_format_to_components(PlsFormat format)
  220. {
  221. switch (format)
  222. {
  223. default:
  224. case PlsR32F:
  225. case PlsR32UI:
  226. return 1;
  227. case PlsRG16F:
  228. case PlsRG16:
  229. case PlsRG16UI:
  230. case PlsRG16I:
  231. return 2;
  232. case PlsR11FG11FB10F:
  233. return 3;
  234. case PlsRGB10A2:
  235. case PlsRGBA8:
  236. case PlsRGBA8I:
  237. case PlsRGB10A2UI:
  238. case PlsRGBA8UI:
  239. return 4;
  240. }
  241. }
  242. const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
  243. {
  244. static const char *const swizzle[4][4] = {
  245. { ".x", ".y", ".z", ".w" },
  246. { ".xy", ".yz", ".zw", nullptr },
  247. { ".xyz", ".yzw", nullptr, nullptr },
  248. #if defined(__GNUC__) && (__GNUC__ == 9)
  249. // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
  250. // This array ends up being compiled as all nullptrs, tripping the assertions below.
  251. { "", nullptr, nullptr, "$" },
  252. #else
  253. { "", nullptr, nullptr, nullptr },
  254. #endif
  255. };
  256. assert(vecsize >= 1 && vecsize <= 4);
  257. assert(index >= 0 && index < 4);
  258. assert(swizzle[vecsize - 1][index]);
  259. return swizzle[vecsize - 1][index];
  260. }
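// Example (follows directly from the table above): the helper maps a (vecsize, start index)
// pair to the swizzle that extracts that many consecutive components.
//   vector_swizzle(1, 2) -> ".z"
//   vector_swizzle(2, 0) -> ".xy"
//   vector_swizzle(3, 1) -> ".yzw"
//   vector_swizzle(4, 0) -> ""     (full vector, no swizzle needed)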
  261. void CompilerGLSL::reset()
  262. {
  263. // We do some speculative optimizations which should pretty much always work out,
  264. // but just in case the SPIR-V is rather weird, recompile until it's happy.
  265. // This typically only means one extra pass.
  266. clear_force_recompile();
  267. // Clear invalid expression tracking.
  268. invalid_expressions.clear();
  269. current_function = nullptr;
  270. // Clear temporary usage tracking.
  271. expression_usage_counts.clear();
  272. forwarded_temporaries.clear();
  273. suppressed_usage_tracking.clear();
  274. // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
  275. flushed_phi_variables.clear();
  276. reset_name_caches();
  277. ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
  278. func.active = false;
  279. func.flush_undeclared = true;
  280. });
  281. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });
  282. ir.reset_all_of_type<SPIRExpression>();
  283. ir.reset_all_of_type<SPIRAccessChain>();
  284. statement_count = 0;
  285. indent = 0;
  286. current_loop_level = 0;
  287. }
  288. void CompilerGLSL::remap_pls_variables()
  289. {
  290. for (auto &input : pls_inputs)
  291. {
  292. auto &var = get<SPIRVariable>(input.id);
  293. bool input_is_target = false;
  294. if (var.storage == StorageClassUniformConstant)
  295. {
  296. auto &type = get<SPIRType>(var.basetype);
  297. input_is_target = type.image.dim == DimSubpassData;
  298. }
  299. if (var.storage != StorageClassInput && !input_is_target)
  300. SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
  301. var.remapped_variable = true;
  302. }
  303. for (auto &output : pls_outputs)
  304. {
  305. auto &var = get<SPIRVariable>(output.id);
  306. if (var.storage != StorageClassOutput)
  307. SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
  308. var.remapped_variable = true;
  309. }
  310. }
  311. void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location)
  312. {
  313. subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
  314. inout_color_attachments.insert(color_location);
  315. }
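// Illustrative sketch (assumption about typical client code, not part of the upstream file):
// a client that wants subpass input 0 to become a framebuffer fetch of color attachment 0
// calls this before compile(). find_static_extensions() then enforces fragment-only usage
// and pulls in GL_EXT_shader_framebuffer_fetch.
#if 0
CompilerGLSL compiler(std::move(spirv_words)); // spirv_words: hypothetical std::vector<uint32_t>
compiler.remap_ext_framebuffer_fetch(0 /* input_attachment_index */, 0 /* color_location */);
std::string source = compiler.compile();
#endif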
  316. void CompilerGLSL::find_static_extensions()
  317. {
  318. ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
  319. if (type.basetype == SPIRType::Double)
  320. {
  321. if (options.es)
  322. SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
  323. if (!options.es && options.version < 400)
  324. require_extension_internal("GL_ARB_gpu_shader_fp64");
  325. }
  326. else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
  327. {
  328. if (options.es)
  329. SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
  330. if (!options.es)
  331. require_extension_internal("GL_ARB_gpu_shader_int64");
  332. }
  333. else if (type.basetype == SPIRType::Half)
  334. {
  335. require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
  336. if (options.vulkan_semantics)
  337. require_extension_internal("GL_EXT_shader_16bit_storage");
  338. }
  339. else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
  340. {
  341. require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
  342. if (options.vulkan_semantics)
  343. require_extension_internal("GL_EXT_shader_8bit_storage");
  344. }
  345. else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
  346. {
  347. require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
  348. if (options.vulkan_semantics)
  349. require_extension_internal("GL_EXT_shader_16bit_storage");
  350. }
  351. });
  352. auto &execution = get_entry_point();
  353. switch (execution.model)
  354. {
  355. case ExecutionModelGLCompute:
  356. if (!options.es && options.version < 430)
  357. require_extension_internal("GL_ARB_compute_shader");
  358. if (options.es && options.version < 310)
  359. SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
  360. break;
  361. case ExecutionModelGeometry:
  362. if (options.es && options.version < 320)
  363. require_extension_internal("GL_EXT_geometry_shader");
  364. if (!options.es && options.version < 150)
  365. require_extension_internal("GL_ARB_geometry_shader4");
  366. if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
  367. {
  368. // Instanced GS is part of 400 core or this extension.
  369. if (!options.es && options.version < 400)
  370. require_extension_internal("GL_ARB_gpu_shader5");
  371. }
  372. break;
  373. case ExecutionModelTessellationEvaluation:
  374. case ExecutionModelTessellationControl:
  375. if (options.es && options.version < 320)
  376. require_extension_internal("GL_EXT_tessellation_shader");
  377. if (!options.es && options.version < 400)
  378. require_extension_internal("GL_ARB_tessellation_shader");
  379. break;
  380. case ExecutionModelRayGenerationKHR:
  381. case ExecutionModelIntersectionKHR:
  382. case ExecutionModelAnyHitKHR:
  383. case ExecutionModelClosestHitKHR:
  384. case ExecutionModelMissKHR:
  385. case ExecutionModelCallableKHR:
  386. // NV enums are aliases.
  387. if (options.es || options.version < 460)
  388. SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
  389. if (!options.vulkan_semantics)
  390. SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");
  391. // Need to figure out if we should target KHR or NV extension based on capabilities.
  392. for (auto &cap : ir.declared_capabilities)
  393. {
  394. if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR)
  395. {
  396. ray_tracing_is_khr = true;
  397. break;
  398. }
  399. }
  400. if (ray_tracing_is_khr)
  401. {
  402. // In KHR ray tracing we pass payloads by pointer instead of location,
  403. // so make sure we assign locations properly.
  404. ray_tracing_khr_fixup_locations();
  405. require_extension_internal("GL_EXT_ray_tracing");
  406. }
  407. else
  408. require_extension_internal("GL_NV_ray_tracing");
  409. break;
  410. default:
  411. break;
  412. }
  413. if (!pls_inputs.empty() || !pls_outputs.empty())
  414. {
  415. if (execution.model != ExecutionModelFragment)
  416. SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
  417. require_extension_internal("GL_EXT_shader_pixel_local_storage");
  418. }
  419. if (!inout_color_attachments.empty())
  420. {
  421. if (execution.model != ExecutionModelFragment)
  422. SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
  423. if (options.vulkan_semantics)
  424. SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");
  425. require_extension_internal("GL_EXT_shader_framebuffer_fetch");
  426. }
  427. if (options.separate_shader_objects && !options.es && options.version < 410)
  428. require_extension_internal("GL_ARB_separate_shader_objects");
  429. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
  430. {
  431. if (!options.vulkan_semantics)
  432. SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
  433. if (options.es && options.version < 320)
  434. SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
  435. else if (!options.es && options.version < 450)
  436. SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
  437. require_extension_internal("GL_EXT_buffer_reference");
  438. }
  439. else if (ir.addressing_model != AddressingModelLogical)
  440. {
  441. SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
  442. }
  443. // Check for nonuniform qualifier and passthrough.
  444. // Instead of looping over all decorations to find this, just look at capabilities.
  445. for (auto &cap : ir.declared_capabilities)
  446. {
  447. switch (cap)
  448. {
  449. case CapabilityShaderNonUniformEXT:
  450. if (!options.vulkan_semantics)
  451. require_extension_internal("GL_NV_gpu_shader5");
  452. else
  453. require_extension_internal("GL_EXT_nonuniform_qualifier");
  454. break;
  455. case CapabilityRuntimeDescriptorArrayEXT:
  456. if (!options.vulkan_semantics)
  457. SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
  458. require_extension_internal("GL_EXT_nonuniform_qualifier");
  459. break;
  460. case CapabilityGeometryShaderPassthroughNV:
  461. if (execution.model == ExecutionModelGeometry)
  462. {
  463. require_extension_internal("GL_NV_geometry_shader_passthrough");
  464. execution.geometry_passthrough = true;
  465. }
  466. break;
  467. case CapabilityVariablePointers:
  468. case CapabilityVariablePointersStorageBuffer:
  469. SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");
  470. default:
  471. break;
  472. }
  473. }
  474. }
  475. void CompilerGLSL::ray_tracing_khr_fixup_locations()
  476. {
  477. uint32_t location = 0;
  478. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  479. // Incoming payload storage can also be used for tracing.
  480. if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
  481. var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
  482. return;
  483. if (is_hidden_variable(var))
  484. return;
  485. set_decoration(var.self, DecorationLocation, location++);
  486. });
  487. }
  488. string CompilerGLSL::compile()
  489. {
  490. ir.fixup_reserved_names();
  491. if (options.vulkan_semantics)
  492. backend.allow_precision_qualifiers = true;
  493. else
  494. {
  495. // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
  496. backend.nonuniform_qualifier = "";
  497. backend.needs_row_major_load_workaround = true;
  498. }
  499. backend.force_gl_in_out_block = true;
  500. backend.supports_extensions = true;
  501. backend.use_array_constructor = true;
  502. if (is_legacy_es())
  503. backend.support_case_fallthrough = false;
  504. // Scan the SPIR-V to find trivial uses of extensions.
  505. fixup_type_alias();
  506. reorder_type_alias();
  507. build_function_control_flow_graphs_and_analyze();
  508. find_static_extensions();
  509. fixup_image_load_store_access();
  510. update_active_builtins();
  511. analyze_image_and_sampler_usage();
  512. analyze_interlocked_resource_usage();
  513. if (!inout_color_attachments.empty())
  514. emit_inout_fragment_outputs_copy_to_subpass_inputs();
  515. // Shaders might cast unrelated data to pointers of non-block types.
  516. // Find all such instances and make sure we can cast the pointers to a synthesized block type.
  517. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
  518. analyze_non_block_pointer_types();
  519. uint32_t pass_count = 0;
  520. do
  521. {
  522. if (pass_count >= 3)
  523. SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");
  524. reset();
  525. buffer.reset();
  526. emit_header();
  527. emit_resources();
  528. emit_extension_workarounds(get_execution_model());
  529. emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
  530. pass_count++;
  531. } while (is_forcing_recompilation());
  532. // Implement the interlocked wrapper function at the end.
  533. // The body was implemented in lieu of main().
  534. if (interlocked_is_complex)
  535. {
  536. statement("void main()");
  537. begin_scope();
  538. statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
  539. if (options.es)
  540. statement("beginInvocationInterlockNV();");
  541. else
  542. statement("beginInvocationInterlockARB();");
  543. statement("spvMainInterlockedBody();");
  544. if (options.es)
  545. statement("endInvocationInterlockNV();");
  546. else
  547. statement("endInvocationInterlockARB();");
  548. end_scope();
  549. }
  550. // Entry point in GLSL is always main().
  551. get_entry_point().name = "main";
  552. return buffer.str();
  553. }
  554. std::string CompilerGLSL::get_partial_source()
  555. {
  556. return buffer.str();
  557. }
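// Illustrative sketch (assumption about typical client code, not part of the upstream file):
// driving CompilerGLSL end to end. `spirv_words` is a hypothetical, already-loaded SPIR-V module.
#if 0
#include "spirv_glsl.hpp"
#include <string>
#include <vector>

std::string example_compile(std::vector<uint32_t> spirv_words)
{
	spirv_cross::CompilerGLSL glsl(std::move(spirv_words));
	auto opts = glsl.get_common_options();
	opts.version = 310; // target ESSL 3.10
	opts.es = true;
	glsl.set_common_options(opts);
	return glsl.compile();
}
#endif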
  558. void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
  559. const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
  560. {
  561. auto &execution = get_entry_point();
  562. if (wg_x.id)
  563. {
  564. if (options.vulkan_semantics)
  565. arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
  566. else
  567. arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
  568. }
  569. else
  570. arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));
  571. if (wg_y.id)
  572. {
  573. if (options.vulkan_semantics)
  574. arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
  575. else
  576. arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
  577. }
  578. else
  579. arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));
  580. if (wg_z.id)
  581. {
  582. if (options.vulkan_semantics)
  583. arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
  584. else
  585. arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
  586. }
  587. else
  588. arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
  589. }
  590. void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
  591. {
  592. if (options.vulkan_semantics)
  593. {
  594. auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
  595. require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
  596. }
  597. else
  598. {
  599. if (!shader_subgroup_supporter.is_feature_requested(feature))
  600. force_recompile();
  601. shader_subgroup_supporter.request_feature(feature);
  602. }
  603. }
  604. void CompilerGLSL::emit_header()
  605. {
  606. auto &execution = get_entry_point();
  607. statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");
  608. if (!options.es && options.version < 420)
  609. {
  610. // Needed for binding = # on UBOs, etc.
  611. if (options.enable_420pack_extension)
  612. {
  613. statement("#ifdef GL_ARB_shading_language_420pack");
  614. statement("#extension GL_ARB_shading_language_420pack : require");
  615. statement("#endif");
  616. }
  617. // Needed for: layout(early_fragment_tests) in;
  618. if (execution.flags.get(ExecutionModeEarlyFragmentTests))
  619. require_extension_internal("GL_ARB_shader_image_load_store");
  620. }
  621. // Needed for: layout(post_depth_coverage) in;
  622. if (execution.flags.get(ExecutionModePostDepthCoverage))
  623. require_extension_internal("GL_ARB_post_depth_coverage");
  624. // Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
  625. if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
  626. execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
  627. execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
  628. execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
  629. {
  630. if (options.es)
  631. {
  632. if (options.version < 310)
  633. SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
  634. require_extension_internal("GL_NV_fragment_shader_interlock");
  635. }
  636. else
  637. {
  638. if (options.version < 420)
  639. require_extension_internal("GL_ARB_shader_image_load_store");
  640. require_extension_internal("GL_ARB_fragment_shader_interlock");
  641. }
  642. }
  643. for (auto &ext : forced_extensions)
  644. {
  645. if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
  646. {
  647. // Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
  648. // GL_AMD_gpu_shader_half_float is a superset, so try that first.
  649. statement("#if defined(GL_AMD_gpu_shader_half_float)");
  650. statement("#extension GL_AMD_gpu_shader_half_float : require");
  651. if (!options.vulkan_semantics)
  652. {
  653. statement("#elif defined(GL_NV_gpu_shader5)");
  654. statement("#extension GL_NV_gpu_shader5 : require");
  655. }
  656. else
  657. {
  658. statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
  659. statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
  660. }
  661. statement("#else");
  662. statement("#error No extension available for FP16.");
  663. statement("#endif");
  664. }
  665. else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
  666. {
  667. if (options.vulkan_semantics)
  668. statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
  669. else
  670. {
  671. statement("#if defined(GL_AMD_gpu_shader_int16)");
  672. statement("#extension GL_AMD_gpu_shader_int16 : require");
  673. statement("#else");
  674. statement("#error No extension available for Int16.");
  675. statement("#endif");
  676. }
  677. }
  678. else if (ext == "GL_ARB_post_depth_coverage")
  679. {
  680. if (options.es)
  681. statement("#extension GL_EXT_post_depth_coverage : require");
  682. else
  683. {
  684. statement("#if defined(GL_ARB_post_depth_coverage)");
  685. statement("#extension GL_ARB_post_depth_coverage : require");
  686. statement("#else");
  687. statement("#extension GL_EXT_post_depth_coverage : require");
  688. statement("#endif");
  689. }
  690. }
  691. else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
  692. {
  693. // Soft-enable this extension on plain GLSL.
  694. statement("#ifdef ", ext);
  695. statement("#extension ", ext, " : enable");
  696. statement("#endif");
  697. }
  698. else
  699. statement("#extension ", ext, " : require");
  700. }
  701. if (!options.vulkan_semantics)
  702. {
  703. using Supp = ShaderSubgroupSupportHelper;
  704. auto result = shader_subgroup_supporter.resolve();
  705. for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
  706. {
  707. auto feature = static_cast<Supp::Feature>(feature_index);
  708. if (!shader_subgroup_supporter.is_feature_requested(feature))
  709. continue;
  710. auto exts = Supp::get_candidates_for_feature(feature, result);
  711. if (exts.empty())
  712. continue;
  713. statement("");
  714. for (auto &ext : exts)
  715. {
  716. const char *name = Supp::get_extension_name(ext);
  717. const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
  718. auto extra_names = Supp::get_extra_required_extension_names(ext);
  719. statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
  720. (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
  721. for (const auto &e : extra_names)
  722. statement("#extension ", e, " : enable");
  723. statement("#extension ", name, " : require");
  724. }
  725. if (!Supp::can_feature_be_implemented_without_extensions(feature))
  726. {
  727. statement("#else");
  728. statement("#error No extensions available to emulate requested subgroup feature.");
  729. }
  730. statement("#endif");
  731. }
  732. }
  733. for (auto &header : header_lines)
  734. statement(header);
  735. SmallVector<string> inputs;
  736. SmallVector<string> outputs;
  737. switch (execution.model)
  738. {
  739. case ExecutionModelGeometry:
  740. if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
  741. inputs.push_back(join("invocations = ", execution.invocations));
  742. if (execution.flags.get(ExecutionModeInputPoints))
  743. inputs.push_back("points");
  744. if (execution.flags.get(ExecutionModeInputLines))
  745. inputs.push_back("lines");
  746. if (execution.flags.get(ExecutionModeInputLinesAdjacency))
  747. inputs.push_back("lines_adjacency");
  748. if (execution.flags.get(ExecutionModeTriangles))
  749. inputs.push_back("triangles");
  750. if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
  751. inputs.push_back("triangles_adjacency");
  752. if (!execution.geometry_passthrough)
  753. {
  754. // For passthrough, these are implied and cannot be declared in the shader.
  755. outputs.push_back(join("max_vertices = ", execution.output_vertices));
  756. if (execution.flags.get(ExecutionModeOutputTriangleStrip))
  757. outputs.push_back("triangle_strip");
  758. if (execution.flags.get(ExecutionModeOutputPoints))
  759. outputs.push_back("points");
  760. if (execution.flags.get(ExecutionModeOutputLineStrip))
  761. outputs.push_back("line_strip");
  762. }
  763. break;
  764. case ExecutionModelTessellationControl:
  765. if (execution.flags.get(ExecutionModeOutputVertices))
  766. outputs.push_back(join("vertices = ", execution.output_vertices));
  767. break;
  768. case ExecutionModelTessellationEvaluation:
  769. if (execution.flags.get(ExecutionModeQuads))
  770. inputs.push_back("quads");
  771. if (execution.flags.get(ExecutionModeTriangles))
  772. inputs.push_back("triangles");
  773. if (execution.flags.get(ExecutionModeIsolines))
  774. inputs.push_back("isolines");
  775. if (execution.flags.get(ExecutionModePointMode))
  776. inputs.push_back("point_mode");
  777. if (!execution.flags.get(ExecutionModeIsolines))
  778. {
  779. if (execution.flags.get(ExecutionModeVertexOrderCw))
  780. inputs.push_back("cw");
  781. if (execution.flags.get(ExecutionModeVertexOrderCcw))
  782. inputs.push_back("ccw");
  783. }
  784. if (execution.flags.get(ExecutionModeSpacingFractionalEven))
  785. inputs.push_back("fractional_even_spacing");
  786. if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
  787. inputs.push_back("fractional_odd_spacing");
  788. if (execution.flags.get(ExecutionModeSpacingEqual))
  789. inputs.push_back("equal_spacing");
  790. break;
  791. case ExecutionModelGLCompute:
  792. {
  793. if (execution.workgroup_size.constant != 0)
  794. {
  795. SpecializationConstant wg_x, wg_y, wg_z;
  796. get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  797. // If there are any spec constants on legacy GLSL, defer the declaration; we need to set up macro
  798. // declarations before we can emit the work group size.
  799. if (options.vulkan_semantics ||
  800. ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
  801. build_workgroup_size(inputs, wg_x, wg_y, wg_z);
  802. }
  803. else
  804. {
  805. inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
  806. inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
  807. inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
  808. }
  809. break;
  810. }
  811. case ExecutionModelFragment:
  812. if (options.es)
  813. {
  814. switch (options.fragment.default_float_precision)
  815. {
  816. case Options::Lowp:
  817. statement("precision lowp float;");
  818. break;
  819. case Options::Mediump:
  820. statement("precision mediump float;");
  821. break;
  822. case Options::Highp:
  823. statement("precision highp float;");
  824. break;
  825. default:
  826. break;
  827. }
  828. switch (options.fragment.default_int_precision)
  829. {
  830. case Options::Lowp:
  831. statement("precision lowp int;");
  832. break;
  833. case Options::Mediump:
  834. statement("precision mediump int;");
  835. break;
  836. case Options::Highp:
  837. statement("precision highp int;");
  838. break;
  839. default:
  840. break;
  841. }
  842. }
  843. if (execution.flags.get(ExecutionModeEarlyFragmentTests))
  844. inputs.push_back("early_fragment_tests");
  845. if (execution.flags.get(ExecutionModePostDepthCoverage))
  846. inputs.push_back("post_depth_coverage");
  847. if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
  848. inputs.push_back("pixel_interlock_ordered");
  849. else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
  850. inputs.push_back("pixel_interlock_unordered");
  851. else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
  852. inputs.push_back("sample_interlock_ordered");
  853. else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
  854. inputs.push_back("sample_interlock_unordered");
  855. if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
  856. statement("layout(depth_greater) out float gl_FragDepth;");
  857. else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
  858. statement("layout(depth_less) out float gl_FragDepth;");
  859. break;
  860. default:
  861. break;
  862. }
  863. if (!inputs.empty())
  864. statement("layout(", merge(inputs), ") in;");
  865. if (!outputs.empty())
  866. statement("layout(", merge(outputs), ") out;");
  867. statement("");
  868. }
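// Worked example (derived from the paths above, not part of the upstream file): for an
// ESSL 3.10 compute shader with a literal 8x8x1 workgroup size and no extra extensions,
// emit_header() produces roughly:
#if 0
static const char *example_header =
	"#version 310 es\n"
	"layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;\n"
	"\n";
#endif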
  869. bool CompilerGLSL::type_is_empty(const SPIRType &type)
  870. {
  871. return type.basetype == SPIRType::Struct && type.member_types.empty();
  872. }
  873. void CompilerGLSL::emit_struct(SPIRType &type)
  874. {
  875. // Struct types can be stamped out multiple times
  876. // with just different offsets, matrix layouts, etc ...
  877. // Type-punning with these types is legal, which complicates things
  878. // when we are storing struct and array types in an SSBO for example.
  879. // If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
  880. if (type.type_alias != TypeID(0) &&
  881. !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
  882. return;
  883. add_resource_name(type.self);
  884. auto name = type_to_glsl(type);
  885. statement(!backend.explicit_struct_type ? "struct " : "", name);
  886. begin_scope();
  887. type.member_name_cache.clear();
  888. uint32_t i = 0;
  889. bool emitted = false;
  890. for (auto &member : type.member_types)
  891. {
  892. add_member_name(type, i);
  893. emit_struct_member(type, member, i);
  894. i++;
  895. emitted = true;
  896. }
  897. // Don't declare empty structs in GLSL, this is not allowed.
  898. if (type_is_empty(type) && !backend.supports_empty_struct)
  899. {
  900. statement("int empty_struct_member;");
  901. emitted = true;
  902. }
  903. if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
  904. emit_struct_padding_target(type);
  905. end_scope_decl();
  906. if (emitted)
  907. statement("");
  908. }
  909. string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
  910. {
  911. string res;
  912. //if (flags & (1ull << DecorationSmooth))
  913. // res += "smooth ";
  914. if (flags.get(DecorationFlat))
  915. res += "flat ";
  916. if (flags.get(DecorationNoPerspective))
  917. res += "noperspective ";
  918. if (flags.get(DecorationCentroid))
  919. res += "centroid ";
  920. if (flags.get(DecorationPatch))
  921. res += "patch ";
  922. if (flags.get(DecorationSample))
  923. res += "sample ";
  924. if (flags.get(DecorationInvariant))
  925. res += "invariant ";
  926. if (flags.get(DecorationExplicitInterpAMD))
  927. res += "__explicitInterpAMD ";
  928. return res;
  929. }
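// Example (follows from the flag checks above): an input decorated Flat + Centroid yields
// "flat centroid ", ready to be prepended to its declaration.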
  930. string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
  931. {
  932. if (is_legacy())
  933. return "";
  934. bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
  935. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  936. if (!is_block)
  937. return "";
  938. auto &memb = ir.meta[type.self].members;
  939. if (index >= memb.size())
  940. return "";
  941. auto &dec = memb[index];
  942. SmallVector<string> attr;
  943. if (has_member_decoration(type.self, index, DecorationPassthroughNV))
  944. attr.push_back("passthrough");
  945. // We can only apply layouts on members in block interfaces.
  946. // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
  947. // This is not supported in GLSL, so we have to assume that if a struct within our buffer block struct
  948. // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
  949. //
  950. // We would like to go from (SPIR-V style):
  951. //
  952. // struct Foo { layout(row_major) mat4 matrix; };
  953. // buffer UBO { Foo foo; };
  954. //
  955. // to
  956. //
  957. // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
  958. // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
  959. auto flags = combined_decoration_for_member(type, index);
  960. if (flags.get(DecorationRowMajor))
  961. attr.push_back("row_major");
  962. // We don't emit any global layouts, so column_major is default.
  963. //if (flags & (1ull << DecorationColMajor))
  964. // attr.push_back("column_major");
  965. if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
  966. attr.push_back(join("location = ", dec.location));
  967. // Can only declare component if we can declare location.
  968. if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
  969. {
  970. if (!options.es)
  971. {
  972. if (options.version < 440 && options.version >= 140)
  973. require_extension_internal("GL_ARB_enhanced_layouts");
  974. else if (options.version < 140)
  975. SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
  976. attr.push_back(join("component = ", dec.component));
  977. }
  978. else
  979. SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
  980. }
  981. // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
  982. // This is only done selectively in GLSL as needed.
  983. if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
  984. dec.decoration_flags.get(DecorationOffset))
  985. attr.push_back(join("offset = ", dec.offset));
  986. else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
  987. attr.push_back(join("xfb_offset = ", dec.offset));
  988. if (attr.empty())
  989. return "";
  990. string res = "layout(";
  991. res += merge(attr);
  992. res += ") ";
  993. return res;
  994. }
  995. const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
  996. {
  997. if (options.es && is_desktop_only_format(format))
  998. SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
  999. switch (format)
  1000. {
  1001. case ImageFormatRgba32f:
  1002. return "rgba32f";
  1003. case ImageFormatRgba16f:
  1004. return "rgba16f";
  1005. case ImageFormatR32f:
  1006. return "r32f";
  1007. case ImageFormatRgba8:
  1008. return "rgba8";
  1009. case ImageFormatRgba8Snorm:
  1010. return "rgba8_snorm";
  1011. case ImageFormatRg32f:
  1012. return "rg32f";
  1013. case ImageFormatRg16f:
  1014. return "rg16f";
  1015. case ImageFormatRgba32i:
  1016. return "rgba32i";
  1017. case ImageFormatRgba16i:
  1018. return "rgba16i";
  1019. case ImageFormatR32i:
  1020. return "r32i";
  1021. case ImageFormatRgba8i:
  1022. return "rgba8i";
  1023. case ImageFormatRg32i:
  1024. return "rg32i";
  1025. case ImageFormatRg16i:
  1026. return "rg16i";
  1027. case ImageFormatRgba32ui:
  1028. return "rgba32ui";
  1029. case ImageFormatRgba16ui:
  1030. return "rgba16ui";
  1031. case ImageFormatR32ui:
  1032. return "r32ui";
  1033. case ImageFormatRgba8ui:
  1034. return "rgba8ui";
  1035. case ImageFormatRg32ui:
  1036. return "rg32ui";
  1037. case ImageFormatRg16ui:
  1038. return "rg16ui";
  1039. case ImageFormatR11fG11fB10f:
  1040. return "r11f_g11f_b10f";
  1041. case ImageFormatR16f:
  1042. return "r16f";
  1043. case ImageFormatRgb10A2:
  1044. return "rgb10_a2";
  1045. case ImageFormatR8:
  1046. return "r8";
  1047. case ImageFormatRg8:
  1048. return "rg8";
  1049. case ImageFormatR16:
  1050. return "r16";
  1051. case ImageFormatRg16:
  1052. return "rg16";
  1053. case ImageFormatRgba16:
  1054. return "rgba16";
  1055. case ImageFormatR16Snorm:
  1056. return "r16_snorm";
  1057. case ImageFormatRg16Snorm:
  1058. return "rg16_snorm";
  1059. case ImageFormatRgba16Snorm:
  1060. return "rgba16_snorm";
  1061. case ImageFormatR8Snorm:
  1062. return "r8_snorm";
  1063. case ImageFormatRg8Snorm:
  1064. return "rg8_snorm";
  1065. case ImageFormatR8ui:
  1066. return "r8ui";
  1067. case ImageFormatRg8ui:
  1068. return "rg8ui";
  1069. case ImageFormatR16ui:
  1070. return "r16ui";
  1071. case ImageFormatRgb10a2ui:
  1072. return "rgb10_a2ui";
  1073. case ImageFormatR8i:
  1074. return "r8i";
  1075. case ImageFormatRg8i:
  1076. return "rg8i";
  1077. case ImageFormatR16i:
  1078. return "r16i";
  1079. default:
  1080. case ImageFormatUnknown:
  1081. return nullptr;
  1082. }
  1083. }
  1084. uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
  1085. {
  1086. switch (type.basetype)
  1087. {
  1088. case SPIRType::Double:
  1089. case SPIRType::Int64:
  1090. case SPIRType::UInt64:
  1091. return 8;
  1092. case SPIRType::Float:
  1093. case SPIRType::Int:
  1094. case SPIRType::UInt:
  1095. return 4;
  1096. case SPIRType::Half:
  1097. case SPIRType::Short:
  1098. case SPIRType::UShort:
  1099. return 2;
  1100. case SPIRType::SByte:
  1101. case SPIRType::UByte:
  1102. return 1;
  1103. default:
  1104. SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
  1105. }
  1106. }
  1107. uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
  1108. BufferPackingStandard packing)
  1109. {
  1110. // If using PhysicalStorageBufferEXT storage class, this is a pointer,
  1111. // and is 64-bit.
  1112. if (type.storage == StorageClassPhysicalStorageBufferEXT)
  1113. {
  1114. if (!type.pointer)
  1115. SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
  1116. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
  1117. {
  1118. if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
  1119. return 16;
  1120. else
  1121. return 8;
  1122. }
  1123. else
  1124. SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
  1125. }
  1126. if (!type.array.empty())
  1127. {
  1128. uint32_t minimum_alignment = 1;
  1129. if (packing_is_vec4_padded(packing))
  1130. minimum_alignment = 16;
  1131. auto *tmp = &get<SPIRType>(type.parent_type);
  1132. while (!tmp->array.empty())
  1133. tmp = &get<SPIRType>(tmp->parent_type);
  1134. // Get the alignment of the base type, then maybe round up.
  1135. return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
  1136. }
  1137. if (type.basetype == SPIRType::Struct)
  1138. {
  1139. // Rule 9. A struct's alignment is the maximum alignment of its members.
  1140. uint32_t alignment = 1;
  1141. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1142. {
  1143. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1144. alignment =
  1145. max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
  1146. }
  1147. // In std140, struct alignment is rounded up to 16.
  1148. if (packing_is_vec4_padded(packing))
  1149. alignment = max(alignment, 16u);
  1150. return alignment;
  1151. }
  1152. else
  1153. {
  1154. const uint32_t base_alignment = type_to_packed_base_size(type, packing);
  1155. // Alignment requirement for scalar block layout is always the alignment for the most basic component.
  1156. if (packing_is_scalar(packing))
  1157. return base_alignment;
  1158. // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
  1159. // a vec4; this is handled outside since that part knows our current offset.
  1160. if (type.columns == 1 && packing_is_hlsl(packing))
  1161. return base_alignment;
  1162. // From 7.6.2.2 in GL 4.5 core spec.
  1163. // Rule 1
  1164. if (type.vecsize == 1 && type.columns == 1)
  1165. return base_alignment;
  1166. // Rule 2
  1167. if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
  1168. return type.vecsize * base_alignment;
  1169. // Rule 3
  1170. if (type.vecsize == 3 && type.columns == 1)
  1171. return 4 * base_alignment;
  1172. // Rule 4 implied. Alignment does not change in std430.
  1173. // Rule 5. Column-major matrices are stored as arrays of
  1174. // vectors.
  1175. if (flags.get(DecorationColMajor) && type.columns > 1)
  1176. {
  1177. if (packing_is_vec4_padded(packing))
  1178. return 4 * base_alignment;
  1179. else if (type.vecsize == 3)
  1180. return 4 * base_alignment;
  1181. else
  1182. return type.vecsize * base_alignment;
  1183. }
  1184. // Rule 6 implied.
  1185. // Rule 7.
  1186. if (flags.get(DecorationRowMajor) && type.vecsize > 1)
  1187. {
  1188. if (packing_is_vec4_padded(packing))
  1189. return 4 * base_alignment;
  1190. else if (type.columns == 3)
  1191. return 4 * base_alignment;
  1192. else
  1193. return type.columns * base_alignment;
  1194. }
  1195. // Rule 8 implied.
  1196. }
  1197. SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
  1198. }
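// Worked examples of the rules above (derived from the code, not part of the upstream file),
// using BufferPackingStd140 and a 4-byte base alignment for float:
//   float              -> rule 1:                    4
//   vec2               -> rule 2: 2 * 4            = 8
//   vec3 / vec4        -> rule 3 / rule 2: 4 * 4   = 16
//   column-major mat3  -> rule 5 (vec4-padded): 16
//   float foo[N]       -> array path: max(16, 4)   = 16 (std140 pads array elements to vec4)
// Under BufferPackingScalar every case above collapses to the 4-byte base alignment.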
  1199. uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
  1200. BufferPackingStandard packing)
  1201. {
  1202. // Array stride is equal to aligned size of the underlying type.
  1203. uint32_t parent = type.parent_type;
  1204. assert(parent);
  1205. auto &tmp = get<SPIRType>(parent);
  1206. uint32_t size = type_to_packed_size(tmp, flags, packing);
  1207. uint32_t alignment = type_to_packed_alignment(type, flags, packing);
  1208. return (size + alignment - 1) & ~(alignment - 1);
  1209. }
  1210. uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
  1211. {
  1212. if (!type.array.empty())
  1213. {
  1214. uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
  1215. // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
  1216. // so that it is possible to pack other vectors into the last element.
  1217. if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
  1218. packed_size -= (4 - type.vecsize) * (type.width / 8);
  1219. return packed_size;
  1220. }
  1221. // If using PhysicalStorageBufferEXT storage class, this is a pointer,
  1222. // and is 64-bit.
  1223. if (type.storage == StorageClassPhysicalStorageBufferEXT)
  1224. {
  1225. if (!type.pointer)
  1226. SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
  1227. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
  1228. return 8;
  1229. else
  1230. SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
  1231. }
  1232. uint32_t size = 0;
  1233. if (type.basetype == SPIRType::Struct)
  1234. {
  1235. uint32_t pad_alignment = 1;
  1236. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1237. {
  1238. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1239. auto &member_type = get<SPIRType>(type.member_types[i]);
  1240. uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
  1241. uint32_t alignment = max(packed_alignment, pad_alignment);
  1242. // The next member following a struct member is aligned to the base alignment of the struct that came before.
  1243. // GL 4.5 spec, 7.6.2.2.
  1244. if (member_type.basetype == SPIRType::Struct)
  1245. pad_alignment = packed_alignment;
  1246. else
  1247. pad_alignment = 1;
  1248. size = (size + alignment - 1) & ~(alignment - 1);
  1249. size += type_to_packed_size(member_type, member_flags, packing);
  1250. }
  1251. }
  1252. else
  1253. {
  1254. const uint32_t base_alignment = type_to_packed_base_size(type, packing);
  1255. if (packing_is_scalar(packing))
  1256. {
  1257. size = type.vecsize * type.columns * base_alignment;
  1258. }
  1259. else
  1260. {
  1261. if (type.columns == 1)
  1262. size = type.vecsize * base_alignment;
  1263. if (flags.get(DecorationColMajor) && type.columns > 1)
  1264. {
  1265. if (packing_is_vec4_padded(packing))
  1266. size = type.columns * 4 * base_alignment;
  1267. else if (type.vecsize == 3)
  1268. size = type.columns * 4 * base_alignment;
  1269. else
  1270. size = type.columns * type.vecsize * base_alignment;
  1271. }
  1272. if (flags.get(DecorationRowMajor) && type.vecsize > 1)
  1273. {
  1274. if (packing_is_vec4_padded(packing))
  1275. size = type.vecsize * 4 * base_alignment;
  1276. else if (type.columns == 3)
  1277. size = type.vecsize * 4 * base_alignment;
  1278. else
  1279. size = type.vecsize * type.columns * base_alignment;
  1280. }
  1281. // For matrices in HLSL, the last element has a size which depends on its vector size,
  1282. // so that it is possible to pack other vectors into the last element.
  1283. if (packing_is_hlsl(packing) && type.columns > 1)
  1284. size -= (4 - type.vecsize) * (type.width / 8);
  1285. }
  1286. }
  1287. return size;
  1288. }
  1289. bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
  1290. uint32_t *failed_validation_index, uint32_t start_offset,
  1291. uint32_t end_offset)
  1292. {
  1293. // This is very tricky and error prone, but try to be exhaustive and correct here.
  1294. // SPIR-V doesn't directly say if we're using std430 or std140.
  1295. // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
  1296. // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
1297. // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyway, and custom offsets would do the same thing).
  1298. //
  1299. // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
  1300. // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
  1301. //
  1302. // The only two differences between std140 and std430 are related to padding alignment/array stride
  1303. // in arrays and structs. In std140 they take minimum vec4 alignment.
  1304. // std430 only removes the vec4 requirement.
  1305. uint32_t offset = 0;
  1306. uint32_t pad_alignment = 1;
  1307. bool is_top_level_block =
  1308. has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
  1309. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1310. {
  1311. auto &memb_type = get<SPIRType>(type.member_types[i]);
  1312. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1313. // Verify alignment rules.
  1314. uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
  1315. // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
  1316. // layout(constant_id = 0) const int s = 10;
  1317. // const int S = s + 5; // SpecConstantOp
  1318. // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
  1319. // we would need full implementation of compile-time constant folding. :(
  1320. // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
  1321. // for our analysis (e.g. unsized arrays).
  1322. // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
  1323. // Querying size of this member will fail, so just don't call it unless we have to.
  1324. //
1325. // This is likely the best effort we can support without resorting to unacceptably complicated workarounds.
  1326. bool member_can_be_unsized =
  1327. is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
  1328. uint32_t packed_size = 0;
  1329. if (!member_can_be_unsized || packing_is_hlsl(packing))
  1330. packed_size = type_to_packed_size(memb_type, member_flags, packing);
  1331. // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
  1332. if (packing_is_hlsl(packing))
  1333. {
  1334. // If a member straddles across a vec4 boundary, alignment is actually vec4.
  1335. uint32_t begin_word = offset / 16;
  1336. uint32_t end_word = (offset + packed_size - 1) / 16;
  1337. if (begin_word != end_word)
  1338. packed_alignment = max(packed_alignment, 16u);
  1339. }
  1340. uint32_t actual_offset = type_struct_member_offset(type, i);
  1341. // Field is not in the specified range anymore and we can ignore any further fields.
  1342. if (actual_offset >= end_offset)
  1343. break;
  1344. uint32_t alignment = max(packed_alignment, pad_alignment);
  1345. offset = (offset + alignment - 1) & ~(alignment - 1);
  1346. // The next member following a struct member is aligned to the base alignment of the struct that came before.
  1347. // GL 4.5 spec, 7.6.2.2.
  1348. if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
  1349. pad_alignment = packed_alignment;
  1350. else
  1351. pad_alignment = 1;
  1352. // Only care about packing if we are in the given range
  1353. if (actual_offset >= start_offset)
  1354. {
  1355. // We only care about offsets in std140, std430, etc ...
  1356. // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
  1357. if (!packing_has_flexible_offset(packing))
  1358. {
  1359. if (actual_offset != offset) // This cannot be the packing we're looking for.
  1360. {
  1361. if (failed_validation_index)
  1362. *failed_validation_index = i;
  1363. return false;
  1364. }
  1365. }
  1366. else if ((actual_offset & (alignment - 1)) != 0)
  1367. {
  1368. // We still need to verify that alignment rules are observed, even if we have explicit offset.
  1369. if (failed_validation_index)
  1370. *failed_validation_index = i;
  1371. return false;
  1372. }
  1373. // Verify array stride rules.
  1374. if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
  1375. type_struct_member_array_stride(type, i))
  1376. {
  1377. if (failed_validation_index)
  1378. *failed_validation_index = i;
  1379. return false;
  1380. }
  1381. // Verify that sub-structs also follow packing rules.
  1382. // We cannot use enhanced layouts on substructs, so they better be up to spec.
  1383. auto substruct_packing = packing_to_substruct_packing(packing);
  1384. if (!memb_type.pointer && !memb_type.member_types.empty() &&
  1385. !buffer_is_packing_standard(memb_type, substruct_packing))
  1386. {
  1387. if (failed_validation_index)
  1388. *failed_validation_index = i;
  1389. return false;
  1390. }
  1391. }
  1392. // Bump size.
  1393. offset = actual_offset + packed_size;
  1394. }
  1395. return true;
  1396. }
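// Example of the inference this enables. For a block declared roughly as
//   buffer SSBO { float v[4]; vec3 n; };
// SPIR-V only records Offset and ArrayStride. If v has ArrayStride 4 and n sits at offset 16, the
// std430 check passes; if v has ArrayStride 16 and n sits at offset 64, only std140 validates.
// The *EnhancedLayout variants are tried when explicit offsets match neither plain layout.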
  1397. bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
  1398. {
1399. // Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
  1400. // Be very explicit here about how to solve the issue.
  1401. if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
  1402. (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
  1403. {
  1404. uint32_t minimum_desktop_version = block ? 440 : 410;
  1405. // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
  1406. if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
  1407. return false;
  1408. else if (options.es && options.version < 310)
  1409. return false;
  1410. }
  1411. if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
  1412. (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
  1413. {
  1414. if (options.es && options.version < 300)
  1415. return false;
  1416. else if (!options.es && options.version < 330)
  1417. return false;
  1418. }
  1419. if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
  1420. {
  1421. if (options.es && options.version < 310)
  1422. return false;
  1423. else if (!options.es && options.version < 430)
  1424. return false;
  1425. }
  1426. return true;
  1427. }
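// Summary of the version gates above (desktop / ES):
//  - Vertex inputs and fragment outputs: GLSL 330 / ESSL 300.
//  - Other stage interfaces: GLSL 410 (440 for blocks) or ARB_separate_shader_objects; ESSL 310.
//  - Uniform, UniformConstant and PushConstant storage: GLSL 430 / ESSL 310.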
  1428. string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
  1429. {
  1430. // FIXME: Come up with a better solution for when to disable layouts.
1431. // Whether we can emit layouts depends on extensions as well as which types
1432. // of layouts are used. For now, the simple solution is to just disable
1433. // layouts for legacy versions.
  1434. if (is_legacy())
  1435. return "";
  1436. if (subpass_input_is_framebuffer_fetch(var.self))
  1437. return "";
  1438. SmallVector<string> attr;
  1439. auto &type = get<SPIRType>(var.basetype);
  1440. auto &flags = get_decoration_bitset(var.self);
  1441. auto &typeflags = get_decoration_bitset(type.self);
  1442. if (flags.get(DecorationPassthroughNV))
  1443. attr.push_back("passthrough");
  1444. if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
  1445. attr.push_back("push_constant");
  1446. else if (var.storage == StorageClassShaderRecordBufferKHR)
  1447. attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
  1448. if (flags.get(DecorationRowMajor))
  1449. attr.push_back("row_major");
  1450. if (flags.get(DecorationColMajor))
  1451. attr.push_back("column_major");
  1452. if (options.vulkan_semantics)
  1453. {
  1454. if (flags.get(DecorationInputAttachmentIndex))
  1455. attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
  1456. }
  1457. bool is_block = has_decoration(type.self, DecorationBlock);
  1458. if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
  1459. {
  1460. Bitset combined_decoration;
  1461. for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
  1462. combined_decoration.merge_or(combined_decoration_for_member(type, i));
  1463. // If our members have location decorations, we don't need to
  1464. // emit location decorations at the top as well (looks weird).
  1465. if (!combined_decoration.get(DecorationLocation))
  1466. attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
  1467. }
  1468. // Transform feedback
  1469. bool uses_enhanced_layouts = false;
  1470. if (is_block && var.storage == StorageClassOutput)
  1471. {
  1472. // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
  1473. // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
  1474. // is the xfb_offset.
  1475. uint32_t member_count = uint32_t(type.member_types.size());
  1476. bool have_xfb_buffer_stride = false;
  1477. bool have_any_xfb_offset = false;
  1478. bool have_geom_stream = false;
  1479. uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
  1480. if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
  1481. {
  1482. have_xfb_buffer_stride = true;
  1483. xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
  1484. xfb_stride = get_decoration(var.self, DecorationXfbStride);
  1485. }
  1486. if (flags.get(DecorationStream))
  1487. {
  1488. have_geom_stream = true;
  1489. geom_stream = get_decoration(var.self, DecorationStream);
  1490. }
  1491. // Verify that none of the members violate our assumption.
  1492. for (uint32_t i = 0; i < member_count; i++)
  1493. {
  1494. if (has_member_decoration(type.self, i, DecorationStream))
  1495. {
  1496. uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
  1497. if (have_geom_stream && member_geom_stream != geom_stream)
  1498. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  1499. have_geom_stream = true;
  1500. geom_stream = member_geom_stream;
  1501. }
  1502. // Only members with an Offset decoration participate in XFB.
  1503. if (!has_member_decoration(type.self, i, DecorationOffset))
  1504. continue;
  1505. have_any_xfb_offset = true;
  1506. if (has_member_decoration(type.self, i, DecorationXfbBuffer))
  1507. {
  1508. uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
  1509. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  1510. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  1511. have_xfb_buffer_stride = true;
  1512. xfb_buffer = buffer_index;
  1513. }
  1514. if (has_member_decoration(type.self, i, DecorationXfbStride))
  1515. {
  1516. uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
  1517. if (have_xfb_buffer_stride && stride != xfb_stride)
  1518. SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  1519. have_xfb_buffer_stride = true;
  1520. xfb_stride = stride;
  1521. }
  1522. }
  1523. if (have_xfb_buffer_stride && have_any_xfb_offset)
  1524. {
  1525. attr.push_back(join("xfb_buffer = ", xfb_buffer));
  1526. attr.push_back(join("xfb_stride = ", xfb_stride));
  1527. uses_enhanced_layouts = true;
  1528. }
  1529. if (have_geom_stream)
  1530. {
  1531. if (get_execution_model() != ExecutionModelGeometry)
  1532. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  1533. if (options.es)
  1534. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  1535. if (options.version < 400)
  1536. require_extension_internal("GL_ARB_transform_feedback3");
  1537. attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
  1538. }
  1539. }
  1540. else if (var.storage == StorageClassOutput)
  1541. {
  1542. if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
  1543. {
  1544. // XFB for standalone variables, we can emit all decorations.
  1545. attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
  1546. attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
  1547. attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
  1548. uses_enhanced_layouts = true;
  1549. }
  1550. if (flags.get(DecorationStream))
  1551. {
  1552. if (get_execution_model() != ExecutionModelGeometry)
  1553. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  1554. if (options.es)
  1555. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  1556. if (options.version < 400)
  1557. require_extension_internal("GL_ARB_transform_feedback3");
  1558. attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
  1559. }
  1560. }
  1561. // Can only declare Component if we can declare location.
  1562. if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
  1563. {
  1564. uses_enhanced_layouts = true;
  1565. attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
  1566. }
  1567. if (uses_enhanced_layouts)
  1568. {
  1569. if (!options.es)
  1570. {
  1571. if (options.version < 440 && options.version >= 140)
  1572. require_extension_internal("GL_ARB_enhanced_layouts");
  1573. else if (options.version < 140)
  1574. SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
  1575. if (!options.es && options.version < 440)
  1576. require_extension_internal("GL_ARB_enhanced_layouts");
  1577. }
  1578. else if (options.es)
  1579. SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
  1580. }
  1581. if (flags.get(DecorationIndex))
  1582. attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
  1583. // Do not emit set = decoration in regular GLSL output, but
  1584. // we need to preserve it in Vulkan GLSL mode.
  1585. if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
  1586. {
  1587. if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
  1588. attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
  1589. }
  1590. bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
  1591. bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
  1592. (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
  1593. bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
  1594. bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
  1595. // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
  1596. bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
1597. // Pretend we have no UBOs when options say so.
  1598. if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
  1599. can_use_buffer_blocks = false;
  1600. bool can_use_binding;
  1601. if (options.es)
  1602. can_use_binding = options.version >= 310;
  1603. else
  1604. can_use_binding = options.enable_420pack_extension || (options.version >= 420);
  1605. // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
  1606. if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
  1607. can_use_binding = false;
  1608. if (var.storage == StorageClassShaderRecordBufferKHR)
  1609. can_use_binding = false;
  1610. if (can_use_binding && flags.get(DecorationBinding))
  1611. attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));
  1612. if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
  1613. attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
  1614. // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
  1615. // If SPIR-V does not comply with either layout, we cannot really work around it.
  1616. if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
  1617. {
  1618. attr.push_back(buffer_to_packing_standard(type, false));
  1619. }
  1620. else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
  1621. {
  1622. attr.push_back(buffer_to_packing_standard(type, true));
  1623. }
1624. // For images, the type itself adds a layout qualifier.
  1625. // Only emit the format for storage images.
  1626. if (type.basetype == SPIRType::Image && type.image.sampled == 2)
  1627. {
  1628. const char *fmt = format_to_glsl(type.image.format);
  1629. if (fmt)
  1630. attr.push_back(fmt);
  1631. }
  1632. if (attr.empty())
  1633. return "";
  1634. string res = "layout(";
  1635. res += merge(attr);
  1636. res += ") ";
  1637. return res;
  1638. }
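// For illustration, a Vulkan-GLSL UBO at set 0, binding 1 would typically yield something like
// "layout(set = 0, binding = 1, std140) " from this function, while a dual-source fragment output
// would yield "layout(location = 0, index = 1) ". The exact attribute list depends on the target
// version and enabled extensions.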
  1639. string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
  1640. {
  1641. if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
  1642. return "std430";
  1643. else if (buffer_is_packing_standard(type, BufferPackingStd140))
  1644. return "std140";
  1645. else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
  1646. {
  1647. require_extension_internal("GL_EXT_scalar_block_layout");
  1648. return "scalar";
  1649. }
  1650. else if (support_std430_without_scalar_layout &&
  1651. buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
  1652. {
  1653. if (options.es && !options.vulkan_semantics)
  1654. SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
  1655. "not support GL_ARB_enhanced_layouts.");
  1656. if (!options.es && !options.vulkan_semantics && options.version < 440)
  1657. require_extension_internal("GL_ARB_enhanced_layouts");
  1658. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1659. return "std430";
  1660. }
  1661. else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
  1662. {
1663. // Fallback time. We might be able to use ARB_enhanced_layouts to deal with this difference.
1664. // However, we can only use layout(offset) on the block itself, not on any substructs, so the substructs had better already use the appropriate layout.
  1665. // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
  1666. if (options.es && !options.vulkan_semantics)
  1667. SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
  1668. "not support GL_ARB_enhanced_layouts.");
  1669. if (!options.es && !options.vulkan_semantics && options.version < 440)
  1670. require_extension_internal("GL_ARB_enhanced_layouts");
  1671. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1672. return "std140";
  1673. }
  1674. else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
  1675. {
  1676. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1677. require_extension_internal("GL_EXT_scalar_block_layout");
  1678. return "scalar";
  1679. }
  1680. else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
  1681. buffer_is_packing_standard(type, BufferPackingStd430))
  1682. {
  1683. // UBOs can support std430 with GL_EXT_scalar_block_layout.
  1684. require_extension_internal("GL_EXT_scalar_block_layout");
  1685. return "std430";
  1686. }
  1687. else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
  1688. buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
  1689. {
  1690. // UBOs can support std430 with GL_EXT_scalar_block_layout.
  1691. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1692. require_extension_internal("GL_EXT_scalar_block_layout");
  1693. return "std430";
  1694. }
  1695. else
  1696. {
  1697. SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced "
  1698. "layouts. You can try flattening this block to support a more flexible layout.");
  1699. }
  1700. }
  1701. void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
  1702. {
  1703. if (flattened_buffer_blocks.count(var.self))
  1704. emit_buffer_block_flattened(var);
  1705. else if (options.vulkan_semantics)
  1706. emit_push_constant_block_vulkan(var);
  1707. else if (options.emit_push_constant_as_uniform_buffer)
  1708. emit_buffer_block_native(var);
  1709. else
  1710. emit_push_constant_block_glsl(var);
  1711. }
  1712. void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
  1713. {
  1714. emit_buffer_block(var);
  1715. }
  1716. void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
  1717. {
1718. // OpenGL has no concept of push constant blocks, so implement it as a uniform struct.
  1719. auto &type = get<SPIRType>(var.basetype);
  1720. auto &flags = ir.meta[var.self].decoration.decoration_flags;
  1721. flags.clear(DecorationBinding);
  1722. flags.clear(DecorationDescriptorSet);
  1723. #if 0
  1724. if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
  1725. SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
  1726. "Remap to location with reflection API first or disable these decorations.");
  1727. #endif
  1728. // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
  1729. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
  1730. auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
  1731. bool block_flag = block_flags.get(DecorationBlock);
  1732. block_flags.clear(DecorationBlock);
  1733. emit_struct(type);
  1734. if (block_flag)
  1735. block_flags.set(DecorationBlock);
  1736. emit_uniform(var);
  1737. statement("");
  1738. }
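// Illustrative output for a push constant block containing a single mat4, with made-up names:
//   struct PushConstants
//   {
//       mat4 mvp;
//   };
//   uniform PushConstants pc;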
  1739. void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
  1740. {
  1741. auto &type = get<SPIRType>(var.basetype);
  1742. bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
  1743. if (flattened_buffer_blocks.count(var.self))
  1744. emit_buffer_block_flattened(var);
  1745. else if (is_legacy() || (!options.es && options.version == 130) ||
  1746. (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
  1747. emit_buffer_block_legacy(var);
  1748. else
  1749. emit_buffer_block_native(var);
  1750. }
  1751. void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
  1752. {
  1753. auto &type = get<SPIRType>(var.basetype);
  1754. bool ssbo = var.storage == StorageClassStorageBuffer ||
  1755. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  1756. if (ssbo)
  1757. SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
1758. // We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
  1759. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
  1760. auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
  1761. bool block_flag = block_flags.get(DecorationBlock);
  1762. block_flags.clear(DecorationBlock);
  1763. emit_struct(type);
  1764. if (block_flag)
  1765. block_flags.set(DecorationBlock);
  1766. emit_uniform(var);
  1767. statement("");
  1768. }
  1769. void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration)
  1770. {
  1771. string buffer_name;
  1772. if (forward_declaration)
  1773. {
  1774. // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
1775. // Allow an aliased name since we might be declaring the block twice: once as a forward-declared buffer reference and once as the proper declaration.
  1776. // The names must match up.
  1777. buffer_name = to_name(type.self, false);
  1778. // Shaders never use the block by interface name, so we don't
  1779. // have to track this other than updating name caches.
  1780. // If we have a collision for any reason, just fallback immediately.
  1781. if (ir.meta[type.self].decoration.alias.empty() ||
  1782. block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
  1783. resource_names.find(buffer_name) != end(resource_names))
  1784. {
  1785. buffer_name = join("_", type.self);
  1786. }
  1787. // Make sure we get something unique for both global name scope and block name scope.
  1788. // See GLSL 4.5 spec: section 4.3.9 for details.
  1789. add_variable(block_ssbo_names, resource_names, buffer_name);
  1790. // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
  1791. // This cannot conflict with anything else, so we're safe now.
1792. // We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
  1793. if (buffer_name.empty())
  1794. buffer_name = join("_", type.self);
  1795. block_names.insert(buffer_name);
  1796. block_ssbo_names.insert(buffer_name);
  1797. // Ensure we emit the correct name when emitting non-forward pointer type.
  1798. ir.meta[type.self].decoration.alias = buffer_name;
  1799. }
  1800. else if (type.basetype != SPIRType::Struct)
  1801. buffer_name = type_to_glsl(type);
  1802. else
  1803. buffer_name = to_name(type.self, false);
  1804. if (!forward_declaration)
  1805. {
  1806. if (type.basetype == SPIRType::Struct)
  1807. {
  1808. auto flags = ir.get_buffer_block_type_flags(type);
  1809. string decorations;
  1810. if (flags.get(DecorationRestrict))
  1811. decorations += " restrict";
  1812. if (flags.get(DecorationCoherent))
  1813. decorations += " coherent";
  1814. if (flags.get(DecorationNonReadable))
  1815. decorations += " writeonly";
  1816. if (flags.get(DecorationNonWritable))
  1817. decorations += " readonly";
  1818. statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true),
  1819. ")", decorations, " buffer ", buffer_name);
  1820. }
  1821. else
  1822. statement("layout(buffer_reference) buffer ", buffer_name);
  1823. begin_scope();
  1824. if (type.basetype == SPIRType::Struct)
  1825. {
  1826. type.member_name_cache.clear();
  1827. uint32_t i = 0;
  1828. for (auto &member : type.member_types)
  1829. {
  1830. add_member_name(type, i);
  1831. emit_struct_member(type, member, i);
  1832. i++;
  1833. }
  1834. }
  1835. else
  1836. {
  1837. auto &pointee_type = get_pointee_type(type);
  1838. statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
  1839. }
  1840. end_scope_decl();
  1841. statement("");
  1842. }
  1843. else
  1844. {
  1845. statement("layout(buffer_reference) buffer ", buffer_name, ";");
  1846. }
  1847. }
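// Rough shape of the emitted GLSL for a struct-typed reference (illustrative names):
//   layout(buffer_reference) buffer Node;            // forward declaration
//   layout(buffer_reference, std430) buffer Node
//   {
//       vec4 value;
//       Node next;
//   };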
  1848. void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
  1849. {
  1850. auto &type = get<SPIRType>(var.basetype);
  1851. Bitset flags = ir.get_buffer_block_flags(var);
  1852. bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
  1853. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  1854. bool is_restrict = ssbo && flags.get(DecorationRestrict);
  1855. bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
  1856. bool is_readonly = ssbo && flags.get(DecorationNonWritable);
  1857. bool is_coherent = ssbo && flags.get(DecorationCoherent);
  1858. // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
  1859. auto buffer_name = to_name(type.self, false);
  1860. auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
  1861. // Shaders never use the block by interface name, so we don't
  1862. // have to track this other than updating name caches.
  1863. // If we have a collision for any reason, just fallback immediately.
  1864. if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
  1865. resource_names.find(buffer_name) != end(resource_names))
  1866. {
  1867. buffer_name = get_block_fallback_name(var.self);
  1868. }
  1869. // Make sure we get something unique for both global name scope and block name scope.
  1870. // See GLSL 4.5 spec: section 4.3.9 for details.
  1871. add_variable(block_namespace, resource_names, buffer_name);
  1872. // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
  1873. // This cannot conflict with anything else, so we're safe now.
1874. // We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
  1875. if (buffer_name.empty())
  1876. buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
  1877. block_names.insert(buffer_name);
  1878. block_namespace.insert(buffer_name);
  1879. // Save for post-reflection later.
  1880. declared_block_names[var.self] = buffer_name;
  1881. statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
  1882. is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
  1883. buffer_name);
  1884. begin_scope();
  1885. type.member_name_cache.clear();
  1886. uint32_t i = 0;
  1887. for (auto &member : type.member_types)
  1888. {
  1889. add_member_name(type, i);
  1890. emit_struct_member(type, member, i);
  1891. i++;
  1892. }
  1893. // var.self can be used as a backup name for the block name,
  1894. // so we need to make sure we don't disturb the name here on a recompile.
  1895. // It will need to be reset if we have to recompile.
  1896. preserve_alias_on_reset(var.self);
  1897. add_resource_name(var.self);
  1898. end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
  1899. statement("");
  1900. }
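// Typical result for a restrict SSBO in Vulkan GLSL (illustrative names):
//   layout(set = 0, binding = 0, std430) restrict buffer SSBO
//   {
//       vec4 data[];
//   } ssbo;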
  1901. void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
  1902. {
  1903. auto &type = get<SPIRType>(var.basetype);
  1904. // Block names should never alias.
  1905. auto buffer_name = to_name(type.self, false);
  1906. size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
  1907. SPIRType::BaseType basic_type;
  1908. if (get_common_basic_type(type, basic_type))
  1909. {
  1910. SPIRType tmp;
  1911. tmp.basetype = basic_type;
  1912. tmp.vecsize = 4;
  1913. if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
  1914. SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
  1915. auto flags = ir.get_buffer_block_flags(var);
  1916. statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
  1917. buffer_size, "];");
  1918. }
  1919. else
  1920. SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
  1921. }
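// Flattening collapses the whole block into one vec4-sized array. A 64-byte block of floats named
// UBO would come out roughly as:
//   uniform vec4 UBO[4];
// with member accesses rewritten elsewhere to index and swizzle into this array.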
  1922. const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
  1923. {
  1924. auto &execution = get_entry_point();
  1925. if (subpass_input_is_framebuffer_fetch(var.self))
  1926. return "";
  1927. if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
  1928. {
  1929. if (is_legacy() && execution.model == ExecutionModelVertex)
  1930. return var.storage == StorageClassInput ? "attribute " : "varying ";
  1931. else if (is_legacy() && execution.model == ExecutionModelFragment)
  1932. return "varying "; // Fragment outputs are renamed so they never hit this case.
  1933. else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
  1934. {
  1935. if (inout_color_attachments.count(get_decoration(var.self, DecorationLocation)) != 0)
  1936. return "inout ";
  1937. else
  1938. return "out ";
  1939. }
  1940. else
  1941. return var.storage == StorageClassInput ? "in " : "out ";
  1942. }
  1943. else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
  1944. var.storage == StorageClassPushConstant)
  1945. {
  1946. return "uniform ";
  1947. }
  1948. else if (var.storage == StorageClassRayPayloadKHR)
  1949. {
  1950. return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
  1951. }
  1952. else if (var.storage == StorageClassIncomingRayPayloadKHR)
  1953. {
  1954. return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
  1955. }
  1956. else if (var.storage == StorageClassHitAttributeKHR)
  1957. {
  1958. return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
  1959. }
  1960. else if (var.storage == StorageClassCallableDataKHR)
  1961. {
  1962. return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
  1963. }
  1964. else if (var.storage == StorageClassIncomingCallableDataKHR)
  1965. {
  1966. return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
  1967. }
  1968. return "";
  1969. }
  1970. void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
  1971. const SmallVector<uint32_t> &indices)
  1972. {
  1973. uint32_t member_type_id = type.self;
  1974. const SPIRType *member_type = &type;
  1975. const SPIRType *parent_type = nullptr;
  1976. auto flattened_name = basename;
  1977. for (auto &index : indices)
  1978. {
  1979. flattened_name += "_";
  1980. flattened_name += to_member_name(*member_type, index);
  1981. parent_type = member_type;
  1982. member_type_id = member_type->member_types[index];
  1983. member_type = &get<SPIRType>(member_type_id);
  1984. }
  1985. assert(member_type->basetype != SPIRType::Struct);
  1986. // We're overriding struct member names, so ensure we do so on the primary type.
  1987. if (parent_type->type_alias)
  1988. parent_type = &get<SPIRType>(parent_type->type_alias);
  1989. // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
  1990. // which is not allowed.
  1991. ParsedIR::sanitize_underscores(flattened_name);
  1992. uint32_t last_index = indices.back();
  1993. // Pass in the varying qualifier here so it will appear in the correct declaration order.
  1994. // Replace member name while emitting it so it encodes both struct name and member name.
  1995. auto backup_name = get_member_name(parent_type->self, last_index);
  1996. auto member_name = to_member_name(*parent_type, last_index);
  1997. set_member_name(parent_type->self, last_index, flattened_name);
  1998. emit_struct_member(*parent_type, member_type_id, last_index, qual);
  1999. // Restore member name.
  2000. set_member_name(parent_type->self, last_index, member_name);
  2001. }
  2002. void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
  2003. const SmallVector<uint32_t> &indices)
  2004. {
  2005. auto sub_indices = indices;
  2006. sub_indices.push_back(0);
  2007. const SPIRType *member_type = &type;
  2008. for (auto &index : indices)
  2009. member_type = &get<SPIRType>(member_type->member_types[index]);
  2010. assert(member_type->basetype == SPIRType::Struct);
  2011. if (!member_type->array.empty())
  2012. SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
  2013. for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
  2014. {
  2015. sub_indices.back() = i;
  2016. if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
  2017. emit_flattened_io_block_struct(basename, type, qual, sub_indices);
  2018. else
  2019. emit_flattened_io_block_member(basename, type, qual, sub_indices);
  2020. }
  2021. }
  2022. void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
  2023. {
  2024. auto &var_type = get<SPIRType>(var.basetype);
  2025. if (!var_type.array.empty())
  2026. SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
  2027. // Emit flattened types based on the type alias. Normally, we are never supposed to emit
  2028. // struct declarations for aliased types.
  2029. auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
  2030. auto old_flags = ir.meta[type.self].decoration.decoration_flags;
  2031. // Emit the members as if they are part of a block to get all qualifiers.
  2032. ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
  2033. type.member_name_cache.clear();
  2034. SmallVector<uint32_t> member_indices;
  2035. member_indices.push_back(0);
  2036. auto basename = to_name(var.self);
  2037. uint32_t i = 0;
  2038. for (auto &member : type.member_types)
  2039. {
  2040. add_member_name(type, i);
  2041. auto &membertype = get<SPIRType>(member);
  2042. member_indices.back() = i;
  2043. if (membertype.basetype == SPIRType::Struct)
  2044. emit_flattened_io_block_struct(basename, type, qual, member_indices);
  2045. else
  2046. emit_flattened_io_block_member(basename, type, qual, member_indices);
  2047. i++;
  2048. }
  2049. ir.meta[type.self].decoration.decoration_flags = old_flags;
  2050. // Treat this variable as fully flattened from now on.
  2051. flattened_structs[var.self] = true;
  2052. }
  2053. void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
  2054. {
  2055. auto &type = get<SPIRType>(var.basetype);
  2056. if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
  2057. !options.es && options.version < 410)
  2058. {
  2059. require_extension_internal("GL_ARB_vertex_attrib_64bit");
  2060. }
  2061. // Either make it plain in/out or in/out blocks depending on what shader is doing ...
  2062. bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
  2063. const char *qual = to_storage_qualifiers_glsl(var);
  2064. if (block)
  2065. {
  2066. // ESSL earlier than 310 and GLSL earlier than 150 did not support
  2067. // I/O variables which are struct types.
  2068. // To support this, flatten the struct into separate varyings instead.
  2069. if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
  2070. (!options.es && options.version < 150))
  2071. {
  2072. // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
  2073. // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
  2074. emit_flattened_io_block(var, qual);
  2075. }
  2076. else
  2077. {
  2078. if (options.es && options.version < 320)
  2079. {
  2080. // Geometry and tessellation extensions imply this extension.
  2081. if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
  2082. require_extension_internal("GL_EXT_shader_io_blocks");
  2083. }
  2084. // Workaround to make sure we can emit "patch in/out" correctly.
  2085. fixup_io_block_patch_qualifiers(var);
  2086. // Block names should never alias.
  2087. auto block_name = to_name(type.self, false);
  2088. // The namespace for I/O blocks is separate from other variables in GLSL.
  2089. auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
  2090. // Shaders never use the block by interface name, so we don't
  2091. // have to track this other than updating name caches.
  2092. if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
  2093. block_name = get_fallback_name(type.self);
  2094. else
  2095. block_namespace.insert(block_name);
2096. // If for some reason block_name is an illegal name, make a final fallback to a workaround name.
  2097. // This cannot conflict with anything else, so we're safe now.
  2098. if (block_name.empty())
  2099. block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
  2100. // Instance names cannot alias block names.
  2101. resource_names.insert(block_name);
  2102. bool is_patch = has_decoration(var.self, DecorationPatch);
  2103. statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name);
  2104. begin_scope();
  2105. type.member_name_cache.clear();
  2106. uint32_t i = 0;
  2107. for (auto &member : type.member_types)
  2108. {
  2109. add_member_name(type, i);
  2110. emit_struct_member(type, member, i);
  2111. i++;
  2112. }
  2113. add_resource_name(var.self);
  2114. end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
  2115. statement("");
  2116. }
  2117. }
  2118. else
  2119. {
  2120. // ESSL earlier than 310 and GLSL earlier than 150 did not support
  2121. // I/O variables which are struct types.
  2122. // To support this, flatten the struct into separate varyings instead.
  2123. if (type.basetype == SPIRType::Struct &&
  2124. (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
  2125. (!options.es && options.version < 150)))
  2126. {
  2127. emit_flattened_io_block(var, qual);
  2128. }
  2129. else
  2130. {
  2131. add_resource_name(var.self);
  2132. // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
  2133. // Opt for unsized as it's the more "correct" variant to use.
  2134. bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() &&
  2135. !has_decoration(var.self, DecorationPatch) &&
  2136. (get_entry_point().model == ExecutionModelTessellationControl ||
  2137. get_entry_point().model == ExecutionModelTessellationEvaluation);
  2138. uint32_t old_array_size = 0;
  2139. bool old_array_size_literal = true;
  2140. if (control_point_input_array)
  2141. {
  2142. swap(type.array.back(), old_array_size);
  2143. swap(type.array_size_literal.back(), old_array_size_literal);
  2144. }
  2145. statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
  2146. variable_decl(type, to_name(var.self), var.self), ";");
  2147. if (control_point_input_array)
  2148. {
  2149. swap(type.array.back(), old_array_size);
  2150. swap(type.array_size_literal.back(), old_array_size_literal);
  2151. }
  2152. }
  2153. }
  2154. }
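// Two representative outcomes (illustrative names): a plain varying becomes
//   layout(location = 0) out vec4 vColor;
// while a proper I/O block on capable targets becomes
//   out VertexData
//   {
//       vec4 color;
//   } vout;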
  2155. void CompilerGLSL::emit_uniform(const SPIRVariable &var)
  2156. {
  2157. auto &type = get<SPIRType>(var.basetype);
  2158. if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
  2159. {
  2160. if (!options.es && options.version < 420)
  2161. require_extension_internal("GL_ARB_shader_image_load_store");
  2162. else if (options.es && options.version < 310)
  2163. SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
  2164. }
  2165. add_resource_name(var.self);
  2166. statement(layout_for_variable(var), variable_decl(var), ";");
  2167. }
  2168. string CompilerGLSL::constant_value_macro_name(uint32_t id)
  2169. {
  2170. return join("SPIRV_CROSS_CONSTANT_ID_", id);
  2171. }
  2172. void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
  2173. {
  2174. auto &type = get<SPIRType>(constant.basetype);
  2175. auto name = to_name(constant.self);
  2176. statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
  2177. }
  2178. void CompilerGLSL::emit_constant(const SPIRConstant &constant)
  2179. {
  2180. auto &type = get<SPIRType>(constant.constant_type);
  2181. auto name = to_name(constant.self);
  2182. SpecializationConstant wg_x, wg_y, wg_z;
  2183. ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  2184. // This specialization constant is implicitly declared by emitting layout() in;
  2185. if (constant.self == workgroup_size_id)
  2186. return;
  2187. // These specialization constants are implicitly declared by emitting layout() in;
  2188. // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
  2189. // later can use macro overrides for work group size.
  2190. bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
  2191. ConstantID(constant.self) == wg_z.id;
  2192. if (options.vulkan_semantics && is_workgroup_size_constant)
  2193. {
  2194. // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
  2195. return;
  2196. }
  2197. else if (!options.vulkan_semantics && is_workgroup_size_constant &&
  2198. !has_decoration(constant.self, DecorationSpecId))
  2199. {
  2200. // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
  2201. return;
  2202. }
  2203. // Only scalars have constant IDs.
  2204. if (has_decoration(constant.self, DecorationSpecId))
  2205. {
  2206. if (options.vulkan_semantics)
  2207. {
  2208. statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
  2209. variable_decl(type, name), " = ", constant_expression(constant), ";");
  2210. }
  2211. else
  2212. {
  2213. const string &macro_name = constant.specialization_constant_macro_name;
  2214. statement("#ifndef ", macro_name);
  2215. statement("#define ", macro_name, " ", constant_expression(constant));
  2216. statement("#endif");
  2217. // For workgroup size constants, only emit the macros.
  2218. if (!is_workgroup_size_constant)
  2219. statement("const ", variable_decl(type, name), " = ", macro_name, ";");
  2220. }
  2221. }
  2222. else
  2223. {
  2224. statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
  2225. }
  2226. }
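// Example of the two emission paths for a spec constant declared as constant_id = 1, default 4.
// Vulkan GLSL keeps the specialization constant:
//   layout(constant_id = 1) const int COUNT = 4;
// Plain GLSL emits an overridable macro instead:
//   #ifndef SPIRV_CROSS_CONSTANT_ID_1
//   #define SPIRV_CROSS_CONSTANT_ID_1 4
//   #endif
//   const int COUNT = SPIRV_CROSS_CONSTANT_ID_1;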
  2227. void CompilerGLSL::emit_entry_point_declarations()
  2228. {
  2229. }
  2230. void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
  2231. {
  2232. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
  2233. if (is_hidden_variable(var))
  2234. return;
  2235. auto *meta = ir.find_meta(var.self);
  2236. if (!meta)
  2237. return;
  2238. auto &m = meta->decoration;
  2239. if (keywords.find(m.alias) != end(keywords))
  2240. m.alias = join("_", m.alias);
  2241. });
  2242. ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
  2243. auto *meta = ir.find_meta(func.self);
  2244. if (!meta)
  2245. return;
  2246. auto &m = meta->decoration;
  2247. if (keywords.find(m.alias) != end(keywords))
  2248. m.alias = join("_", m.alias);
  2249. });
  2250. ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
  2251. auto *meta = ir.find_meta(type.self);
  2252. if (!meta)
  2253. return;
  2254. auto &m = meta->decoration;
  2255. if (keywords.find(m.alias) != end(keywords))
  2256. m.alias = join("_", m.alias);
  2257. for (auto &memb : meta->members)
  2258. if (keywords.find(memb.alias) != end(keywords))
  2259. memb.alias = join("_", memb.alias);
  2260. });
  2261. }
  2262. void CompilerGLSL::replace_illegal_names()
  2263. {
  2264. // clang-format off
  2265. static const unordered_set<string> keywords = {
  2266. "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
  2267. "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
  2268. "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
  2269. "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
  2270. "ceil", "cos", "cosh", "cross", "degrees",
  2271. "dFdx", "dFdxCoarse", "dFdxFine",
  2272. "dFdy", "dFdyCoarse", "dFdyFine",
  2273. "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
  2274. "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
  2275. "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
  2276. "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
  2277. "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
  2278. "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
  2279. "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
  2280. "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
  2281. "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
  2282. "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
  2283. "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
  2284. "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
  2285. "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
  2286. "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
  2287. "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
  2288. "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
  2289. "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
  2290. "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
  2291. "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
  2292. "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
  2293. "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
  2294. "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
  2295. "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
  2296. "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
  2297. "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
  2298. "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
  2299. "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
  2300. "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
  2301. "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
  2302. "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
  2303. "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
  2304. "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
  2305. "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
  2306. "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
  2307. "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
  2308. "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
  2309. "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
  2310. "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
  2311. "while", "writeonly",
  2312. };
  2313. // clang-format on
  2314. replace_illegal_names(keywords);
  2315. }
  2316. void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
  2317. {
  2318. auto &m = ir.meta[var.self].decoration;
  2319. uint32_t location = 0;
  2320. if (m.decoration_flags.get(DecorationLocation))
  2321. location = m.location;
  2322. // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
  2323. // do the access chain part of this for us.
  2324. auto &type = get<SPIRType>(var.basetype);
  2325. if (type.array.empty())
  2326. {
  2327. // Redirect the write to a specific render target in legacy GLSL.
  2328. m.alias = join("gl_FragData[", location, "]");
  2329. if (is_legacy_es() && location != 0)
  2330. require_extension_internal("GL_EXT_draw_buffers");
  2331. }
  2332. else if (type.array.size() == 1)
  2333. {
  2334. // If location is non-zero, we probably have to add an offset.
  2335. // This gets really tricky since we'd have to inject an offset in the access chain.
  2336. // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
  2337. m.alias = "gl_FragData";
  2338. if (location != 0)
  2339. SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
  2340. "This is unimplemented in SPIRV-Cross.");
  2341. if (is_legacy_es())
  2342. require_extension_internal("GL_EXT_draw_buffers");
  2343. }
  2344. else
  2345. SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
  2346. var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
  2347. }
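// Illustrative example of the remapping above (hypothetical shader, not taken from
// real compiler output): a fragment output declared in SPIR-V as
// `layout(location = 1) out vec4 FragColor;` is aliased to gl_FragData[1], so the
// variable itself is never declared and the emitted legacy code looks roughly like:
//
//   #extension GL_EXT_draw_buffers : require  // legacy ESSL only, since location != 0
//   void main() { gl_FragData[1] = ...; }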
  2348. void CompilerGLSL::replace_fragment_outputs()
  2349. {
  2350. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2351. auto &type = this->get<SPIRType>(var.basetype);
  2352. if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
  2353. replace_fragment_output(var);
  2354. });
  2355. }
  2356. string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
  2357. {
  2358. if (out_type.vecsize == input_components)
  2359. return expr;
  2360. else if (input_components == 1 && !backend.can_swizzle_scalar)
  2361. return join(type_to_glsl(out_type), "(", expr, ")");
  2362. else
  2363. {
  2364. // FIXME: This will not work with packed expressions.
  2365. auto e = enclose_expression(expr) + ".";
  2366. // Just clamp the swizzle index if we have more outputs than inputs.
  2367. for (uint32_t c = 0; c < out_type.vecsize; c++)
  2368. e += index_to_swizzle(min(c, input_components - 1));
  2369. if (backend.swizzle_is_function && out_type.vecsize > 1)
  2370. e += "()";
  2371. remove_duplicate_swizzle(e);
  2372. return e;
  2373. }
  2374. }
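// Worked example of the swizzle clamping above (illustrative; `expr` is a hypothetical
// vec2 value written to a vec4 destination): the result is `expr.xyyy`, because swizzle
// indices past the last input component are clamped to it. A scalar source with
// can_swizzle_scalar disabled instead takes the constructor path, e.g. `vec4(expr)`.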
  2375. void CompilerGLSL::emit_pls()
  2376. {
  2377. auto &execution = get_entry_point();
  2378. if (execution.model != ExecutionModelFragment)
  2379. SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
  2380. if (!options.es)
  2381. SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
  2382. if (options.version < 300)
  2383. SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
  2384. if (!pls_inputs.empty())
  2385. {
  2386. statement("__pixel_local_inEXT _PLSIn");
  2387. begin_scope();
  2388. for (auto &input : pls_inputs)
  2389. statement(pls_decl(input), ";");
  2390. end_scope_decl();
  2391. statement("");
  2392. }
  2393. if (!pls_outputs.empty())
  2394. {
  2395. statement("__pixel_local_outEXT _PLSOut");
  2396. begin_scope();
  2397. for (auto &output : pls_outputs)
  2398. statement(pls_decl(output), ";");
  2399. end_scope_decl();
  2400. statement("");
  2401. }
  2402. }
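// Sketch of the emitted pixel-local-storage interface (illustrative; assumes one
// remapped input named `color` whose pls_decl() yields an r11f_g11f_b10f vec3):
//
//   __pixel_local_inEXT _PLSIn
//   {
//       layout(r11f_g11f_b10f) vec3 color;
//   };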
  2403. void CompilerGLSL::fixup_image_load_store_access()
  2404. {
  2405. if (!options.enable_storage_image_qualifier_deduction)
  2406. return;
  2407. ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
  2408. auto &vartype = expression_type(var);
  2409. if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
  2410. {
  2411. // Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
  2412. // Solve this by making the image access as restricted as possible and loosen up if we need to.
  2413. // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
  2414. auto &flags = ir.meta[var].decoration.decoration_flags;
  2415. if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
  2416. {
  2417. flags.set(DecorationNonWritable);
  2418. flags.set(DecorationNonReadable);
  2419. }
  2420. }
  2421. });
  2422. }
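// Effect of the deduction above (illustrative): a storage image with no access
// qualifiers in the SPIR-V, e.g. a hypothetical `layout(rgba8) uniform image2D uImg;`,
// starts out marked NonWritable + NonReadable and would be emitted with both qualifiers,
// roughly `layout(rgba8) uniform readonly writeonly image2D uImg;`, unless actual loads
// or stores later clear those flags again.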
  2423. static bool is_block_builtin(BuiltIn builtin)
  2424. {
  2425. return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
  2426. builtin == BuiltInCullDistance;
  2427. }
  2428. bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
  2429. {
  2430. // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
  2431. if (storage != StorageClassOutput)
  2432. return false;
  2433. bool should_force = false;
  2434. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2435. if (should_force)
  2436. return;
  2437. auto &type = this->get<SPIRType>(var.basetype);
  2438. bool block = has_decoration(type.self, DecorationBlock);
  2439. if (var.storage == storage && block && is_builtin_variable(var))
  2440. {
  2441. uint32_t member_count = uint32_t(type.member_types.size());
  2442. for (uint32_t i = 0; i < member_count; i++)
  2443. {
  2444. if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
  2445. is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
  2446. has_member_decoration(type.self, i, DecorationOffset))
  2447. {
  2448. should_force = true;
  2449. }
  2450. }
  2451. }
  2452. else if (var.storage == storage && !block && is_builtin_variable(var))
  2453. {
  2454. if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
  2455. has_decoration(var.self, DecorationOffset))
  2456. {
  2457. should_force = true;
  2458. }
  2459. }
  2460. });
  2461. // If we're declaring clip/cull planes with control points we need to force block declaration.
  2462. if (get_execution_model() == ExecutionModelTessellationControl &&
  2463. (clip_distance_count || cull_distance_count))
  2464. {
  2465. should_force = true;
  2466. }
  2467. return should_force;
  2468. }
  2469. void CompilerGLSL::fixup_implicit_builtin_block_names()
  2470. {
  2471. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2472. auto &type = this->get<SPIRType>(var.basetype);
  2473. bool block = has_decoration(type.self, DecorationBlock);
  2474. if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
  2475. is_builtin_variable(var))
  2476. {
  2477. // Make sure the array has a supported name in the code.
  2478. if (var.storage == StorageClassOutput)
  2479. set_name(var.self, "gl_out");
  2480. else if (var.storage == StorageClassInput)
  2481. set_name(var.self, "gl_in");
  2482. }
  2483. });
  2484. }
  2485. void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
  2486. {
  2487. Bitset emitted_builtins;
  2488. Bitset global_builtins;
  2489. const SPIRVariable *block_var = nullptr;
  2490. bool emitted_block = false;
  2491. bool builtin_array = false;
  2492. // Need to use declared size in the type.
  2493. // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
  2494. uint32_t cull_distance_size = 0;
  2495. uint32_t clip_distance_size = 0;
  2496. bool have_xfb_buffer_stride = false;
  2497. bool have_geom_stream = false;
  2498. bool have_any_xfb_offset = false;
  2499. uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
  2500. std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
  2501. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2502. auto &type = this->get<SPIRType>(var.basetype);
  2503. bool block = has_decoration(type.self, DecorationBlock);
  2504. Bitset builtins;
  2505. if (var.storage == storage && block && is_builtin_variable(var))
  2506. {
  2507. uint32_t index = 0;
  2508. for (auto &m : ir.meta[type.self].members)
  2509. {
  2510. if (m.builtin)
  2511. {
  2512. builtins.set(m.builtin_type);
  2513. if (m.builtin_type == BuiltInCullDistance)
  2514. cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
  2515. else if (m.builtin_type == BuiltInClipDistance)
  2516. clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
  2517. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
  2518. {
  2519. have_any_xfb_offset = true;
  2520. builtin_xfb_offsets[m.builtin_type] = m.offset;
  2521. }
  2522. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
  2523. {
  2524. uint32_t stream = m.stream;
  2525. if (have_geom_stream && geom_stream != stream)
  2526. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2527. have_geom_stream = true;
  2528. geom_stream = stream;
  2529. }
  2530. }
  2531. index++;
  2532. }
  2533. if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
  2534. has_decoration(var.self, DecorationXfbStride))
  2535. {
  2536. uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
  2537. uint32_t stride = get_decoration(var.self, DecorationXfbStride);
  2538. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  2539. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  2540. if (have_xfb_buffer_stride && stride != xfb_stride)
2541. SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  2542. have_xfb_buffer_stride = true;
  2543. xfb_buffer = buffer_index;
  2544. xfb_stride = stride;
  2545. }
  2546. if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
  2547. {
  2548. uint32_t stream = get_decoration(var.self, DecorationStream);
  2549. if (have_geom_stream && geom_stream != stream)
  2550. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2551. have_geom_stream = true;
  2552. geom_stream = stream;
  2553. }
  2554. }
  2555. else if (var.storage == storage && !block && is_builtin_variable(var))
  2556. {
  2557. // While we're at it, collect all declared global builtins (HLSL mostly ...).
  2558. auto &m = ir.meta[var.self].decoration;
  2559. if (m.builtin)
  2560. {
  2561. global_builtins.set(m.builtin_type);
  2562. if (m.builtin_type == BuiltInCullDistance)
  2563. cull_distance_size = to_array_size_literal(type);
  2564. else if (m.builtin_type == BuiltInClipDistance)
  2565. clip_distance_size = to_array_size_literal(type);
  2566. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
  2567. m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
  2568. {
  2569. have_any_xfb_offset = true;
  2570. builtin_xfb_offsets[m.builtin_type] = m.offset;
  2571. uint32_t buffer_index = m.xfb_buffer;
  2572. uint32_t stride = m.xfb_stride;
  2573. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  2574. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  2575. if (have_xfb_buffer_stride && stride != xfb_stride)
2576. SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  2577. have_xfb_buffer_stride = true;
  2578. xfb_buffer = buffer_index;
  2579. xfb_stride = stride;
  2580. }
  2581. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
  2582. {
  2583. uint32_t stream = get_decoration(var.self, DecorationStream);
  2584. if (have_geom_stream && geom_stream != stream)
  2585. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2586. have_geom_stream = true;
  2587. geom_stream = stream;
  2588. }
  2589. }
  2590. }
  2591. if (builtins.empty())
  2592. return;
  2593. if (emitted_block)
  2594. SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
  2595. emitted_builtins = builtins;
  2596. emitted_block = true;
  2597. builtin_array = !type.array.empty();
  2598. block_var = &var;
  2599. });
  2600. global_builtins =
  2601. Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
  2602. (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
  2603. // Try to collect all other declared builtins.
  2604. if (!emitted_block)
  2605. emitted_builtins = global_builtins;
  2606. // Can't declare an empty interface block.
  2607. if (emitted_builtins.empty())
  2608. return;
  2609. if (storage == StorageClassOutput)
  2610. {
  2611. SmallVector<string> attr;
  2612. if (have_xfb_buffer_stride && have_any_xfb_offset)
  2613. {
  2614. if (!options.es)
  2615. {
  2616. if (options.version < 440 && options.version >= 140)
  2617. require_extension_internal("GL_ARB_enhanced_layouts");
  2618. else if (options.version < 140)
2619. SPIRV_CROSS_THROW("Transform feedback qualifiers (xfb_buffer/xfb_stride) are not supported in targets below GLSL 1.40.");
  2620. if (!options.es && options.version < 440)
  2621. require_extension_internal("GL_ARB_enhanced_layouts");
  2622. }
  2623. else if (options.es)
  2624. SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
  2625. attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
  2626. }
  2627. if (have_geom_stream)
  2628. {
  2629. if (get_execution_model() != ExecutionModelGeometry)
  2630. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  2631. if (options.es)
  2632. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  2633. if (options.version < 400)
  2634. require_extension_internal("GL_ARB_transform_feedback3");
  2635. attr.push_back(join("stream = ", geom_stream));
  2636. }
  2637. if (!attr.empty())
  2638. statement("layout(", merge(attr), ") out gl_PerVertex");
  2639. else
  2640. statement("out gl_PerVertex");
  2641. }
  2642. else
  2643. {
2644. // If the geometry shader uses passthrough, the gl_PerVertex input block must be declared passthrough as well.
  2645. if (get_entry_point().geometry_passthrough)
  2646. statement("layout(passthrough) in gl_PerVertex");
  2647. else
  2648. statement("in gl_PerVertex");
  2649. }
  2650. begin_scope();
  2651. if (emitted_builtins.get(BuiltInPosition))
  2652. {
  2653. auto itr = builtin_xfb_offsets.find(BuiltInPosition);
  2654. if (itr != end(builtin_xfb_offsets))
  2655. statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
  2656. else
  2657. statement("vec4 gl_Position;");
  2658. }
  2659. if (emitted_builtins.get(BuiltInPointSize))
  2660. {
  2661. auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
  2662. if (itr != end(builtin_xfb_offsets))
  2663. statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
  2664. else
  2665. statement("float gl_PointSize;");
  2666. }
  2667. if (emitted_builtins.get(BuiltInClipDistance))
  2668. {
  2669. auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
  2670. if (itr != end(builtin_xfb_offsets))
  2671. statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
  2672. else
  2673. statement("float gl_ClipDistance[", clip_distance_size, "];");
  2674. }
  2675. if (emitted_builtins.get(BuiltInCullDistance))
  2676. {
  2677. auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
  2678. if (itr != end(builtin_xfb_offsets))
  2679. statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
  2680. else
  2681. statement("float gl_CullDistance[", cull_distance_size, "];");
  2682. }
  2683. if (builtin_array)
  2684. {
  2685. if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
  2686. end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
  2687. else
  2688. end_scope_decl(join(to_name(block_var->self), "[]"));
  2689. }
  2690. else
  2691. end_scope_decl();
  2692. statement("");
  2693. }
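// Sketch of one possible redeclared builtin block (illustrative values): with XFB
// captured at buffer 0, stride 16, and gl_Position at offset 0, the function emits
// roughly:
//
//   layout(xfb_buffer = 0, xfb_stride = 16) out gl_PerVertex
//   {
//       layout(xfb_offset = 0) vec4 gl_Position;
//   };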
  2694. void CompilerGLSL::declare_undefined_values()
  2695. {
  2696. bool emitted = false;
  2697. ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) {
  2698. auto &type = this->get<SPIRType>(undef.basetype);
  2699. // OpUndef can be void for some reason ...
  2700. if (type.basetype == SPIRType::Void)
  2701. return;
  2702. string initializer;
  2703. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  2704. initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
  2705. statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
  2706. emitted = true;
  2707. });
  2708. if (emitted)
  2709. statement("");
  2710. }
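// Example of what the loop above produces (illustrative): an OpUndef of type vec4 with
// a hypothetical ID 42 becomes `vec4 _42;`, or `vec4 _42 = vec4(0.0);` when
// force_zero_initialized_variables is enabled and the type can be zero-initialized.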
  2711. bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
  2712. {
  2713. bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
  2714. if (statically_assigned)
  2715. {
  2716. auto *constant = maybe_get<SPIRConstant>(var.static_expression);
  2717. if (constant && constant->is_used_as_lut)
  2718. return true;
  2719. }
  2720. return false;
  2721. }
  2722. void CompilerGLSL::emit_resources()
  2723. {
  2724. auto &execution = get_entry_point();
  2725. replace_illegal_names();
  2726. // Legacy GL uses gl_FragData[], redeclare all fragment outputs
  2727. // with builtins.
  2728. if (execution.model == ExecutionModelFragment && is_legacy())
  2729. replace_fragment_outputs();
  2730. // Emit PLS blocks if we have such variables.
  2731. if (!pls_inputs.empty() || !pls_outputs.empty())
  2732. emit_pls();
  2733. switch (execution.model)
  2734. {
  2735. case ExecutionModelGeometry:
  2736. case ExecutionModelTessellationControl:
  2737. case ExecutionModelTessellationEvaluation:
  2738. fixup_implicit_builtin_block_names();
  2739. break;
  2740. default:
  2741. break;
  2742. }
  2743. // Emit custom gl_PerVertex for SSO compatibility.
  2744. if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
  2745. {
  2746. switch (execution.model)
  2747. {
  2748. case ExecutionModelGeometry:
  2749. case ExecutionModelTessellationControl:
  2750. case ExecutionModelTessellationEvaluation:
  2751. emit_declared_builtin_block(StorageClassInput, execution.model);
  2752. emit_declared_builtin_block(StorageClassOutput, execution.model);
  2753. break;
  2754. case ExecutionModelVertex:
  2755. emit_declared_builtin_block(StorageClassOutput, execution.model);
  2756. break;
  2757. default:
  2758. break;
  2759. }
  2760. }
  2761. else if (should_force_emit_builtin_block(StorageClassOutput))
  2762. {
  2763. emit_declared_builtin_block(StorageClassOutput, execution.model);
  2764. }
  2765. else if (execution.geometry_passthrough)
  2766. {
  2767. // Need to declare gl_in with Passthrough.
  2768. // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
  2769. emit_declared_builtin_block(StorageClassInput, execution.model);
  2770. }
  2771. else
  2772. {
  2773. // Need to redeclare clip/cull distance with explicit size to use them.
  2774. // SPIR-V mandates these builtins have a size declared.
  2775. const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
  2776. if (clip_distance_count != 0)
  2777. statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
  2778. if (cull_distance_count != 0)
  2779. statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
  2780. if (clip_distance_count != 0 || cull_distance_count != 0)
  2781. statement("");
  2782. }
  2783. if (position_invariant)
  2784. {
  2785. statement("invariant gl_Position;");
  2786. statement("");
  2787. }
  2788. bool emitted = false;
2789. // If emitting Vulkan GLSL,
  2790. // emit specialization constants as actual floats,
  2791. // spec op expressions will redirect to the constant name.
  2792. //
  2793. {
  2794. auto loop_lock = ir.create_loop_hard_lock();
  2795. for (auto &id_ : ir.ids_for_constant_or_type)
  2796. {
  2797. auto &id = ir.ids[id_];
  2798. if (id.get_type() == TypeConstant)
  2799. {
  2800. auto &c = id.get<SPIRConstant>();
  2801. bool needs_declaration = c.specialization || c.is_used_as_lut;
  2802. if (needs_declaration)
  2803. {
  2804. if (!options.vulkan_semantics && c.specialization)
  2805. {
  2806. c.specialization_constant_macro_name =
  2807. constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
  2808. }
  2809. emit_constant(c);
  2810. emitted = true;
  2811. }
  2812. }
  2813. else if (id.get_type() == TypeConstantOp)
  2814. {
  2815. emit_specialization_constant_op(id.get<SPIRConstantOp>());
  2816. emitted = true;
  2817. }
  2818. else if (id.get_type() == TypeType)
  2819. {
  2820. auto *type = &id.get<SPIRType>();
  2821. bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
  2822. (!has_decoration(type->self, DecorationBlock) &&
  2823. !has_decoration(type->self, DecorationBufferBlock));
  2824. // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
  2825. if (type->basetype == SPIRType::Struct && type->pointer &&
  2826. has_decoration(type->self, DecorationBlock) &&
  2827. (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
  2828. type->storage == StorageClassHitAttributeKHR))
  2829. {
  2830. type = &get<SPIRType>(type->parent_type);
  2831. is_natural_struct = true;
  2832. }
  2833. if (is_natural_struct)
  2834. {
  2835. if (emitted)
  2836. statement("");
  2837. emitted = false;
  2838. emit_struct(*type);
  2839. }
  2840. }
  2841. }
  2842. }
  2843. if (emitted)
  2844. statement("");
  2845. // If we needed to declare work group size late, check here.
  2846. // If the work group size depends on a specialization constant, we need to declare the layout() block
  2847. // after constants (and their macros) have been declared.
  2848. if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
  2849. execution.workgroup_size.constant != 0)
  2850. {
  2851. SpecializationConstant wg_x, wg_y, wg_z;
  2852. get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  2853. if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
  2854. {
  2855. SmallVector<string> inputs;
  2856. build_workgroup_size(inputs, wg_x, wg_y, wg_z);
  2857. statement("layout(", merge(inputs), ") in;");
  2858. statement("");
  2859. }
  2860. }
  2861. emitted = false;
  2862. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
  2863. {
  2864. for (auto type : physical_storage_non_block_pointer_types)
  2865. {
  2866. emit_buffer_reference_block(get<SPIRType>(type), false);
  2867. }
  2868. // Output buffer reference blocks.
  2869. // Do this in two stages, one with forward declaration,
  2870. // and one without. Buffer reference blocks can reference themselves
  2871. // to support things like linked lists.
  2872. ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
  2873. bool has_block_flags = has_decoration(type.self, DecorationBlock);
  2874. if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
  2875. type.storage == StorageClassPhysicalStorageBufferEXT)
  2876. {
  2877. emit_buffer_reference_block(type, true);
  2878. }
  2879. });
  2880. ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
  2881. bool has_block_flags = has_decoration(type.self, DecorationBlock);
  2882. if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
  2883. type.storage == StorageClassPhysicalStorageBufferEXT)
  2884. {
  2885. emit_buffer_reference_block(type, false);
  2886. }
  2887. });
  2888. }
  2889. // Output UBOs and SSBOs
  2890. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2891. auto &type = this->get<SPIRType>(var.basetype);
  2892. bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
  2893. type.storage == StorageClassShaderRecordBufferKHR;
  2894. bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
  2895. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  2896. if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
  2897. has_block_flags)
  2898. {
  2899. emit_buffer_block(var);
  2900. }
  2901. });
  2902. // Output push constant blocks
  2903. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2904. auto &type = this->get<SPIRType>(var.basetype);
  2905. if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
  2906. !is_hidden_variable(var))
  2907. {
  2908. emit_push_constant_block(var);
  2909. }
  2910. });
  2911. bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
  2912. // Output Uniform Constants (values, samplers, images, etc).
  2913. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2914. auto &type = this->get<SPIRType>(var.basetype);
  2915. // If we're remapping separate samplers and images, only emit the combined samplers.
  2916. if (skip_separate_image_sampler)
  2917. {
  2918. // Sampler buffers are always used without a sampler, and they will also work in regular GL.
  2919. bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
  2920. bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
  2921. bool separate_sampler = type.basetype == SPIRType::Sampler;
  2922. if (!sampler_buffer && (separate_image || separate_sampler))
  2923. return;
  2924. }
  2925. if (var.storage != StorageClassFunction && type.pointer &&
  2926. (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
  2927. type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
  2928. type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
  2929. type.storage == StorageClassHitAttributeKHR) &&
  2930. !is_hidden_variable(var))
  2931. {
  2932. emit_uniform(var);
  2933. emitted = true;
  2934. }
  2935. });
  2936. if (emitted)
  2937. statement("");
  2938. emitted = false;
  2939. bool emitted_base_instance = false;
  2940. // Output in/out interfaces.
  2941. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2942. auto &type = this->get<SPIRType>(var.basetype);
  2943. bool is_hidden = is_hidden_variable(var);
  2944. // Unused output I/O variables might still be required to implement framebuffer fetch.
  2945. if (var.storage == StorageClassOutput && !is_legacy() &&
  2946. inout_color_attachments.count(get_decoration(var.self, DecorationLocation)) != 0)
  2947. {
  2948. is_hidden = false;
  2949. }
  2950. if (var.storage != StorageClassFunction && type.pointer &&
  2951. (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
  2952. interface_variable_exists_in_entry_point(var.self) && !is_hidden)
  2953. {
  2954. emit_interface_block(var);
  2955. emitted = true;
  2956. }
  2957. else if (is_builtin_variable(var))
  2958. {
  2959. auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
  2960. // For gl_InstanceIndex emulation on GLES, the API user needs to
  2961. // supply this uniform.
  2962. // The draw parameter extension is soft-enabled on GL with some fallbacks.
  2963. if (!options.vulkan_semantics)
  2964. {
  2965. if (!emitted_base_instance &&
  2966. ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
  2967. (builtin == BuiltInBaseInstance)))
  2968. {
  2969. statement("#ifdef GL_ARB_shader_draw_parameters");
  2970. statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
  2971. statement("#else");
  2972. // A crude, but simple workaround which should be good enough for non-indirect draws.
  2973. statement("uniform int SPIRV_Cross_BaseInstance;");
  2974. statement("#endif");
  2975. emitted = true;
  2976. emitted_base_instance = true;
  2977. }
  2978. else if (builtin == BuiltInBaseVertex)
  2979. {
  2980. statement("#ifdef GL_ARB_shader_draw_parameters");
  2981. statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
  2982. statement("#else");
  2983. // A crude, but simple workaround which should be good enough for non-indirect draws.
  2984. statement("uniform int SPIRV_Cross_BaseVertex;");
  2985. statement("#endif");
  2986. }
  2987. else if (builtin == BuiltInDrawIndex)
  2988. {
  2989. statement("#ifndef GL_ARB_shader_draw_parameters");
  2990. // Cannot really be worked around.
  2991. statement("#error GL_ARB_shader_draw_parameters is not supported.");
  2992. statement("#endif");
  2993. }
  2994. }
  2995. }
  2996. });
  2997. // Global variables.
  2998. for (auto global : global_variables)
  2999. {
  3000. auto &var = get<SPIRVariable>(global);
  3001. if (is_hidden_variable(var, true))
  3002. continue;
  3003. if (var.storage != StorageClassOutput)
  3004. {
  3005. if (!variable_is_lut(var))
  3006. {
  3007. add_resource_name(var.self);
  3008. string initializer;
  3009. if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
  3010. !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
  3011. {
  3012. initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
  3013. }
  3014. statement(variable_decl(var), initializer, ";");
  3015. emitted = true;
  3016. }
  3017. }
  3018. else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
  3019. {
  3020. emit_output_variable_initializer(var);
  3021. }
  3022. }
  3023. if (emitted)
  3024. statement("");
  3025. declare_undefined_values();
  3026. }
  3027. void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
  3028. {
  3029. // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
  3030. auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
  3031. auto &type = get<SPIRType>(var.basetype);
  3032. bool is_patch = has_decoration(var.self, DecorationPatch);
  3033. bool is_block = has_decoration(type.self, DecorationBlock);
  3034. bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
  3035. if (is_block)
  3036. {
  3037. uint32_t member_count = uint32_t(type.member_types.size());
  3038. bool type_is_array = type.array.size() == 1;
  3039. uint32_t array_size = 1;
  3040. if (type_is_array)
  3041. array_size = to_array_size_literal(type);
  3042. uint32_t iteration_count = is_control_point ? 1 : array_size;
  3043. // If the initializer is a block, we must initialize each block member one at a time.
  3044. for (uint32_t i = 0; i < member_count; i++)
  3045. {
  3046. // These outputs might not have been properly declared, so don't initialize them in that case.
  3047. if (has_member_decoration(type.self, i, DecorationBuiltIn))
  3048. {
  3049. if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
  3050. !cull_distance_count)
  3051. continue;
  3052. if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
  3053. !clip_distance_count)
  3054. continue;
  3055. }
  3056. // We need to build a per-member array first, essentially transposing from AoS to SoA.
  3057. // This code path hits when we have an array of blocks.
  3058. string lut_name;
  3059. if (type_is_array)
  3060. {
  3061. lut_name = join("_", var.self, "_", i, "_init");
  3062. uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
  3063. auto &member_type = get<SPIRType>(member_type_id);
  3064. auto array_type = member_type;
  3065. array_type.parent_type = member_type_id;
  3066. array_type.array.push_back(array_size);
  3067. array_type.array_size_literal.push_back(true);
  3068. SmallVector<string> exprs;
  3069. exprs.reserve(array_size);
  3070. auto &c = get<SPIRConstant>(var.initializer);
  3071. for (uint32_t j = 0; j < array_size; j++)
  3072. exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
  3073. statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ",
  3074. type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
  3075. }
  3076. for (uint32_t j = 0; j < iteration_count; j++)
  3077. {
  3078. entry_func.fixup_hooks_in.push_back([=, &var]() {
  3079. AccessChainMeta meta;
  3080. auto &c = this->get<SPIRConstant>(var.initializer);
  3081. uint32_t invocation_id = 0;
  3082. uint32_t member_index_id = 0;
  3083. if (is_control_point)
  3084. {
  3085. uint32_t ids = ir.increase_bound_by(3);
  3086. SPIRType uint_type;
  3087. uint_type.basetype = SPIRType::UInt;
  3088. uint_type.width = 32;
  3089. set<SPIRType>(ids, uint_type);
  3090. set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
  3091. set<SPIRConstant>(ids + 2, ids, i, false);
  3092. invocation_id = ids + 1;
  3093. member_index_id = ids + 2;
  3094. }
  3095. if (is_patch)
  3096. {
  3097. statement("if (gl_InvocationID == 0)");
  3098. begin_scope();
  3099. }
  3100. if (type_is_array && !is_control_point)
  3101. {
  3102. uint32_t indices[2] = { j, i };
  3103. auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  3104. statement(chain, " = ", lut_name, "[", j, "];");
  3105. }
  3106. else if (is_control_point)
  3107. {
  3108. uint32_t indices[2] = { invocation_id, member_index_id };
  3109. auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
  3110. statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
  3111. }
  3112. else
  3113. {
  3114. auto chain =
  3115. access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  3116. statement(chain, " = ", to_expression(c.subconstants[i]), ";");
  3117. }
  3118. if (is_patch)
  3119. end_scope();
  3120. });
  3121. }
  3122. }
  3123. }
  3124. else if (is_control_point)
  3125. {
  3126. auto lut_name = join("_", var.self, "_init");
  3127. statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type),
  3128. " = ", to_expression(var.initializer), ";");
  3129. entry_func.fixup_hooks_in.push_back([&, lut_name]() {
  3130. statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
  3131. });
  3132. }
  3133. else
  3134. {
  3135. auto lut_name = join("_", var.self, "_init");
  3136. statement("const ", type_to_glsl(type), " ", lut_name,
  3137. type_to_array_glsl(type), " = ", to_expression(var.initializer), ";");
  3138. entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
  3139. if (is_patch)
  3140. {
  3141. statement("if (gl_InvocationID == 0)");
  3142. begin_scope();
  3143. }
  3144. statement(to_expression(var.self), " = ", lut_name, ";");
  3145. if (is_patch)
  3146. end_scope();
  3147. });
  3148. }
  3149. }
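// Sketch of the non-block control-point path above (illustrative names and sizes): a
// hypothetical tessellation-control output `out vec4 v[];` with variable ID 7 and a
// constant initializer yields roughly:
//
//   const vec4 _7_init[4] = vec4[](...);             // declared at global scope
//   v[gl_InvocationID] = _7_init[gl_InvocationID];   // fixup hook at the top of main()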
  3150. void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
  3151. {
  3152. static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
  3153. "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
  3154. if (!options.vulkan_semantics)
  3155. {
  3156. using Supp = ShaderSubgroupSupportHelper;
  3157. auto result = shader_subgroup_supporter.resolve();
  3158. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
  3159. {
  3160. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
  3161. for (auto &e : exts)
  3162. {
  3163. const char *name = Supp::get_extension_name(e);
  3164. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3165. switch (e)
  3166. {
  3167. case Supp::NV_shader_thread_group:
  3168. statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
  3169. statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
  3170. statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
  3171. statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
  3172. statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
  3173. break;
  3174. case Supp::ARB_shader_ballot:
  3175. statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
  3176. statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
  3177. statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
  3178. statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
  3179. statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
  3180. break;
  3181. default:
  3182. break;
  3183. }
  3184. }
  3185. statement("#endif");
  3186. statement("");
  3187. }
  3188. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
  3189. {
  3190. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
  3191. for (auto &e : exts)
  3192. {
  3193. const char *name = Supp::get_extension_name(e);
  3194. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3195. switch (e)
  3196. {
  3197. case Supp::NV_shader_thread_group:
  3198. statement("#define gl_SubgroupSize gl_WarpSizeNV");
  3199. break;
  3200. case Supp::ARB_shader_ballot:
  3201. statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
  3202. break;
  3203. case Supp::AMD_gcn_shader:
  3204. statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
  3205. break;
  3206. default:
  3207. break;
  3208. }
  3209. }
  3210. statement("#endif");
  3211. statement("");
  3212. }
  3213. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
  3214. {
  3215. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
  3216. for (auto &e : exts)
  3217. {
  3218. const char *name = Supp::get_extension_name(e);
  3219. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3220. switch (e)
  3221. {
  3222. case Supp::NV_shader_thread_group:
  3223. statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
  3224. break;
  3225. case Supp::ARB_shader_ballot:
  3226. statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
  3227. break;
  3228. default:
  3229. break;
  3230. }
  3231. }
  3232. statement("#endif");
  3233. statement("");
  3234. }
  3235. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
  3236. {
  3237. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
  3238. for (auto &e : exts)
  3239. {
  3240. const char *name = Supp::get_extension_name(e);
  3241. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3242. switch (e)
  3243. {
  3244. case Supp::NV_shader_thread_group:
  3245. statement("#define gl_SubgroupID gl_WarpIDNV");
  3246. break;
  3247. default:
  3248. break;
  3249. }
  3250. }
  3251. statement("#endif");
  3252. statement("");
  3253. }
  3254. if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
  3255. {
  3256. auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
  3257. for (auto &e : exts)
  3258. {
  3259. const char *name = Supp::get_extension_name(e);
  3260. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3261. switch (e)
  3262. {
  3263. case Supp::NV_shader_thread_group:
  3264. statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
  3265. break;
  3266. default:
  3267. break;
  3268. }
  3269. }
  3270. statement("#endif");
  3271. statement("");
  3272. }
  3273. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
  3274. {
  3275. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
  3276. for (auto &e : exts)
  3277. {
  3278. const char *name = Supp::get_extension_name(e);
  3279. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3280. switch (e)
  3281. {
  3282. case Supp::NV_shader_thread_shuffle:
  3283. for (const char *t : workaround_types)
  3284. {
  3285. statement(t, " subgroupBroadcastFirst(", t,
  3286. " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
  3287. }
  3288. for (const char *t : workaround_types)
  3289. {
  3290. statement(t, " subgroupBroadcast(", t,
  3291. " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
  3292. }
  3293. break;
  3294. case Supp::ARB_shader_ballot:
  3295. for (const char *t : workaround_types)
  3296. {
  3297. statement(t, " subgroupBroadcastFirst(", t,
  3298. " value) { return readFirstInvocationARB(value); }");
  3299. }
  3300. for (const char *t : workaround_types)
  3301. {
  3302. statement(t, " subgroupBroadcast(", t,
  3303. " value, uint id) { return readInvocationARB(value, id); }");
  3304. }
  3305. break;
  3306. default:
  3307. break;
  3308. }
  3309. }
  3310. statement("#endif");
  3311. statement("");
  3312. }
  3313. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
  3314. {
  3315. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
  3316. for (auto &e : exts)
  3317. {
  3318. const char *name = Supp::get_extension_name(e);
  3319. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3320. switch (e)
  3321. {
  3322. case Supp::NV_shader_thread_group:
  3323. statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
  3324. statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
  3325. break;
  3326. default:
  3327. break;
  3328. }
  3329. }
  3330. statement("#else");
  3331. statement("uint subgroupBallotFindLSB(uvec4 value)");
  3332. begin_scope();
  3333. statement("int firstLive = findLSB(value.x);");
  3334. statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
  3335. end_scope();
  3336. statement("uint subgroupBallotFindMSB(uvec4 value)");
  3337. begin_scope();
  3338. statement("int firstLive = findMSB(value.y);");
  3339. statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
  3340. end_scope();
  3341. statement("#endif");
  3342. statement("");
  3343. }
  3344. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
  3345. {
  3346. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
  3347. for (auto &e : exts)
  3348. {
  3349. const char *name = Supp::get_extension_name(e);
  3350. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3351. switch (e)
  3352. {
  3353. case Supp::NV_gpu_shader_5:
  3354. statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
  3355. statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
  3356. statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
  3357. break;
  3358. case Supp::ARB_shader_group_vote:
  3359. statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
  3360. statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
  3361. statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
  3362. break;
  3363. case Supp::AMD_gcn_shader:
  3364. statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
  3365. statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
  3366. statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
  3367. "b == ballotAMD(true); }");
  3368. break;
  3369. default:
  3370. break;
  3371. }
  3372. }
  3373. statement("#endif");
  3374. statement("");
  3375. }
  3376. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
  3377. {
  3378. statement("#ifndef GL_KHR_shader_subgroup_vote");
  3379. statement(
  3380. "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
  3381. "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
  3382. for (const char *t : workaround_types)
  3383. statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
  3384. statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
  3385. statement("#endif");
  3386. statement("");
  3387. }
  3388. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
  3389. {
  3390. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
  3391. for (auto &e : exts)
  3392. {
  3393. const char *name = Supp::get_extension_name(e);
  3394. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3395. switch (e)
  3396. {
  3397. case Supp::NV_shader_thread_group:
  3398. statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
  3399. break;
  3400. case Supp::ARB_shader_ballot:
  3401. statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
  3402. break;
  3403. default:
  3404. break;
  3405. }
  3406. }
  3407. statement("#endif");
  3408. statement("");
  3409. }
  3410. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
  3411. {
  3412. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3413. statement("bool subgroupElect()");
  3414. begin_scope();
  3415. statement("uvec4 activeMask = subgroupBallot(true);");
  3416. statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
  3417. statement("return gl_SubgroupInvocationID == firstLive;");
  3418. end_scope();
  3419. statement("#endif");
  3420. statement("");
  3421. }
  3422. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
  3423. {
  3424. // Extensions we're using in place of GL_KHR_shader_subgroup_basic state
3425. // that subgroups execute in lockstep, so this barrier is implicit.
3426. // However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
3427. // and a specific test of optimizing scans by leveraging lock-step invocation execution
  3428. // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
  3429. // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
  3430. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3431. statement("void subgroupBarrier() { memoryBarrierShared(); }");
  3432. statement("#endif");
  3433. statement("");
  3434. }
  3435. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
  3436. {
  3437. if (model == spv::ExecutionModelGLCompute)
  3438. {
  3439. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3440. statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
  3441. statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
  3442. statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
  3443. statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
  3444. statement("#endif");
  3445. }
  3446. else
  3447. {
  3448. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3449. statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
  3450. statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
  3451. statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
  3452. statement("#endif");
  3453. }
  3454. statement("");
  3455. }
  3456. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
  3457. {
  3458. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  3459. statement("bool subgroupInverseBallot(uvec4 value)");
  3460. begin_scope();
  3461. statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
  3462. end_scope();
  3463. statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
  3464. begin_scope();
  3465. statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
  3466. statement("ivec2 c = bitCount(v);");
  3467. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  3468. statement("return uint(c.x);");
  3469. statement_no_indent("#else");
  3470. statement("return uint(c.x + c.y);");
  3471. statement_no_indent("#endif");
  3472. end_scope();
  3473. statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
  3474. begin_scope();
  3475. statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
  3476. statement("ivec2 c = bitCount(v);");
  3477. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  3478. statement("return uint(c.x);");
  3479. statement_no_indent("#else");
  3480. statement("return uint(c.x + c.y);");
  3481. statement_no_indent("#endif");
  3482. end_scope();
  3483. statement("#endif");
  3484. statement("");
  3485. }
  3486. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
  3487. {
  3488. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  3489. statement("uint subgroupBallotBitCount(uvec4 value)");
  3490. begin_scope();
  3491. statement("ivec2 c = bitCount(value.xy);");
  3492. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  3493. statement("return uint(c.x);");
  3494. statement_no_indent("#else");
  3495. statement("return uint(c.x + c.y);");
  3496. statement_no_indent("#endif");
  3497. end_scope();
  3498. statement("#endif");
  3499. statement("");
  3500. }
  3501. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
  3502. {
  3503. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  3504. statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
  3505. begin_scope();
  3506. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  3507. statement("uint shifted = value.x >> index;");
  3508. statement_no_indent("#else");
  3509. statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
  3510. statement_no_indent("#endif");
  3511. statement("return (shifted & 1u) != 0u;");
  3512. end_scope();
  3513. statement("#endif");
  3514. statement("");
  3515. }
  3516. }
  3517. if (!workaround_ubo_load_overload_types.empty())
  3518. {
  3519. for (auto &type_id : workaround_ubo_load_overload_types)
  3520. {
  3521. auto &type = get<SPIRType>(type_id);
  3522. statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type),
  3523. " wrap) { return wrap; }");
  3524. }
  3525. statement("");
  3526. }
  3527. if (requires_transpose_2x2)
  3528. {
  3529. statement("mat2 spvTranspose(mat2 m)");
  3530. begin_scope();
  3531. statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
  3532. end_scope();
  3533. statement("");
  3534. }
  3535. if (requires_transpose_3x3)
  3536. {
  3537. statement("mat3 spvTranspose(mat3 m)");
  3538. begin_scope();
  3539. statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
  3540. end_scope();
  3541. statement("");
  3542. }
  3543. if (requires_transpose_4x4)
  3544. {
  3545. statement("mat4 spvTranspose(mat4 m)");
  3546. begin_scope();
  3547. statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
  3548. "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
  3549. end_scope();
  3550. statement("");
  3551. }
  3552. }
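// Shape of one emitted fallback chain from the subgroup workarounds above (illustrative;
// the actual candidate order comes from ShaderSubgroupSupportHelper):
//
//   #if defined(GL_NV_shader_thread_group)
//   #define gl_SubgroupSize gl_WarpSizeNV
//   #elif defined(GL_ARB_shader_ballot)
//   #define gl_SubgroupSize gl_SubGroupSizeARB
//   #elif defined(GL_AMD_gcn_shader)
//   #define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)
//   #endif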
  3553. // Returns a string representation of the ID, usable as a function arg.
3554. // Default is to simply return the expression representation of the arg ID.
  3555. // Subclasses may override to modify the return value.
  3556. string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
  3557. {
  3558. // Make sure that we use the name of the original variable, and not the parameter alias.
  3559. uint32_t name_id = id;
  3560. auto *var = maybe_get<SPIRVariable>(id);
  3561. if (var && var->basevariable)
  3562. name_id = var->basevariable;
  3563. return to_expression(name_id);
  3564. }
  3565. void CompilerGLSL::handle_invalid_expression(uint32_t id)
  3566. {
  3567. // We tried to read an invalidated expression.
  3568. // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
  3569. forced_temporaries.insert(id);
  3570. force_recompile();
  3571. }
  3572. // Converts the format of the current expression from packed to unpacked,
  3573. // by wrapping the expression in a constructor of the appropriate type.
  3574. // GLSL does not support packed formats, so simply return the expression.
  3575. // Subclasses that do will override.
  3576. string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
  3577. {
  3578. return expr_str;
  3579. }
3580. // Sometimes we proactively enclosed an expression where it turns out we might not have needed it after all.
  3581. void CompilerGLSL::strip_enclosed_expression(string &expr)
  3582. {
  3583. if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
  3584. return;
  3585. // Have to make sure that our first and last parens actually enclose everything inside it.
  3586. uint32_t paren_count = 0;
  3587. for (auto &c : expr)
  3588. {
  3589. if (c == '(')
  3590. paren_count++;
  3591. else if (c == ')')
  3592. {
  3593. paren_count--;
  3594. // If we hit 0 and this is not the final char, our first and final parens actually don't
  3595. // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
  3596. if (paren_count == 0 && &c != &expr.back())
  3597. return;
  3598. }
  3599. }
  3600. expr.erase(expr.size() - 1, 1);
  3601. expr.erase(begin(expr));
  3602. }
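// Worked examples for the check above: "(a + b)" is stripped to "a + b", while
// "(a + b) * (c + d)" is left untouched because the paren counter reaches zero before
// the final character, i.e. the outermost parentheses do not enclose the whole string.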
  3603. string CompilerGLSL::enclose_expression(const string &expr)
  3604. {
  3605. bool need_parens = false;
  3606. // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
  3607. // unary expressions.
  3608. if (!expr.empty())
  3609. {
  3610. auto c = expr.front();
  3611. if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
  3612. need_parens = true;
  3613. }
  3614. if (!need_parens)
  3615. {
  3616. uint32_t paren_count = 0;
  3617. for (auto c : expr)
  3618. {
  3619. if (c == '(' || c == '[')
  3620. paren_count++;
  3621. else if (c == ')' || c == ']')
  3622. {
  3623. assert(paren_count);
  3624. paren_count--;
  3625. }
  3626. else if (c == ' ' && paren_count == 0)
  3627. {
  3628. need_parens = true;
  3629. break;
  3630. }
  3631. }
  3632. assert(paren_count == 0);
  3633. }
  3634. // If this expression contains any spaces which are not enclosed by parentheses,
  3635. // we need to enclose it so we can treat the whole string as an expression.
  3636. // This happens when two expressions have been part of a binary op earlier.
  3637. if (need_parens)
  3638. return join('(', expr, ')');
  3639. else
  3640. return expr;
  3641. }
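// Worked examples for the rules above (hypothetical identifiers): "-x" and "a + b"
// become "(-x)" and "(a + b)", whereas "texture(uTex, vUV)" is returned unchanged since
// its only spaces sit inside balanced parentheses and it does not start with a unary
// operator.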
  3642. string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
  3643. {
  3644. // If this expression starts with an address-of operator ('&'), then
  3645. // just return the part after the operator.
  3646. // TODO: Strip parens if unnecessary?
  3647. if (expr.front() == '&')
  3648. return expr.substr(1);
  3649. else if (backend.native_pointers)
  3650. return join('*', expr);
  3651. else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
  3652. expr_type.pointer_depth == 1)
  3653. {
  3654. return join(enclose_expression(expr), ".value");
  3655. }
  3656. else
  3657. return expr;
  3658. }
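// Worked examples for the branches above (illustrative): "&foo" dereferences to "foo";
// with native pointer support a plain pointer expression becomes "*foo"; and a
// non-struct PhysicalStorageBuffer pointer of depth 1 is read through the generated
// wrapper member instead, e.g. "foo.value".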
  3659. string CompilerGLSL::address_of_expression(const std::string &expr)
  3660. {
  3661. if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
  3662. {
  3663. // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
  3664. // the first two and last characters. We might have to enclose the expression.
  3665. // This doesn't work for cases like (*foo + 10),
  3666. // but this is an r-value expression which we cannot take the address of anyways.
  3667. return enclose_expression(expr.substr(2, expr.size() - 3));
  3668. }
  3669. else if (expr.front() == '*')
  3670. {
  3671. // If this expression starts with a dereference operator ('*'), then
  3672. // just return the part after the operator.
  3673. return expr.substr(1);
  3674. }
  3675. else
  3676. return join('&', enclose_expression(expr));
  3677. }
  3678. // Just like to_expression except that we enclose the expression inside parentheses if needed.
  3679. string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
  3680. {
  3681. return enclose_expression(to_expression(id, register_expression_read));
  3682. }
  3683. // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
  3684. // need_transpose must be forced to false.
  3685. string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
  3686. {
  3687. return unpack_expression_type(to_expression(id), expression_type(id),
  3688. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  3689. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
  3690. }
  3691. string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
  3692. {
  3693. // If we need to transpose, it will also take care of unpacking rules.
  3694. auto *e = maybe_get<SPIRExpression>(id);
  3695. bool need_transpose = e && e->need_transpose;
  3696. bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  3697. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  3698. if (!need_transpose && (is_remapped || is_packed))
  3699. {
  3700. return unpack_expression_type(to_expression(id, register_expression_read),
  3701. get_pointee_type(expression_type_id(id)),
  3702. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  3703. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
  3704. }
  3705. else
  3706. return to_expression(id, register_expression_read);
  3707. }
  3708. string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
  3709. {
  3710. // If we need to transpose, it will also take care of unpacking rules.
  3711. auto *e = maybe_get<SPIRExpression>(id);
  3712. bool need_transpose = e && e->need_transpose;
  3713. bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  3714. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  3715. if (!need_transpose && (is_remapped || is_packed))
  3716. {
  3717. return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
  3718. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  3719. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
  3720. }
  3721. else
  3722. return to_enclosed_expression(id, register_expression_read);
  3723. }
  3724. string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
  3725. {
  3726. auto &type = expression_type(id);
  3727. if (type.pointer && should_dereference(id))
  3728. return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
  3729. else
  3730. return to_expression(id, register_expression_read);
  3731. }
  3732. string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
  3733. {
  3734. auto &type = expression_type(id);
  3735. if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
  3736. return address_of_expression(to_enclosed_expression(id, register_expression_read));
  3737. else
  3738. return to_unpacked_expression(id, register_expression_read);
  3739. }
  3740. string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
  3741. {
  3742. auto &type = expression_type(id);
  3743. if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
  3744. return address_of_expression(to_enclosed_expression(id, register_expression_read));
  3745. else
  3746. return to_enclosed_unpacked_expression(id, register_expression_read);
  3747. }
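// Extracts a single component from a vector expression.
// Sketch: for index 2, a physically packed vector yields "expr[2]", otherwise "expr.z".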
  3748. string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
  3749. {
  3750. auto expr = to_enclosed_expression(id);
  3751. if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
  3752. return join(expr, "[", index, "]");
  3753. else
  3754. return join(expr, ".", index_to_swizzle(index));
  3755. }
  3756. string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
  3757. const uint32_t *chain, uint32_t length)
  3758. {
// It is kinda silly if an application actually enters this path, since it knows the constant up front.
// It is useful here to extract the plain constant directly.
  3761. SPIRConstant tmp;
  3762. tmp.constant_type = result_type;
  3763. auto &composite_type = get<SPIRType>(c.constant_type);
  3764. assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
  3765. assert(!c.specialization);
  3766. if (is_matrix(composite_type))
  3767. {
  3768. if (length == 2)
  3769. {
  3770. tmp.m.c[0].vecsize = 1;
  3771. tmp.m.columns = 1;
  3772. tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
  3773. }
  3774. else
  3775. {
  3776. assert(length == 1);
  3777. tmp.m.c[0].vecsize = composite_type.vecsize;
  3778. tmp.m.columns = 1;
  3779. tmp.m.c[0] = c.m.c[chain[0]];
  3780. }
  3781. }
  3782. else
  3783. {
  3784. assert(length == 1);
  3785. tmp.m.c[0].vecsize = 1;
  3786. tmp.m.columns = 1;
  3787. tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
  3788. }
  3789. return constant_expression(tmp);
  3790. }
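// Re-rolls an array expression into an explicit initializer list, recursing into nested arrays.
// Sketch: for "arr" of type float[2] this yields roughly "{ arr[0], arr[1] }", and nested
// arrays become nested initializer lists.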
  3791. string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
  3792. {
  3793. uint32_t size = to_array_size_literal(type);
  3794. auto &parent = get<SPIRType>(type.parent_type);
  3795. string expr = "{ ";
  3796. for (uint32_t i = 0; i < size; i++)
  3797. {
  3798. auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
  3799. if (parent.array.empty())
  3800. expr += subexpr;
  3801. else
  3802. expr += to_rerolled_array_expression(subexpr, parent);
  3803. if (i + 1 < size)
  3804. expr += ", ";
  3805. }
  3806. expr += " }";
  3807. return expr;
  3808. }
  3809. string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset)
  3810. {
  3811. auto &type = expression_type(id);
  3812. bool reroll_array = !type.array.empty() && (!backend.array_is_value_type ||
  3813. (uses_buffer_offset && !backend.buffer_offset_array_is_value_type));
  3814. if (reroll_array)
  3815. {
  3816. // For this case, we need to "re-roll" an array initializer from a temporary.
  3817. // We cannot simply pass the array directly, since it decays to a pointer and it cannot
  3818. // participate in a struct initializer. E.g.
  3819. // float arr[2] = { 1.0, 2.0 };
  3820. // Foo foo = { arr }; must be transformed to
  3821. // Foo foo = { { arr[0], arr[1] } };
  3822. // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
  3823. // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
  3824. // as temporaries anyways.
  3825. return to_rerolled_array_expression(to_enclosed_expression(id), type);
  3826. }
  3827. else
  3828. return to_unpacked_expression(id);
  3829. }
  3830. string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
  3831. {
  3832. auto itr = invalid_expressions.find(id);
  3833. if (itr != end(invalid_expressions))
  3834. handle_invalid_expression(id);
  3835. if (ir.ids[id].get_type() == TypeExpression)
  3836. {
  3837. // We might have a more complex chain of dependencies.
// A possible scenario is that we have:
//
// %1 = OpLoad
// %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
  3842. // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
  3843. // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
  3844. // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
  3845. //
  3846. // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
  3847. // and see that we should not forward reads of the original variable.
  3848. auto &expr = get<SPIRExpression>(id);
  3849. for (uint32_t dep : expr.expression_dependencies)
  3850. if (invalid_expressions.find(dep) != end(invalid_expressions))
  3851. handle_invalid_expression(dep);
  3852. }
  3853. if (register_expression_read)
  3854. track_expression_read(id);
  3855. switch (ir.ids[id].get_type())
  3856. {
  3857. case TypeExpression:
  3858. {
  3859. auto &e = get<SPIRExpression>(id);
  3860. if (e.base_expression)
  3861. return to_enclosed_expression(e.base_expression) + e.expression;
  3862. else if (e.need_transpose)
  3863. {
  3864. // This should not be reached for access chains, since we always deal explicitly with transpose state
  3865. // when consuming an access chain expression.
  3866. uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  3867. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  3868. return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
  3869. is_packed);
  3870. }
  3871. else if (flattened_structs.count(id))
  3872. {
  3873. return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
  3874. }
  3875. else
  3876. {
  3877. if (is_forcing_recompilation())
  3878. {
// During the first compilation phase, certain expression patterns can trigger exponential growth of memory.
  3880. // Avoid this by returning dummy expressions during this phase.
  3881. // Do not use empty expressions here, because those are sentinels for other cases.
  3882. return "_";
  3883. }
  3884. else
  3885. return e.expression;
  3886. }
  3887. }
  3888. case TypeConstant:
  3889. {
  3890. auto &c = get<SPIRConstant>(id);
  3891. auto &type = get<SPIRType>(c.constant_type);
  3892. // WorkGroupSize may be a constant.
  3893. auto &dec = ir.meta[c.self].decoration;
  3894. if (dec.builtin)
  3895. return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
  3896. else if (c.specialization)
  3897. return to_name(id);
  3898. else if (c.is_used_as_lut)
  3899. return to_name(id);
  3900. else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
  3901. return to_name(id);
  3902. else if (!type.array.empty() && !backend.can_declare_arrays_inline)
  3903. return to_name(id);
  3904. else
  3905. return constant_expression(c);
  3906. }
  3907. case TypeConstantOp:
  3908. return to_name(id);
  3909. case TypeVariable:
  3910. {
  3911. auto &var = get<SPIRVariable>(id);
// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
// since the variable has not been declared yet.
  3914. if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
  3915. return to_expression(var.static_expression);
  3916. else if (var.deferred_declaration)
  3917. {
  3918. var.deferred_declaration = false;
  3919. return variable_decl(var);
  3920. }
  3921. else if (flattened_structs.count(id))
  3922. {
  3923. return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
  3924. }
  3925. else
  3926. {
  3927. auto &dec = ir.meta[var.self].decoration;
  3928. if (dec.builtin)
  3929. return builtin_to_glsl(dec.builtin_type, var.storage);
  3930. else
  3931. return to_name(id);
  3932. }
  3933. }
  3934. case TypeCombinedImageSampler:
// We should never take the expression of this type directly.
  3936. // The intention is that texture sampling functions will extract the image and samplers
  3937. // separately and take their expressions as needed.
  3938. // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
  3939. // expression ala sampler2D(texture, sampler).
  3940. SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
  3941. case TypeAccessChain:
  3942. // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
  3943. SPIRV_CROSS_THROW("Access chains have no default expression representation.");
  3944. default:
  3945. return to_name(id);
  3946. }
  3947. }
  3948. string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
  3949. {
  3950. auto &type = get<SPIRType>(cop.basetype);
  3951. bool binary = false;
  3952. bool unary = false;
  3953. string op;
  3954. if (is_legacy() && is_unsigned_opcode(cop.opcode))
  3955. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
  3956. // TODO: Find a clean way to reuse emit_instruction.
  3957. switch (cop.opcode)
  3958. {
  3959. case OpSConvert:
  3960. case OpUConvert:
  3961. case OpFConvert:
  3962. op = type_to_glsl_constructor(type);
  3963. break;
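// The helper macros below expand to case labels: GLSL_BOP records a binary operator token
// (e.g. IAdd -> "+") and GLSL_UOP a unary one (e.g. SNegate -> "-"); the final expression is
// assembled further down once input/output casting has been decided.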
  3964. #define GLSL_BOP(opname, x) \
  3965. case Op##opname: \
  3966. binary = true; \
  3967. op = x; \
  3968. break
  3969. #define GLSL_UOP(opname, x) \
  3970. case Op##opname: \
  3971. unary = true; \
  3972. op = x; \
  3973. break
  3974. GLSL_UOP(SNegate, "-");
  3975. GLSL_UOP(Not, "~");
  3976. GLSL_BOP(IAdd, "+");
  3977. GLSL_BOP(ISub, "-");
  3978. GLSL_BOP(IMul, "*");
  3979. GLSL_BOP(SDiv, "/");
  3980. GLSL_BOP(UDiv, "/");
  3981. GLSL_BOP(UMod, "%");
  3982. GLSL_BOP(SMod, "%");
  3983. GLSL_BOP(ShiftRightLogical, ">>");
  3984. GLSL_BOP(ShiftRightArithmetic, ">>");
  3985. GLSL_BOP(ShiftLeftLogical, "<<");
  3986. GLSL_BOP(BitwiseOr, "|");
  3987. GLSL_BOP(BitwiseXor, "^");
  3988. GLSL_BOP(BitwiseAnd, "&");
  3989. GLSL_BOP(LogicalOr, "||");
  3990. GLSL_BOP(LogicalAnd, "&&");
  3991. GLSL_UOP(LogicalNot, "!");
  3992. GLSL_BOP(LogicalEqual, "==");
  3993. GLSL_BOP(LogicalNotEqual, "!=");
  3994. GLSL_BOP(IEqual, "==");
  3995. GLSL_BOP(INotEqual, "!=");
  3996. GLSL_BOP(ULessThan, "<");
  3997. GLSL_BOP(SLessThan, "<");
  3998. GLSL_BOP(ULessThanEqual, "<=");
  3999. GLSL_BOP(SLessThanEqual, "<=");
  4000. GLSL_BOP(UGreaterThan, ">");
  4001. GLSL_BOP(SGreaterThan, ">");
  4002. GLSL_BOP(UGreaterThanEqual, ">=");
  4003. GLSL_BOP(SGreaterThanEqual, ">=");
  4004. case OpSelect:
  4005. {
  4006. if (cop.arguments.size() < 3)
  4007. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4008. // This one is pretty annoying. It's triggered from
  4009. // uint(bool), int(bool) from spec constants.
  4010. // In order to preserve its compile-time constness in Vulkan GLSL,
  4011. // we need to reduce the OpSelect expression back to this simplified model.
  4012. // If we cannot, fail.
  4013. if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
  4014. {
  4015. // Implement as a simple cast down below.
  4016. }
  4017. else
  4018. {
  4019. // Implement a ternary and pray the compiler understands it :)
  4020. return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
  4021. }
  4022. break;
  4023. }
  4024. case OpVectorShuffle:
  4025. {
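// Sketch: shuffling vec2 a with vec2 b using indices (0, 3) emits roughly "vec2(a.x, b.y)";
// indices below the left operand's component count select from it, the rest from the right.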
  4026. string expr = type_to_glsl_constructor(type);
  4027. expr += "(";
  4028. uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
  4029. string left_arg = to_enclosed_expression(cop.arguments[0]);
  4030. string right_arg = to_enclosed_expression(cop.arguments[1]);
  4031. for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
  4032. {
  4033. uint32_t index = cop.arguments[i];
  4034. if (index >= left_components)
  4035. expr += right_arg + "." + "xyzw"[index - left_components];
  4036. else
  4037. expr += left_arg + "." + "xyzw"[index];
  4038. if (i + 1 < uint32_t(cop.arguments.size()))
  4039. expr += ", ";
  4040. }
  4041. expr += ")";
  4042. return expr;
  4043. }
  4044. case OpCompositeExtract:
  4045. {
  4046. auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
  4047. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  4048. return expr;
  4049. }
  4050. case OpCompositeInsert:
  4051. SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
  4052. default:
// Some opcodes are unimplemented here; these are currently not possible to test from glslang.
  4054. SPIRV_CROSS_THROW("Unimplemented spec constant op.");
  4055. }
  4056. uint32_t bit_width = 0;
  4057. if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
  4058. bit_width = expression_type(cop.arguments[0]).width;
  4059. SPIRType::BaseType input_type;
  4060. bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
  4061. switch (cop.opcode)
  4062. {
  4063. case OpIEqual:
  4064. case OpINotEqual:
  4065. input_type = to_signed_basetype(bit_width);
  4066. break;
  4067. case OpSLessThan:
  4068. case OpSLessThanEqual:
  4069. case OpSGreaterThan:
  4070. case OpSGreaterThanEqual:
  4071. case OpSMod:
  4072. case OpSDiv:
  4073. case OpShiftRightArithmetic:
  4074. case OpSConvert:
  4075. case OpSNegate:
  4076. input_type = to_signed_basetype(bit_width);
  4077. break;
  4078. case OpULessThan:
  4079. case OpULessThanEqual:
  4080. case OpUGreaterThan:
  4081. case OpUGreaterThanEqual:
  4082. case OpUMod:
  4083. case OpUDiv:
  4084. case OpShiftRightLogical:
  4085. case OpUConvert:
  4086. input_type = to_unsigned_basetype(bit_width);
  4087. break;
  4088. default:
  4089. input_type = type.basetype;
  4090. break;
  4091. }
  4092. #undef GLSL_BOP
  4093. #undef GLSL_UOP
  4094. if (binary)
  4095. {
  4096. if (cop.arguments.size() < 2)
  4097. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4098. string cast_op0;
  4099. string cast_op1;
  4100. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
  4101. cop.arguments[1], skip_cast_if_equal_type);
  4102. if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
  4103. {
  4104. expected_type.basetype = input_type;
  4105. auto expr = bitcast_glsl_op(type, expected_type);
  4106. expr += '(';
  4107. expr += join(cast_op0, " ", op, " ", cast_op1);
  4108. expr += ')';
  4109. return expr;
  4110. }
  4111. else
  4112. return join("(", cast_op0, " ", op, " ", cast_op1, ")");
  4113. }
  4114. else if (unary)
  4115. {
  4116. if (cop.arguments.size() < 1)
  4117. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4118. // Auto-bitcast to result type as needed.
  4119. // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
  4120. return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
  4121. }
  4122. else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
  4123. {
  4124. if (cop.arguments.size() < 1)
  4125. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4126. auto &arg_type = expression_type(cop.arguments[0]);
  4127. if (arg_type.width < type.width && input_type != arg_type.basetype)
  4128. {
  4129. auto expected = arg_type;
  4130. expected.basetype = input_type;
  4131. return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
  4132. }
  4133. else
  4134. return join(op, "(", to_expression(cop.arguments[0]), ")");
  4135. }
  4136. else
  4137. {
  4138. if (cop.arguments.size() < 1)
  4139. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4140. return join(op, "(", to_expression(cop.arguments[0]), ")");
  4141. }
  4142. }
  4143. string CompilerGLSL::constant_expression(const SPIRConstant &c)
  4144. {
  4145. auto &type = get<SPIRType>(c.constant_type);
  4146. if (type.pointer)
  4147. {
  4148. return backend.null_pointer_literal;
  4149. }
  4150. else if (!c.subconstants.empty())
  4151. {
  4152. // Handles Arrays and structures.
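// Depending on backend flags the result takes one of a few shapes, roughly:
//   constructor syntax:       Foo(a, b)
//   initializer list:         { a, b }
//   typed initializer list:   Foo{ a, b }   (or Foo({ a, b }) for value-type arrays)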
  4153. string res;
  4154. // Allow Metal to use the array<T> template to make arrays a value type
bool needs_trailing_bracket = false;
  4156. if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
  4157. type.array.empty())
  4158. {
  4159. res = type_to_glsl_constructor(type) + "{ ";
  4160. }
  4161. else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
  4162. !type.array.empty())
  4163. {
  4164. res = type_to_glsl_constructor(type) + "({ ";
needs_trailing_bracket = true;
  4166. }
  4167. else if (backend.use_initializer_list)
  4168. {
  4169. res = "{ ";
  4170. }
  4171. else
  4172. {
  4173. res = type_to_glsl_constructor(type) + "(";
  4174. }
  4175. for (auto &elem : c.subconstants)
  4176. {
  4177. auto &subc = get<SPIRConstant>(elem);
  4178. if (subc.specialization)
  4179. res += to_name(elem);
  4180. else
  4181. res += constant_expression(subc);
  4182. if (&elem != &c.subconstants.back())
  4183. res += ", ";
  4184. }
  4185. res += backend.use_initializer_list ? " }" : ")";
if (needs_trailing_bracket)
  4187. res += ")";
  4188. return res;
  4189. }
  4190. else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
  4191. {
  4192. // Metal tessellation likes empty structs which are then constant expressions.
  4193. if (backend.supports_empty_struct)
  4194. return "{ }";
  4195. else if (backend.use_typed_initializer_list)
  4196. return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }");
  4197. else if (backend.use_initializer_list)
  4198. return "{ 0 }";
  4199. else
  4200. return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)");
  4201. }
  4202. else if (c.columns() == 1)
  4203. {
  4204. return constant_expression_vector(c, 0);
  4205. }
  4206. else
  4207. {
  4208. string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
  4209. for (uint32_t col = 0; col < c.columns(); col++)
  4210. {
  4211. if (c.specialization_constant_id(col) != 0)
  4212. res += to_name(c.specialization_constant_id(col));
  4213. else
  4214. res += constant_expression_vector(c, col);
  4215. if (col + 1 < c.columns())
  4216. res += ", ";
  4217. }
  4218. res += ")";
  4219. return res;
  4220. }
  4221. }
  4222. #ifdef _MSC_VER
  4223. // sprintf warning.
  4224. // We cannot rely on snprintf existing because, ..., MSVC.
  4225. #pragma warning(push)
  4226. #pragma warning(disable : 4996)
  4227. #endif
  4228. string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  4229. {
  4230. string res;
  4231. float float_value = c.scalar_f16(col, row);
  4232. // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
  4233. // of complicated workarounds, just value-cast to the half type always.
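// e.g. 1.5 might come out as "float16_t(1.5)" and a NaN as "float16_t(0.0 / 0.0)"
// (the exact type name depends on what type_to_glsl produces for the target).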
  4234. if (std::isnan(float_value) || std::isinf(float_value))
  4235. {
  4236. SPIRType type;
  4237. type.basetype = SPIRType::Half;
  4238. type.vecsize = 1;
  4239. type.columns = 1;
  4240. if (float_value == numeric_limits<float>::infinity())
  4241. res = join(type_to_glsl(type), "(1.0 / 0.0)");
  4242. else if (float_value == -numeric_limits<float>::infinity())
  4243. res = join(type_to_glsl(type), "(-1.0 / 0.0)");
  4244. else if (std::isnan(float_value))
  4245. res = join(type_to_glsl(type), "(0.0 / 0.0)");
  4246. else
  4247. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  4248. }
  4249. else
  4250. {
  4251. SPIRType type;
  4252. type.basetype = SPIRType::Half;
  4253. type.vecsize = 1;
  4254. type.columns = 1;
  4255. res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
  4256. }
  4257. return res;
  4258. }
  4259. string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  4260. {
  4261. string res;
  4262. float float_value = c.scalar_f32(col, row);
  4263. if (std::isnan(float_value) || std::isinf(float_value))
  4264. {
  4265. // Use special representation.
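// e.g. +infinity becomes something like uintBitsToFloat(0x7f800000u) on modern targets,
// or the expression (1.0 / 0.0) on legacy targets which lack bitcasts.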
  4266. if (!is_legacy())
  4267. {
  4268. SPIRType out_type;
  4269. SPIRType in_type;
  4270. out_type.basetype = SPIRType::Float;
  4271. in_type.basetype = SPIRType::UInt;
  4272. out_type.vecsize = 1;
  4273. in_type.vecsize = 1;
  4274. out_type.width = 32;
  4275. in_type.width = 32;
  4276. char print_buffer[32];
  4277. sprintf(print_buffer, "0x%xu", c.scalar(col, row));
  4278. res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
  4279. }
  4280. else
  4281. {
  4282. if (float_value == numeric_limits<float>::infinity())
  4283. {
  4284. if (backend.float_literal_suffix)
  4285. res = "(1.0f / 0.0f)";
  4286. else
  4287. res = "(1.0 / 0.0)";
  4288. }
  4289. else if (float_value == -numeric_limits<float>::infinity())
  4290. {
  4291. if (backend.float_literal_suffix)
  4292. res = "(-1.0f / 0.0f)";
  4293. else
  4294. res = "(-1.0 / 0.0)";
  4295. }
  4296. else if (std::isnan(float_value))
  4297. {
  4298. if (backend.float_literal_suffix)
  4299. res = "(0.0f / 0.0f)";
  4300. else
  4301. res = "(0.0 / 0.0)";
  4302. }
  4303. else
  4304. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  4305. }
  4306. }
  4307. else
  4308. {
  4309. res = convert_to_string(float_value, current_locale_radix_character);
  4310. if (backend.float_literal_suffix)
  4311. res += "f";
  4312. }
  4313. return res;
  4314. }
  4315. std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  4316. {
  4317. string res;
  4318. double double_value = c.scalar_f64(col, row);
  4319. if (std::isnan(double_value) || std::isinf(double_value))
  4320. {
  4321. // Use special representation.
  4322. if (!is_legacy())
  4323. {
  4324. SPIRType out_type;
  4325. SPIRType in_type;
  4326. out_type.basetype = SPIRType::Double;
  4327. in_type.basetype = SPIRType::UInt64;
  4328. out_type.vecsize = 1;
  4329. in_type.vecsize = 1;
  4330. out_type.width = 64;
  4331. in_type.width = 64;
  4332. uint64_t u64_value = c.scalar_u64(col, row);
  4333. if (options.es)
  4334. SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
  4335. require_extension_internal("GL_ARB_gpu_shader_int64");
  4336. char print_buffer[64];
  4337. sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
  4338. backend.long_long_literal_suffix ? "ull" : "ul");
  4339. res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
  4340. }
  4341. else
  4342. {
  4343. if (options.es)
  4344. SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
  4345. if (options.version < 400)
  4346. require_extension_internal("GL_ARB_gpu_shader_fp64");
  4347. if (double_value == numeric_limits<double>::infinity())
  4348. {
  4349. if (backend.double_literal_suffix)
  4350. res = "(1.0lf / 0.0lf)";
  4351. else
  4352. res = "(1.0 / 0.0)";
  4353. }
  4354. else if (double_value == -numeric_limits<double>::infinity())
  4355. {
  4356. if (backend.double_literal_suffix)
  4357. res = "(-1.0lf / 0.0lf)";
  4358. else
  4359. res = "(-1.0 / 0.0)";
  4360. }
  4361. else if (std::isnan(double_value))
  4362. {
  4363. if (backend.double_literal_suffix)
  4364. res = "(0.0lf / 0.0lf)";
  4365. else
  4366. res = "(0.0 / 0.0)";
  4367. }
  4368. else
  4369. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  4370. }
  4371. }
  4372. else
  4373. {
  4374. res = convert_to_string(double_value, current_locale_radix_character);
  4375. if (backend.double_literal_suffix)
  4376. res += "lf";
  4377. }
  4378. return res;
  4379. }
  4380. #ifdef _MSC_VER
  4381. #pragma warning(pop)
  4382. #endif
  4383. string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
  4384. {
  4385. auto type = get<SPIRType>(c.constant_type);
  4386. type.columns = 1;
  4387. auto scalar_type = type;
  4388. scalar_type.vecsize = 1;
  4389. string res;
  4390. bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
  4391. bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
  4392. if (!type_is_floating_point(type))
  4393. {
// As a special case, we cannot swizzle literal integers.
  4395. swizzle_splat = false;
  4396. }
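// Sketch of the two splat forms for a constant like vec4(1.0, 1.0, 1.0, 1.0):
// constructor splatting emits "vec4(1.0)", while scalar swizzle splatting (backends with
// can_swizzle_scalar) emits roughly "1.0.xxxx" via remap_swizzle.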
  4397. if (splat || swizzle_splat)
  4398. {
  4399. // Cannot use constant splatting if we have specialization constants somewhere in the vector.
  4400. for (uint32_t i = 0; i < c.vector_size(); i++)
  4401. {
  4402. if (c.specialization_constant_id(vector, i) != 0)
  4403. {
  4404. splat = false;
  4405. swizzle_splat = false;
  4406. break;
  4407. }
  4408. }
  4409. }
  4410. if (splat || swizzle_splat)
  4411. {
  4412. if (type.width == 64)
  4413. {
  4414. uint64_t ident = c.scalar_u64(vector, 0);
  4415. for (uint32_t i = 1; i < c.vector_size(); i++)
  4416. {
  4417. if (ident != c.scalar_u64(vector, i))
  4418. {
  4419. splat = false;
  4420. swizzle_splat = false;
  4421. break;
  4422. }
  4423. }
  4424. }
  4425. else
  4426. {
  4427. uint32_t ident = c.scalar(vector, 0);
  4428. for (uint32_t i = 1; i < c.vector_size(); i++)
  4429. {
  4430. if (ident != c.scalar(vector, i))
  4431. {
  4432. splat = false;
  4433. swizzle_splat = false;
  4434. }
  4435. }
  4436. }
  4437. }
  4438. if (c.vector_size() > 1 && !swizzle_splat)
  4439. res += type_to_glsl(type) + "(";
  4440. switch (type.basetype)
  4441. {
  4442. case SPIRType::Half:
  4443. if (splat || swizzle_splat)
  4444. {
  4445. res += convert_half_to_string(c, vector, 0);
  4446. if (swizzle_splat)
  4447. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  4448. }
  4449. else
  4450. {
  4451. for (uint32_t i = 0; i < c.vector_size(); i++)
  4452. {
  4453. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4454. res += to_name(c.specialization_constant_id(vector, i));
  4455. else
  4456. res += convert_half_to_string(c, vector, i);
  4457. if (i + 1 < c.vector_size())
  4458. res += ", ";
  4459. }
  4460. }
  4461. break;
  4462. case SPIRType::Float:
  4463. if (splat || swizzle_splat)
  4464. {
  4465. res += convert_float_to_string(c, vector, 0);
  4466. if (swizzle_splat)
  4467. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  4468. }
  4469. else
  4470. {
  4471. for (uint32_t i = 0; i < c.vector_size(); i++)
  4472. {
  4473. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4474. res += to_name(c.specialization_constant_id(vector, i));
  4475. else
  4476. res += convert_float_to_string(c, vector, i);
  4477. if (i + 1 < c.vector_size())
  4478. res += ", ";
  4479. }
  4480. }
  4481. break;
  4482. case SPIRType::Double:
  4483. if (splat || swizzle_splat)
  4484. {
  4485. res += convert_double_to_string(c, vector, 0);
  4486. if (swizzle_splat)
  4487. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  4488. }
  4489. else
  4490. {
  4491. for (uint32_t i = 0; i < c.vector_size(); i++)
  4492. {
  4493. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4494. res += to_name(c.specialization_constant_id(vector, i));
  4495. else
  4496. res += convert_double_to_string(c, vector, i);
  4497. if (i + 1 < c.vector_size())
  4498. res += ", ";
  4499. }
  4500. }
  4501. break;
  4502. case SPIRType::Int64:
  4503. if (splat)
  4504. {
  4505. res += convert_to_string(c.scalar_i64(vector, 0));
  4506. if (backend.long_long_literal_suffix)
  4507. res += "ll";
  4508. else
  4509. res += "l";
  4510. }
  4511. else
  4512. {
  4513. for (uint32_t i = 0; i < c.vector_size(); i++)
  4514. {
  4515. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4516. res += to_name(c.specialization_constant_id(vector, i));
  4517. else
  4518. {
  4519. res += convert_to_string(c.scalar_i64(vector, i));
  4520. if (backend.long_long_literal_suffix)
  4521. res += "ll";
  4522. else
  4523. res += "l";
  4524. }
  4525. if (i + 1 < c.vector_size())
  4526. res += ", ";
  4527. }
  4528. }
  4529. break;
  4530. case SPIRType::UInt64:
  4531. if (splat)
  4532. {
  4533. res += convert_to_string(c.scalar_u64(vector, 0));
  4534. if (backend.long_long_literal_suffix)
  4535. res += "ull";
  4536. else
  4537. res += "ul";
  4538. }
  4539. else
  4540. {
  4541. for (uint32_t i = 0; i < c.vector_size(); i++)
  4542. {
  4543. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4544. res += to_name(c.specialization_constant_id(vector, i));
  4545. else
  4546. {
  4547. res += convert_to_string(c.scalar_u64(vector, i));
  4548. if (backend.long_long_literal_suffix)
  4549. res += "ull";
  4550. else
  4551. res += "ul";
  4552. }
  4553. if (i + 1 < c.vector_size())
  4554. res += ", ";
  4555. }
  4556. }
  4557. break;
  4558. case SPIRType::UInt:
  4559. if (splat)
  4560. {
  4561. res += convert_to_string(c.scalar(vector, 0));
  4562. if (is_legacy())
  4563. {
  4564. // Fake unsigned constant literals with signed ones if possible.
  4565. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
  4566. if (c.scalar_i32(vector, 0) < 0)
  4567. SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
  4568. }
  4569. else if (backend.uint32_t_literal_suffix)
  4570. res += "u";
  4571. }
  4572. else
  4573. {
  4574. for (uint32_t i = 0; i < c.vector_size(); i++)
  4575. {
  4576. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4577. res += to_name(c.specialization_constant_id(vector, i));
  4578. else
  4579. {
  4580. res += convert_to_string(c.scalar(vector, i));
  4581. if (is_legacy())
  4582. {
  4583. // Fake unsigned constant literals with signed ones if possible.
  4584. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
  4585. if (c.scalar_i32(vector, i) < 0)
  4586. SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
  4587. "the literal negative.");
  4588. }
  4589. else if (backend.uint32_t_literal_suffix)
  4590. res += "u";
  4591. }
  4592. if (i + 1 < c.vector_size())
  4593. res += ", ";
  4594. }
  4595. }
  4596. break;
  4597. case SPIRType::Int:
  4598. if (splat)
  4599. res += convert_to_string(c.scalar_i32(vector, 0));
  4600. else
  4601. {
  4602. for (uint32_t i = 0; i < c.vector_size(); i++)
  4603. {
  4604. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4605. res += to_name(c.specialization_constant_id(vector, i));
  4606. else
  4607. res += convert_to_string(c.scalar_i32(vector, i));
  4608. if (i + 1 < c.vector_size())
  4609. res += ", ";
  4610. }
  4611. }
  4612. break;
  4613. case SPIRType::UShort:
  4614. if (splat)
  4615. {
  4616. res += convert_to_string(c.scalar(vector, 0));
  4617. }
  4618. else
  4619. {
  4620. for (uint32_t i = 0; i < c.vector_size(); i++)
  4621. {
  4622. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4623. res += to_name(c.specialization_constant_id(vector, i));
  4624. else
  4625. {
  4626. if (*backend.uint16_t_literal_suffix)
  4627. {
  4628. res += convert_to_string(c.scalar_u16(vector, i));
  4629. res += backend.uint16_t_literal_suffix;
  4630. }
  4631. else
  4632. {
  4633. // If backend doesn't have a literal suffix, we need to value cast.
  4634. res += type_to_glsl(scalar_type);
  4635. res += "(";
  4636. res += convert_to_string(c.scalar_u16(vector, i));
  4637. res += ")";
  4638. }
  4639. }
  4640. if (i + 1 < c.vector_size())
  4641. res += ", ";
  4642. }
  4643. }
  4644. break;
  4645. case SPIRType::Short:
  4646. if (splat)
  4647. {
  4648. res += convert_to_string(c.scalar_i16(vector, 0));
  4649. }
  4650. else
  4651. {
  4652. for (uint32_t i = 0; i < c.vector_size(); i++)
  4653. {
  4654. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4655. res += to_name(c.specialization_constant_id(vector, i));
  4656. else
  4657. {
  4658. if (*backend.int16_t_literal_suffix)
  4659. {
  4660. res += convert_to_string(c.scalar_i16(vector, i));
  4661. res += backend.int16_t_literal_suffix;
  4662. }
  4663. else
  4664. {
  4665. // If backend doesn't have a literal suffix, we need to value cast.
  4666. res += type_to_glsl(scalar_type);
  4667. res += "(";
  4668. res += convert_to_string(c.scalar_i16(vector, i));
  4669. res += ")";
  4670. }
  4671. }
  4672. if (i + 1 < c.vector_size())
  4673. res += ", ";
  4674. }
  4675. }
  4676. break;
  4677. case SPIRType::UByte:
  4678. if (splat)
  4679. {
  4680. res += convert_to_string(c.scalar_u8(vector, 0));
  4681. }
  4682. else
  4683. {
  4684. for (uint32_t i = 0; i < c.vector_size(); i++)
  4685. {
  4686. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4687. res += to_name(c.specialization_constant_id(vector, i));
  4688. else
  4689. {
  4690. res += type_to_glsl(scalar_type);
  4691. res += "(";
  4692. res += convert_to_string(c.scalar_u8(vector, i));
  4693. res += ")";
  4694. }
  4695. if (i + 1 < c.vector_size())
  4696. res += ", ";
  4697. }
  4698. }
  4699. break;
  4700. case SPIRType::SByte:
  4701. if (splat)
  4702. {
  4703. res += convert_to_string(c.scalar_i8(vector, 0));
  4704. }
  4705. else
  4706. {
  4707. for (uint32_t i = 0; i < c.vector_size(); i++)
  4708. {
  4709. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4710. res += to_name(c.specialization_constant_id(vector, i));
  4711. else
  4712. {
  4713. res += type_to_glsl(scalar_type);
  4714. res += "(";
  4715. res += convert_to_string(c.scalar_i8(vector, i));
  4716. res += ")";
  4717. }
  4718. if (i + 1 < c.vector_size())
  4719. res += ", ";
  4720. }
  4721. }
  4722. break;
  4723. case SPIRType::Boolean:
  4724. if (splat)
  4725. res += c.scalar(vector, 0) ? "true" : "false";
  4726. else
  4727. {
  4728. for (uint32_t i = 0; i < c.vector_size(); i++)
  4729. {
  4730. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4731. res += to_name(c.specialization_constant_id(vector, i));
  4732. else
  4733. res += c.scalar(vector, i) ? "true" : "false";
  4734. if (i + 1 < c.vector_size())
  4735. res += ", ";
  4736. }
  4737. }
  4738. break;
  4739. default:
  4740. SPIRV_CROSS_THROW("Invalid constant expression basetype.");
  4741. }
  4742. if (c.vector_size() > 1 && !swizzle_splat)
  4743. res += ")";
  4744. return res;
  4745. }
  4746. SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
  4747. {
  4748. forced_temporaries.insert(id);
  4749. emit_uninitialized_temporary(type, id);
  4750. return set<SPIRExpression>(id, to_name(id), type, true);
  4751. }
  4752. void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
  4753. {
  4754. // If we're declaring temporaries inside continue blocks,
  4755. // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
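// Rough sketch: a temporary first needed inside a continue block is recorded in
// header.declare_temporary and we force a recompile; on the next pass its declaration
// (e.g. "float _25;", the name is illustrative) is emitted at the loop header instead.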
  4756. if (current_continue_block && !hoisted_temporaries.count(result_id))
  4757. {
  4758. auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
  4759. if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
  4760. [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
  4761. return tmp.first == result_type && tmp.second == result_id;
  4762. }) == end(header.declare_temporary))
  4763. {
  4764. header.declare_temporary.emplace_back(result_type, result_id);
  4765. hoisted_temporaries.insert(result_id);
  4766. force_recompile();
  4767. }
  4768. }
  4769. else if (hoisted_temporaries.count(result_id) == 0)
  4770. {
  4771. auto &type = get<SPIRType>(result_type);
  4772. auto &flags = ir.meta[result_id].decoration.decoration_flags;
  4773. // The result_id has not been made into an expression yet, so use flags interface.
  4774. add_local_variable_name(result_id);
  4775. string initializer;
  4776. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  4777. initializer = join(" = ", to_zero_initialized_expression(result_type));
  4778. statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
  4779. }
  4780. }
  4781. string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
  4782. {
  4783. auto &type = get<SPIRType>(result_type);
  4784. auto &flags = ir.meta[result_id].decoration.decoration_flags;
  4785. // If we're declaring temporaries inside continue blocks,
  4786. // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
  4787. if (current_continue_block && !hoisted_temporaries.count(result_id))
  4788. {
  4789. auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
  4790. if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
  4791. [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
  4792. return tmp.first == result_type && tmp.second == result_id;
  4793. }) == end(header.declare_temporary))
  4794. {
  4795. header.declare_temporary.emplace_back(result_type, result_id);
  4796. hoisted_temporaries.insert(result_id);
  4797. force_recompile();
  4798. }
  4799. return join(to_name(result_id), " = ");
  4800. }
  4801. else if (hoisted_temporaries.count(result_id))
  4802. {
  4803. // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
  4804. return join(to_name(result_id), " = ");
  4805. }
  4806. else
  4807. {
  4808. // The result_id has not been made into an expression yet, so use flags interface.
  4809. add_local_variable_name(result_id);
  4810. return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
  4811. }
  4812. }
  4813. bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
  4814. {
  4815. return forwarded_temporaries.count(id) != 0;
  4816. }
  4817. bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
  4818. {
  4819. return suppressed_usage_tracking.count(id) != 0;
  4820. }
  4821. bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
  4822. {
  4823. auto *expr = maybe_get<SPIRExpression>(id);
  4824. if (!expr)
  4825. return false;
  4826. // If we're emitting code at a deeper loop level than when we emitted the expression,
  4827. // we're probably reading the same expression over and over.
  4828. return current_loop_level > expr->emitted_loop_level;
  4829. }
  4830. SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
  4831. bool suppress_usage_tracking)
  4832. {
  4833. if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
  4834. {
  4835. // Just forward it without temporary.
  4836. // If the forward is trivial, we do not force flushing to temporary for this expression.
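// e.g. with rhs "a + b": when forwarded, later uses substitute "a + b" inline; otherwise the
// else-branch below emits a statement like "float _25 = a + b;" (illustrative name) and later
// uses refer to that temporary.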
  4837. forwarded_temporaries.insert(result_id);
  4838. if (suppress_usage_tracking)
  4839. suppressed_usage_tracking.insert(result_id);
  4840. return set<SPIRExpression>(result_id, rhs, result_type, true);
  4841. }
  4842. else
  4843. {
// If the expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
  4845. statement(declare_temporary(result_type, result_id), rhs, ";");
  4846. return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
  4847. }
  4848. }
  4849. void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  4850. {
  4851. bool forward = should_forward(op0);
  4852. emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
  4853. inherit_expression_dependencies(result_id, op0);
  4854. }
  4855. void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
  4856. {
  4857. bool forward = should_forward(op0) && should_forward(op1);
  4858. emit_op(result_type, result_id,
  4859. join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
  4860. inherit_expression_dependencies(result_id, op0);
  4861. inherit_expression_dependencies(result_id, op1);
  4862. }
  4863. void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
  4864. {
  4865. auto &type = get<SPIRType>(result_type);
  4866. auto expr = type_to_glsl_constructor(type);
  4867. expr += '(';
  4868. for (uint32_t i = 0; i < type.vecsize; i++)
  4869. {
  4870. // Make sure to call to_expression multiple times to ensure
  4871. // that these expressions are properly flushed to temporaries if needed.
  4872. expr += op;
  4873. expr += to_extract_component_expression(operand, i);
  4874. if (i + 1 < type.vecsize)
  4875. expr += ", ";
  4876. }
  4877. expr += ')';
  4878. emit_op(result_type, result_id, expr, should_forward(operand));
  4879. inherit_expression_dependencies(result_id, operand);
  4880. }
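// Unrolls a binary vector operation component by component, optionally bitcasting each
// component and negating the result. Sketch: an unrolled not-equal on two ivec2 values could
// come out roughly as "bvec2(a.x != b.x, a.y != b.y)".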
  4881. void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  4882. const char *op, bool negate, SPIRType::BaseType expected_type)
  4883. {
  4884. auto &type0 = expression_type(op0);
  4885. auto &type1 = expression_type(op1);
  4886. SPIRType target_type0 = type0;
  4887. SPIRType target_type1 = type1;
  4888. target_type0.basetype = expected_type;
  4889. target_type1.basetype = expected_type;
  4890. target_type0.vecsize = 1;
  4891. target_type1.vecsize = 1;
  4892. auto &type = get<SPIRType>(result_type);
  4893. auto expr = type_to_glsl_constructor(type);
  4894. expr += '(';
  4895. for (uint32_t i = 0; i < type.vecsize; i++)
  4896. {
  4897. // Make sure to call to_expression multiple times to ensure
  4898. // that these expressions are properly flushed to temporaries if needed.
  4899. if (negate)
  4900. expr += "!(";
  4901. if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
  4902. expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
  4903. else
  4904. expr += to_extract_component_expression(op0, i);
  4905. expr += ' ';
  4906. expr += op;
  4907. expr += ' ';
  4908. if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
  4909. expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
  4910. else
  4911. expr += to_extract_component_expression(op1, i);
  4912. if (negate)
  4913. expr += ")";
  4914. if (i + 1 < type.vecsize)
  4915. expr += ", ";
  4916. }
  4917. expr += ')';
  4918. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  4919. inherit_expression_dependencies(result_id, op0);
  4920. inherit_expression_dependencies(result_id, op1);
  4921. }
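// Decides whether the two operands need bitcasting to a common input type and returns the
// expected (fake) type used for the cast. Sketch: a signed comparison such as OpSLessThan fed
// two uint operands gets both of them bitcast to the signed type first, while a sign-invariant
// opcode with matching operand types emits no cast at all.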
  4922. SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
  4923. uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
  4924. {
  4925. auto &type0 = expression_type(op0);
  4926. auto &type1 = expression_type(op1);
  4927. // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
  4928. // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
// since the equality test is exactly the same.
  4930. bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
  4931. // Create a fake type so we can bitcast to it.
  4932. // We only deal with regular arithmetic types here like int, uints and so on.
  4933. SPIRType expected_type;
  4934. expected_type.basetype = input_type;
  4935. expected_type.vecsize = type0.vecsize;
  4936. expected_type.columns = type0.columns;
  4937. expected_type.width = type0.width;
  4938. if (cast)
  4939. {
  4940. cast_op0 = bitcast_glsl(expected_type, op0);
  4941. cast_op1 = bitcast_glsl(expected_type, op1);
  4942. }
  4943. else
  4944. {
  4945. // If we don't cast, our actual input type is that of the first (or second) argument.
  4946. cast_op0 = to_enclosed_unpacked_expression(op0);
  4947. cast_op1 = to_enclosed_unpacked_expression(op1);
  4948. input_type = type0.basetype;
  4949. }
  4950. return expected_type;
  4951. }
  4952. bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
  4953. {
  4954. // Some bitcasts may require complex casting sequences, and are implemented here.
// Otherwise a simple unary function will do with bitcast_glsl_op.
  4956. auto &output_type = get<SPIRType>(result_type);
  4957. auto &input_type = expression_type(op0);
  4958. string expr;
  4959. if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
  4960. expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
  4961. else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
  4962. input_type.vecsize == 2)
  4963. expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
  4964. else
  4965. return false;
  4966. emit_op(result_type, id, expr, should_forward(op0));
  4967. return true;
  4968. }
  4969. void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  4970. const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
  4971. {
  4972. string cast_op0, cast_op1;
  4973. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
  4974. auto &out_type = get<SPIRType>(result_type);
  4975. // We might have casted away from the result type, so bitcast again.
  4976. // For example, arithmetic right shift with uint inputs.
  4977. // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
  4978. string expr;
  4979. if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
  4980. {
  4981. expected_type.basetype = input_type;
  4982. expr = bitcast_glsl_op(out_type, expected_type);
  4983. expr += '(';
  4984. expr += join(cast_op0, " ", op, " ", cast_op1);
  4985. expr += ')';
  4986. }
  4987. else
  4988. expr += join(cast_op0, " ", op, " ", cast_op1);
  4989. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  4990. inherit_expression_dependencies(result_id, op0);
  4991. inherit_expression_dependencies(result_id, op1);
  4992. }
  4993. void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  4994. {
  4995. bool forward = should_forward(op0);
  4996. emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
  4997. inherit_expression_dependencies(result_id, op0);
  4998. }
  4999. void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5000. const char *op)
  5001. {
  5002. bool forward = should_forward(op0) && should_forward(op1);
  5003. emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
  5004. forward);
  5005. inherit_expression_dependencies(result_id, op0);
  5006. inherit_expression_dependencies(result_id, op1);
  5007. }
  5008. void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
  5009. SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
  5010. {
  5011. auto &out_type = get<SPIRType>(result_type);
  5012. auto &expr_type = expression_type(op0);
  5013. auto expected_type = out_type;
  5014. // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
  5015. expected_type.basetype = input_type;
  5016. expected_type.width = expr_type.width;
  5017. string cast_op = expr_type.basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  5018. string expr;
  5019. if (out_type.basetype != expected_result_type)
  5020. {
  5021. expected_type.basetype = expected_result_type;
  5022. expected_type.width = out_type.width;
  5023. expr = bitcast_glsl_op(out_type, expected_type);
  5024. expr += '(';
  5025. expr += join(op, "(", cast_op, ")");
  5026. expr += ')';
  5027. }
  5028. else
  5029. {
  5030. expr += join(op, "(", cast_op, ")");
  5031. }
  5032. emit_op(result_type, result_id, expr, should_forward(op0));
  5033. inherit_expression_dependencies(result_id, op0);
  5034. }
  5035. // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
  5036. // and different vector sizes all at once. Need a special purpose method here.
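// Sketch: a bitfieldExtract whose offset/count operands arrive as 16-bit integers might be
// emitted roughly as "bitfieldExtract(x, int(offset), int(count))", with an outer bitcast
// wrapped around it if the result type's signedness differs from what the op returns.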
  5037. void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5038. uint32_t op2, const char *op,
  5039. SPIRType::BaseType expected_result_type,
  5040. SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
  5041. SPIRType::BaseType input_type2)
  5042. {
  5043. auto &out_type = get<SPIRType>(result_type);
  5044. auto expected_type = out_type;
  5045. expected_type.basetype = input_type0;
  5046. string cast_op0 =
  5047. expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  5048. auto op1_expr = to_unpacked_expression(op1);
  5049. auto op2_expr = to_unpacked_expression(op2);
  5050. // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
  5051. expected_type.basetype = input_type1;
  5052. expected_type.vecsize = 1;
  5053. string cast_op1 = expression_type(op1).basetype != input_type1 ?
  5054. join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
  5055. op1_expr;
  5056. expected_type.basetype = input_type2;
  5057. expected_type.vecsize = 1;
  5058. string cast_op2 = expression_type(op2).basetype != input_type2 ?
  5059. join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
  5060. op2_expr;
  5061. string expr;
  5062. if (out_type.basetype != expected_result_type)
  5063. {
  5064. expected_type.vecsize = out_type.vecsize;
  5065. expected_type.basetype = expected_result_type;
  5066. expr = bitcast_glsl_op(out_type, expected_type);
  5067. expr += '(';
  5068. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  5069. expr += ')';
  5070. }
  5071. else
  5072. {
  5073. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  5074. }
  5075. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
  5076. inherit_expression_dependencies(result_id, op0);
  5077. inherit_expression_dependencies(result_id, op1);
  5078. inherit_expression_dependencies(result_id, op2);
  5079. }
  5080. void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5081. uint32_t op2, const char *op, SPIRType::BaseType input_type)
  5082. {
  5083. auto &out_type = get<SPIRType>(result_type);
  5084. auto expected_type = out_type;
  5085. expected_type.basetype = input_type;
  5086. string cast_op0 =
  5087. expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  5088. string cast_op1 =
  5089. expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
  5090. string cast_op2 =
  5091. expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
  5092. string expr;
  5093. if (out_type.basetype != input_type)
  5094. {
  5095. expr = bitcast_glsl_op(out_type, expected_type);
  5096. expr += '(';
  5097. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  5098. expr += ')';
  5099. }
  5100. else
  5101. {
  5102. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  5103. }
  5104. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
  5105. inherit_expression_dependencies(result_id, op0);
  5106. inherit_expression_dependencies(result_id, op1);
  5107. inherit_expression_dependencies(result_id, op2);
  5108. }
  5109. void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
  5110. uint32_t op1, const char *op, SPIRType::BaseType input_type)
  5111. {
  5112. // Special purpose method for implementing clustered subgroup opcodes.
5113. // Main difference is that op1 does not participate in any casting; it needs to be a literal.
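// A rough illustration (names made up): an int-typed clustered add could emit
// int(subgroupClusteredAdd(uint(x), 4u)), with the trailing cluster-size literal
// passed through untouched.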
  5114. auto &out_type = get<SPIRType>(result_type);
  5115. auto expected_type = out_type;
  5116. expected_type.basetype = input_type;
  5117. string cast_op0 =
  5118. expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  5119. string expr;
  5120. if (out_type.basetype != input_type)
  5121. {
  5122. expr = bitcast_glsl_op(out_type, expected_type);
  5123. expr += '(';
  5124. expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
  5125. expr += ')';
  5126. }
  5127. else
  5128. {
  5129. expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
  5130. }
  5131. emit_op(result_type, result_id, expr, should_forward(op0));
  5132. inherit_expression_dependencies(result_id, op0);
  5133. }
  5134. void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5135. const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
  5136. {
  5137. string cast_op0, cast_op1;
  5138. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
  5139. auto &out_type = get<SPIRType>(result_type);
  5140. // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
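// Rough example (names made up): an unsigned min on operands held in int variables,
// with an int result, comes out as int(min(uint(a), uint(b))).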
  5141. string expr;
  5142. if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
  5143. {
  5144. expected_type.basetype = input_type;
  5145. expr = bitcast_glsl_op(out_type, expected_type);
  5146. expr += '(';
  5147. expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
  5148. expr += ')';
  5149. }
  5150. else
  5151. {
  5152. expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
  5153. }
  5154. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  5155. inherit_expression_dependencies(result_id, op0);
  5156. inherit_expression_dependencies(result_id, op1);
  5157. }
  5158. void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5159. uint32_t op2, const char *op)
  5160. {
  5161. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
  5162. emit_op(result_type, result_id,
  5163. join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
  5164. to_unpacked_expression(op2), ")"),
  5165. forward);
  5166. inherit_expression_dependencies(result_id, op0);
  5167. inherit_expression_dependencies(result_id, op1);
  5168. inherit_expression_dependencies(result_id, op2);
  5169. }
  5170. void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5171. uint32_t op2, uint32_t op3, const char *op)
  5172. {
  5173. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
  5174. emit_op(result_type, result_id,
  5175. join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
  5176. to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
  5177. forward);
  5178. inherit_expression_dependencies(result_id, op0);
  5179. inherit_expression_dependencies(result_id, op1);
  5180. inherit_expression_dependencies(result_id, op2);
  5181. inherit_expression_dependencies(result_id, op3);
  5182. }
  5183. void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5184. uint32_t op2, uint32_t op3, const char *op,
  5185. SPIRType::BaseType offset_count_type)
  5186. {
5187. // We only need to cast the offset/count arguments. Types of base/insert must be the same as the result type,
  5188. // and bitfieldInsert is sign invariant.
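// For instance, with 16-bit offset/count operands this emits something like
// bitfieldInsert(base, insert, int(offset), int(count)), assuming the usual int
// offset/count type (names are illustrative).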
  5189. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
  5190. auto op0_expr = to_unpacked_expression(op0);
  5191. auto op1_expr = to_unpacked_expression(op1);
  5192. auto op2_expr = to_unpacked_expression(op2);
  5193. auto op3_expr = to_unpacked_expression(op3);
  5194. SPIRType target_type;
  5195. target_type.vecsize = 1;
  5196. target_type.basetype = offset_count_type;
  5197. if (expression_type(op2).basetype != offset_count_type)
  5198. {
  5199. // Value-cast here. Input might be 16-bit. GLSL requires int.
  5200. op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
  5201. }
  5202. if (expression_type(op3).basetype != offset_count_type)
  5203. {
  5204. // Value-cast here. Input might be 16-bit. GLSL requires int.
  5205. op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
  5206. }
  5207. emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
  5208. forward);
  5209. inherit_expression_dependencies(result_id, op0);
  5210. inherit_expression_dependencies(result_id, op1);
  5211. inherit_expression_dependencies(result_id, op2);
  5212. inherit_expression_dependencies(result_id, op3);
  5213. }
  5214. string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
  5215. {
  5216. const char *type;
  5217. switch (imgtype.image.dim)
  5218. {
  5219. case spv::Dim1D:
  5220. type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
  5221. break;
  5222. case spv::Dim2D:
  5223. type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
  5224. break;
  5225. case spv::Dim3D:
  5226. type = "3D";
  5227. break;
  5228. case spv::DimCube:
  5229. type = "Cube";
  5230. break;
  5231. case spv::DimRect:
  5232. type = "2DRect";
  5233. break;
  5234. case spv::DimBuffer:
  5235. type = "Buffer";
  5236. break;
  5237. case spv::DimSubpassData:
  5238. type = "2D";
  5239. break;
  5240. default:
  5241. type = "";
  5242. break;
  5243. }
  5244. // In legacy GLSL, an extension is required for textureLod in the fragment
  5245. // shader or textureGrad anywhere.
  5246. bool legacy_lod_ext = false;
  5247. auto &execution = get_entry_point();
  5248. if (op == "textureGrad" || op == "textureProjGrad" ||
  5249. ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
  5250. {
  5251. if (is_legacy_es())
  5252. {
  5253. legacy_lod_ext = true;
  5254. require_extension_internal("GL_EXT_shader_texture_lod");
  5255. }
  5256. else if (is_legacy_desktop())
  5257. require_extension_internal("GL_ARB_shader_texture_lod");
  5258. }
  5259. if (op == "textureLodOffset" || op == "textureProjLodOffset")
  5260. {
  5261. if (is_legacy_es())
  5262. SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
  5263. require_extension_internal("GL_EXT_gpu_shader4");
  5264. }
  5265. // GLES has very limited support for shadow samplers.
  5266. // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers,
5267. // everything else simply throws an error.
  5268. bool is_comparison = image_is_comparison(imgtype, tex);
  5269. if (is_comparison && is_legacy_es())
  5270. {
  5271. if (op == "texture" || op == "textureProj")
  5272. require_extension_internal("GL_EXT_shadow_samplers");
  5273. else
  5274. SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
  5275. }
  5276. if (op == "textureSize")
  5277. {
  5278. if (is_legacy_es())
  5279. SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
  5280. if (is_comparison)
  5281. SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
  5282. require_extension_internal("GL_EXT_gpu_shader4");
  5283. }
  5284. if (op == "texelFetch" && is_legacy_es())
  5285. SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
  5286. bool is_es_and_depth = is_legacy_es() && is_comparison;
  5287. std::string type_prefix = is_comparison ? "shadow" : "texture";
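// e.g. a depth-compare lookup in legacy ES maps "texture" to shadow2DEXT,
// while a plain lookup maps to texture2D, textureCube, etc.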
  5288. if (op == "texture")
  5289. return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
  5290. else if (op == "textureLod")
  5291. return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
  5292. else if (op == "textureProj")
  5293. return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
  5294. else if (op == "textureGrad")
  5295. return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
  5296. else if (op == "textureProjLod")
  5297. return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
  5298. else if (op == "textureLodOffset")
  5299. return join(type_prefix, type, "LodOffset");
  5300. else if (op == "textureProjGrad")
  5301. return join(type_prefix, type,
  5302. is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
  5303. else if (op == "textureProjLodOffset")
  5304. return join(type_prefix, type, "ProjLodOffset");
  5305. else if (op == "textureSize")
  5306. return join("textureSize", type);
  5307. else if (op == "texelFetch")
  5308. return join("texelFetch", type);
  5309. else
  5310. {
  5311. SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
  5312. }
  5313. }
  5314. bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
  5315. {
  5316. auto *cleft = maybe_get<SPIRConstant>(left);
  5317. auto *cright = maybe_get<SPIRConstant>(right);
  5318. auto &lerptype = expression_type(lerp);
  5319. // If our targets aren't constants, we cannot use construction.
  5320. if (!cleft || !cright)
  5321. return false;
  5322. // If our targets are spec constants, we cannot use construction.
  5323. if (cleft->specialization || cright->specialization)
  5324. return false;
  5325. // We can only use trivial construction if we have a scalar
  5326. // (should be possible to do it for vectors as well, but that is overkill for now).
  5327. if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1)
  5328. return false;
5329. // If our bool selects between 0 and 1, we can cast from bool instead, making this a trivial constructor cast.
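// e.g. a select between constant 0 and 1 with an int result collapses to int(cond).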
  5330. bool ret = false;
  5331. switch (type.basetype)
  5332. {
  5333. case SPIRType::Short:
  5334. case SPIRType::UShort:
  5335. ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1;
  5336. break;
  5337. case SPIRType::Int:
  5338. case SPIRType::UInt:
  5339. ret = cleft->scalar() == 0 && cright->scalar() == 1;
  5340. break;
  5341. case SPIRType::Half:
  5342. ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f;
  5343. break;
  5344. case SPIRType::Float:
  5345. ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
  5346. break;
  5347. case SPIRType::Double:
  5348. ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0;
  5349. break;
  5350. case SPIRType::Int64:
  5351. case SPIRType::UInt64:
  5352. ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1;
  5353. break;
  5354. default:
  5355. break;
  5356. }
  5357. if (ret)
  5358. op = type_to_glsl_constructor(type);
  5359. return ret;
  5360. }
  5361. string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
  5362. uint32_t false_value)
  5363. {
  5364. string expr;
  5365. auto &lerptype = expression_type(select);
  5366. if (lerptype.vecsize == 1)
  5367. expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
  5368. to_enclosed_pointer_expression(false_value));
  5369. else
  5370. {
  5371. auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
  5372. expr = type_to_glsl_constructor(restype);
  5373. expr += "(";
  5374. for (uint32_t i = 0; i < restype.vecsize; i++)
  5375. {
  5376. expr += swiz(select, i);
  5377. expr += " ? ";
  5378. expr += swiz(true_value, i);
  5379. expr += " : ";
  5380. expr += swiz(false_value, i);
  5381. if (i + 1 < restype.vecsize)
  5382. expr += ", ";
  5383. }
  5384. expr += ")";
  5385. }
  5386. return expr;
  5387. }
  5388. void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
  5389. {
  5390. auto &lerptype = expression_type(lerp);
  5391. auto &restype = get<SPIRType>(result_type);
  5392. // If this results in a variable pointer, assume it may be written through.
  5393. if (restype.pointer)
  5394. {
  5395. register_write(left);
  5396. register_write(right);
  5397. }
  5398. string mix_op;
  5399. bool has_boolean_mix = *backend.boolean_mix_function &&
  5400. ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
  5401. bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
5402. // Cannot use boolean mix when the lerp argument is just a single boolean;
5403. // fall back to regular ternary statements instead.
  5404. if (lerptype.vecsize == 1)
  5405. has_boolean_mix = false;
  5406. // If we can reduce the mix to a simple cast, do so.
  5407. // This helps for cases like int(bool), uint(bool) which is implemented with
  5408. // OpSelect bool 1 0.
  5409. if (trivial_mix)
  5410. {
  5411. emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
  5412. }
  5413. else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
  5414. {
  5415. // Boolean mix not supported on desktop without extension.
  5416. // Was added in OpenGL 4.5 with ES 3.1 compat.
  5417. //
  5418. // Could use GL_EXT_shader_integer_mix on desktop at least,
  5419. // but Apple doesn't support it. :(
  5420. // Just implement it as ternary expressions.
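// e.g. a two-component boolean mix(a, b, s) is lowered to
// vec2(s.x ? b.x : a.x, s.y ? b.y : a.y) (names illustrative).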
  5421. auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
  5422. emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
  5423. inherit_expression_dependencies(id, left);
  5424. inherit_expression_dependencies(id, right);
  5425. inherit_expression_dependencies(id, lerp);
  5426. }
  5427. else if (lerptype.basetype == SPIRType::Boolean)
  5428. emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
  5429. else
  5430. emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
  5431. }
  5432. string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
  5433. {
  5434. // Keep track of the array indices we have used to load the image.
  5435. // We'll need to use the same array index into the combined image sampler array.
  5436. auto image_expr = to_expression(image_id);
  5437. string array_expr;
  5438. auto array_index = image_expr.find_first_of('[');
  5439. if (array_index != string::npos)
  5440. array_expr = image_expr.substr(array_index, string::npos);
  5441. auto &args = current_function->arguments;
  5442. // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect
  5443. // all possible combinations into new sampler2D uniforms.
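// The remapped uniforms themselves are created by build_combined_image_samplers();
// here we only resolve which combined parameter or global the (image, sampler) pair maps to.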
  5444. auto *image = maybe_get_backing_variable(image_id);
  5445. auto *samp = maybe_get_backing_variable(samp_id);
  5446. if (image)
  5447. image_id = image->self;
  5448. if (samp)
  5449. samp_id = samp->self;
  5450. auto image_itr = find_if(begin(args), end(args),
  5451. [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
  5452. auto sampler_itr = find_if(begin(args), end(args),
  5453. [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
  5454. if (image_itr != end(args) || sampler_itr != end(args))
  5455. {
5456. // If the image or sampler originates from a function parameter, we will find it in our argument list.
  5457. bool global_image = image_itr == end(args);
  5458. bool global_sampler = sampler_itr == end(args);
  5459. VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
  5460. VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
  5461. auto &combined = current_function->combined_parameters;
  5462. auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
  5463. return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
  5464. p.sampler_id == sid;
  5465. });
  5466. if (itr != end(combined))
  5467. return to_expression(itr->id) + array_expr;
  5468. else
  5469. {
  5470. SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
  5471. "build_combined_image_samplers() used "
  5472. "before compile() was called?");
  5473. }
  5474. }
  5475. else
  5476. {
  5477. // For global sampler2D, look directly at the global remapping table.
  5478. auto &mapping = combined_image_samplers;
  5479. auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
  5480. return combined.image_id == image_id && combined.sampler_id == samp_id;
  5481. });
  5482. if (itr != end(combined_image_samplers))
  5483. return to_expression(itr->combined_id) + array_expr;
  5484. else
  5485. {
  5486. SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
  5487. "before compile() was called?");
  5488. }
  5489. }
  5490. }
  5491. bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
  5492. {
  5493. switch (op)
  5494. {
  5495. case OpGroupNonUniformElect:
  5496. case OpGroupNonUniformBallot:
  5497. case OpGroupNonUniformBallotFindLSB:
  5498. case OpGroupNonUniformBallotFindMSB:
  5499. case OpGroupNonUniformBroadcast:
  5500. case OpGroupNonUniformBroadcastFirst:
  5501. case OpGroupNonUniformAll:
  5502. case OpGroupNonUniformAny:
  5503. case OpGroupNonUniformAllEqual:
  5504. case OpControlBarrier:
  5505. case OpMemoryBarrier:
  5506. case OpGroupNonUniformBallotBitCount:
  5507. case OpGroupNonUniformBallotBitExtract:
  5508. case OpGroupNonUniformInverseBallot:
  5509. return true;
  5510. default:
  5511. return false;
  5512. }
  5513. }
  5514. void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
  5515. {
  5516. if (options.vulkan_semantics && combined_image_samplers.empty())
  5517. {
  5518. emit_binary_func_op(result_type, result_id, image_id, samp_id,
  5519. type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
  5520. }
  5521. else
  5522. {
  5523. // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
  5524. emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
  5525. }
  5526. // Make sure to suppress usage tracking and any expression invalidation.
  5527. // It is illegal to create temporaries of opaque types.
  5528. forwarded_temporaries.erase(result_id);
  5529. }
  5530. static inline bool image_opcode_is_sample_no_dref(Op op)
  5531. {
  5532. switch (op)
  5533. {
  5534. case OpImageSampleExplicitLod:
  5535. case OpImageSampleImplicitLod:
  5536. case OpImageSampleProjExplicitLod:
  5537. case OpImageSampleProjImplicitLod:
  5538. case OpImageFetch:
  5539. case OpImageRead:
  5540. case OpImageSparseSampleExplicitLod:
  5541. case OpImageSparseSampleImplicitLod:
  5542. case OpImageSparseSampleProjExplicitLod:
  5543. case OpImageSparseSampleProjImplicitLod:
  5544. case OpImageSparseFetch:
  5545. case OpImageSparseRead:
  5546. return true;
  5547. default:
  5548. return false;
  5549. }
  5550. }
  5551. void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
  5552. uint32_t &texel_id)
  5553. {
  5554. // Need to allocate two temporaries.
  5555. if (options.es)
  5556. SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
  5557. require_extension_internal("GL_ARB_sparse_texture2");
  5558. auto &temps = extra_sub_expressions[id];
  5559. if (temps == 0)
  5560. temps = ir.increase_bound_by(2);
  5561. feedback_id = temps + 0;
  5562. texel_id = temps + 1;
  5563. auto &return_type = get<SPIRType>(result_type_id);
  5564. if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
  5565. SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
  5566. emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
  5567. emit_uninitialized_temporary(return_type.member_types[1], texel_id);
  5568. }
  5569. uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
  5570. {
  5571. auto itr = extra_sub_expressions.find(id);
  5572. if (itr == extra_sub_expressions.end())
  5573. return 0;
  5574. else
  5575. return itr->second + 1;
  5576. }
  5577. void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
  5578. {
  5579. auto *ops = stream(i);
  5580. auto op = static_cast<Op>(i.op);
  5581. SmallVector<uint32_t> inherited_expressions;
  5582. uint32_t result_type_id = ops[0];
  5583. uint32_t id = ops[1];
  5584. auto &return_type = get<SPIRType>(result_type_id);
  5585. uint32_t sparse_code_id = 0;
  5586. uint32_t sparse_texel_id = 0;
  5587. if (sparse)
  5588. emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);
  5589. bool forward = false;
  5590. string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
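// For sparse feedback, the residency code is the return value of the builtin and the
// texel is written through an out parameter (appended in to_function_args), so the call
// is emitted into the code temporary and the SPIR-V struct result is reassembled from
// (code, texel) below.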
  5591. if (sparse)
  5592. {
  5593. statement(to_expression(sparse_code_id), " = ", expr, ";");
  5594. expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
  5595. ")");
  5596. forward = true;
  5597. inherited_expressions.clear();
  5598. }
  5599. emit_op(result_type_id, id, expr, forward);
  5600. for (auto &inherit : inherited_expressions)
  5601. inherit_expression_dependencies(id, inherit);
  5602. // Do not register sparse ops as control dependent as they are always lowered to a temporary.
  5603. switch (op)
  5604. {
  5605. case OpImageSampleDrefImplicitLod:
  5606. case OpImageSampleImplicitLod:
  5607. case OpImageSampleProjImplicitLod:
  5608. case OpImageSampleProjDrefImplicitLod:
  5609. register_control_dependent_expression(id);
  5610. break;
  5611. default:
  5612. break;
  5613. }
  5614. }
  5615. std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
  5616. SmallVector<uint32_t> &inherited_expressions)
  5617. {
  5618. auto *ops = stream(i);
  5619. auto op = static_cast<Op>(i.op);
  5620. uint32_t length = i.length;
  5621. uint32_t result_type_id = ops[0];
  5622. VariableID img = ops[2];
  5623. uint32_t coord = ops[3];
  5624. uint32_t dref = 0;
  5625. uint32_t comp = 0;
  5626. bool gather = false;
  5627. bool proj = false;
  5628. bool fetch = false;
  5629. bool nonuniform_expression = false;
  5630. const uint32_t *opt = nullptr;
  5631. auto &result_type = get<SPIRType>(result_type_id);
  5632. inherited_expressions.push_back(coord);
  5633. // Make sure non-uniform decoration is back-propagated to where it needs to be.
  5634. if (has_decoration(img, DecorationNonUniformEXT))
  5635. {
5636. // In Vulkan GLSL, we cannot back-propagate nonuniform qualifiers if we
  5637. // use a combined image sampler constructor.
  5638. // We're only interested in back-propagating if we can trace back through access chains.
  5639. // If not, we will apply nonuniform to the sampled image expression itself.
  5640. auto *backing = maybe_get_backing_variable(img);
  5641. if (backing)
  5642. propagate_nonuniform_qualifier(img);
  5643. else
  5644. nonuniform_expression = true;
  5645. }
  5646. switch (op)
  5647. {
  5648. case OpImageSampleDrefImplicitLod:
  5649. case OpImageSampleDrefExplicitLod:
  5650. case OpImageSparseSampleDrefImplicitLod:
  5651. case OpImageSparseSampleDrefExplicitLod:
  5652. dref = ops[4];
  5653. opt = &ops[5];
  5654. length -= 5;
  5655. break;
  5656. case OpImageSampleProjDrefImplicitLod:
  5657. case OpImageSampleProjDrefExplicitLod:
  5658. case OpImageSparseSampleProjDrefImplicitLod:
  5659. case OpImageSparseSampleProjDrefExplicitLod:
  5660. dref = ops[4];
  5661. opt = &ops[5];
  5662. length -= 5;
  5663. proj = true;
  5664. break;
  5665. case OpImageDrefGather:
  5666. case OpImageSparseDrefGather:
  5667. dref = ops[4];
  5668. opt = &ops[5];
  5669. length -= 5;
  5670. gather = true;
  5671. if (options.es && options.version < 310)
  5672. SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
  5673. else if (!options.es && options.version < 400)
  5674. SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
  5675. break;
  5676. case OpImageGather:
  5677. case OpImageSparseGather:
  5678. comp = ops[4];
  5679. opt = &ops[5];
  5680. length -= 5;
  5681. gather = true;
  5682. if (options.es && options.version < 310)
  5683. SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
  5684. else if (!options.es && options.version < 400)
  5685. {
  5686. if (!expression_is_constant_null(comp))
  5687. SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
  5688. require_extension_internal("GL_ARB_texture_gather");
  5689. }
  5690. break;
  5691. case OpImageFetch:
  5692. case OpImageSparseFetch:
5693. case OpImageRead: // Reads == fetches in Metal (other languages will not get here)
  5694. opt = &ops[4];
  5695. length -= 4;
  5696. fetch = true;
  5697. break;
  5698. case OpImageSampleProjImplicitLod:
  5699. case OpImageSampleProjExplicitLod:
  5700. case OpImageSparseSampleProjImplicitLod:
  5701. case OpImageSparseSampleProjExplicitLod:
  5702. opt = &ops[4];
  5703. length -= 4;
  5704. proj = true;
  5705. break;
  5706. default:
  5707. opt = &ops[4];
  5708. length -= 4;
  5709. break;
  5710. }
  5711. // Bypass pointers because we need the real image struct
  5712. auto &type = expression_type(img);
  5713. auto &imgtype = get<SPIRType>(type.self);
  5714. uint32_t coord_components = 0;
  5715. switch (imgtype.image.dim)
  5716. {
  5717. case spv::Dim1D:
  5718. coord_components = 1;
  5719. break;
  5720. case spv::Dim2D:
  5721. coord_components = 2;
  5722. break;
  5723. case spv::Dim3D:
  5724. coord_components = 3;
  5725. break;
  5726. case spv::DimCube:
  5727. coord_components = 3;
  5728. break;
  5729. case spv::DimBuffer:
  5730. coord_components = 1;
  5731. break;
  5732. default:
  5733. coord_components = 2;
  5734. break;
  5735. }
  5736. if (dref)
  5737. inherited_expressions.push_back(dref);
  5738. if (proj)
  5739. coord_components++;
  5740. if (imgtype.image.arrayed)
  5741. coord_components++;
  5742. uint32_t bias = 0;
  5743. uint32_t lod = 0;
  5744. uint32_t grad_x = 0;
  5745. uint32_t grad_y = 0;
  5746. uint32_t coffset = 0;
  5747. uint32_t offset = 0;
  5748. uint32_t coffsets = 0;
  5749. uint32_t sample = 0;
  5750. uint32_t minlod = 0;
  5751. uint32_t flags = 0;
  5752. if (length)
  5753. {
  5754. flags = *opt++;
  5755. length--;
  5756. }
  5757. auto test = [&](uint32_t &v, uint32_t flag) {
  5758. if (length && (flags & flag))
  5759. {
  5760. v = *opt++;
  5761. inherited_expressions.push_back(v);
  5762. length--;
  5763. }
  5764. };
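// Image operands are consumed in ascending bit order; Grad contributes two
// operands (ddx, ddy), which is why ImageOperandsGradMask is tested twice.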
  5765. test(bias, ImageOperandsBiasMask);
  5766. test(lod, ImageOperandsLodMask);
  5767. test(grad_x, ImageOperandsGradMask);
  5768. test(grad_y, ImageOperandsGradMask);
  5769. test(coffset, ImageOperandsConstOffsetMask);
  5770. test(offset, ImageOperandsOffsetMask);
  5771. test(coffsets, ImageOperandsConstOffsetsMask);
  5772. test(sample, ImageOperandsSampleMask);
  5773. test(minlod, ImageOperandsMinLodMask);
  5774. TextureFunctionBaseArguments base_args = {};
  5775. base_args.img = img;
  5776. base_args.imgtype = &imgtype;
  5777. base_args.is_fetch = fetch != 0;
  5778. base_args.is_gather = gather != 0;
  5779. base_args.is_proj = proj != 0;
  5780. string expr;
  5781. TextureFunctionNameArguments name_args = {};
  5782. name_args.base = base_args;
  5783. name_args.has_array_offsets = coffsets != 0;
  5784. name_args.has_offset = coffset != 0 || offset != 0;
  5785. name_args.has_grad = grad_x != 0 || grad_y != 0;
  5786. name_args.has_dref = dref != 0;
  5787. name_args.is_sparse_feedback = sparse;
  5788. name_args.has_min_lod = minlod != 0;
  5789. name_args.lod = lod;
  5790. expr += to_function_name(name_args);
  5791. expr += "(";
  5792. uint32_t sparse_texel_id = 0;
  5793. if (sparse)
  5794. sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
  5795. TextureFunctionArguments args = {};
  5796. args.base = base_args;
  5797. args.coord = coord;
  5798. args.coord_components = coord_components;
  5799. args.dref = dref;
  5800. args.grad_x = grad_x;
  5801. args.grad_y = grad_y;
  5802. args.lod = lod;
  5803. args.coffset = coffset;
  5804. args.offset = offset;
  5805. args.bias = bias;
  5806. args.component = comp;
  5807. args.sample = sample;
  5808. args.sparse_texel = sparse_texel_id;
  5809. args.min_lod = minlod;
  5810. args.nonuniform_expression = nonuniform_expression;
  5811. expr += to_function_args(args, forward);
  5812. expr += ")";
  5813. // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
  5814. if (is_legacy() && image_is_comparison(imgtype, img))
  5815. expr += ".r";
5816. // Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
  5817. // Remap back to 4 components as sampling opcodes expect.
  5818. if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
  5819. {
  5820. bool image_is_depth = false;
  5821. const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
  5822. VariableID image_id = combined ? combined->image : img;
  5823. if (combined && image_is_comparison(imgtype, combined->image))
  5824. image_is_depth = true;
  5825. else if (image_is_comparison(imgtype, img))
  5826. image_is_depth = true;
  5827. // We must also check the backing variable for the image.
  5828. // We might have loaded an OpImage, and used that handle for two different purposes.
  5829. // Once with comparison, once without.
  5830. auto *image_variable = maybe_get_backing_variable(image_id);
  5831. if (image_variable && image_is_comparison(get<SPIRType>(image_variable->basetype), image_variable->self))
  5832. image_is_depth = true;
  5833. if (image_is_depth)
  5834. expr = remap_swizzle(result_type, 1, expr);
  5835. }
  5836. if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
  5837. {
  5838. // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
5839. // Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
  5840. expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
  5841. }
  5842. // Deals with reads from MSL. We might need to downconvert to fewer components.
  5843. if (op == OpImageRead)
  5844. expr = remap_swizzle(result_type, 4, expr);
  5845. return expr;
  5846. }
  5847. bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
  5848. {
  5849. auto *c = maybe_get<SPIRConstant>(id);
  5850. if (!c)
  5851. return false;
  5852. return c->constant_is_null();
  5853. }
  5854. bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
  5855. {
  5856. auto &type = expression_type(ptr);
  5857. if (type.array.empty())
  5858. return false;
  5859. if (!backend.array_is_value_type)
  5860. return true;
  5861. auto *var = maybe_get_backing_variable(ptr);
  5862. if (!var)
  5863. return false;
  5864. auto &backed_type = get<SPIRType>(var->basetype);
  5865. return !backend.buffer_offset_array_is_value_type && backed_type.basetype == SPIRType::Struct &&
  5866. has_member_decoration(backed_type.self, 0, DecorationOffset);
  5867. }
  5868. // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
  5869. // For some subclasses, the function is a method on the specified image.
  5870. string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
  5871. {
  5872. if (args.has_min_lod)
  5873. {
  5874. if (options.es)
  5875. SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
  5876. require_extension_internal("GL_ARB_sparse_texture_clamp");
  5877. }
  5878. string fname;
  5879. auto &imgtype = *args.base.imgtype;
  5880. VariableID tex = args.base.img;
  5881. // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
  5882. // To emulate this, we will have to use textureGrad with a constant gradient of 0.
  5883. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
  5884. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
  5885. bool workaround_lod_array_shadow_as_grad = false;
  5886. if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
  5887. image_is_comparison(imgtype, tex) && args.lod)
  5888. {
  5889. if (!expression_is_constant_null(args.lod))
  5890. {
  5891. SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
  5892. "expressed in GLSL.");
  5893. }
  5894. workaround_lod_array_shadow_as_grad = true;
  5895. }
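// The name is assembled piecewise below, e.g. textureProjLod,
// sparseTextureGatherOffsetARB, or textureGradClampARB when a minimum LOD is requested.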
  5896. if (args.is_sparse_feedback)
  5897. fname += "sparse";
  5898. if (args.base.is_fetch)
  5899. fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
  5900. else
  5901. {
  5902. fname += args.is_sparse_feedback ? "Texture" : "texture";
  5903. if (args.base.is_gather)
  5904. fname += "Gather";
  5905. if (args.has_array_offsets)
  5906. fname += "Offsets";
  5907. if (args.base.is_proj)
  5908. fname += "Proj";
  5909. if (args.has_grad || workaround_lod_array_shadow_as_grad)
  5910. fname += "Grad";
  5911. if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
  5912. fname += "Lod";
  5913. }
  5914. if (args.has_offset)
  5915. fname += "Offset";
  5916. if (args.has_min_lod)
  5917. fname += "Clamp";
  5918. if (args.is_sparse_feedback || args.has_min_lod)
  5919. fname += "ARB";
  5920. return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
  5921. }
  5922. std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
  5923. {
  5924. auto *var = maybe_get_backing_variable(id);
  5925. // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
  5926. // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
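// e.g. with a dummy sampler, a separate 2D texture uTex gets wrapped as
// sampler2D(uTex, uDummySampler) so texelFetch() can consume it (names illustrative).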
  5927. if (var)
  5928. {
  5929. auto &type = get<SPIRType>(var->basetype);
  5930. if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
  5931. {
  5932. if (options.vulkan_semantics)
  5933. {
  5934. if (dummy_sampler_id)
  5935. {
  5936. // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
  5937. auto sampled_type = type;
  5938. sampled_type.basetype = SPIRType::SampledImage;
  5939. return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ",
  5940. to_expression(dummy_sampler_id), ")");
  5941. }
  5942. else
  5943. {
  5944. // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
  5945. require_extension_internal("GL_EXT_samplerless_texture_functions");
  5946. }
  5947. }
  5948. else
  5949. {
  5950. if (!dummy_sampler_id)
  5951. SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
  5952. "build_dummy_sampler_for_combined_images() called?");
  5953. return to_combined_image_sampler(id, dummy_sampler_id);
  5954. }
  5955. }
  5956. }
  5957. return to_expression(id);
  5958. }
  5959. // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
  5960. string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
  5961. {
  5962. VariableID img = args.base.img;
  5963. auto &imgtype = *args.base.imgtype;
  5964. string farg_str;
  5965. if (args.base.is_fetch)
  5966. farg_str = convert_separate_image_to_expression(img);
  5967. else
  5968. farg_str = to_expression(img);
  5969. if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
  5970. {
  5971. // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
  5972. farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
  5973. }
  5974. bool swizz_func = backend.swizzle_is_function;
  5975. auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
  5976. if (comps == in_comps)
  5977. return "";
  5978. switch (comps)
  5979. {
  5980. case 1:
  5981. return ".x";
  5982. case 2:
  5983. return swizz_func ? ".xy()" : ".xy";
  5984. case 3:
  5985. return swizz_func ? ".xyz()" : ".xyz";
  5986. default:
  5987. return "";
  5988. }
  5989. };
  5990. bool forward = should_forward(args.coord);
  5991. // The IR can give us more components than we need, so chop them off as needed.
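// e.g. a vec4 coordinate used with a plain 2D image becomes coord.xy
// (or coord.xy() on backends where swizzles are emitted as functions).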
  5992. auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
  5993. // Only enclose the UV expression if needed.
  5994. auto coord_expr =
  5995. (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
  5996. // texelFetch only takes int, not uint.
  5997. auto &coord_type = expression_type(args.coord);
  5998. if (coord_type.basetype == SPIRType::UInt)
  5999. {
  6000. auto expected_type = coord_type;
  6001. expected_type.vecsize = args.coord_components;
  6002. expected_type.basetype = SPIRType::Int;
  6003. coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
  6004. }
  6005. // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
  6006. // To emulate this, we will have to use textureGrad with a constant gradient of 0.
  6007. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
  6008. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
  6009. bool workaround_lod_array_shadow_as_grad =
  6010. ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
  6011. image_is_comparison(imgtype, img) && args.lod != 0;
  6012. if (args.dref)
  6013. {
  6014. forward = forward && should_forward(args.dref);
  6015. // SPIR-V splits dref and coordinate.
  6016. if (args.base.is_gather ||
  6017. args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
  6018. {
  6019. farg_str += ", ";
  6020. farg_str += to_expression(args.coord);
  6021. farg_str += ", ";
  6022. farg_str += to_expression(args.dref);
  6023. }
  6024. else if (args.base.is_proj)
  6025. {
  6026. // Have to reshuffle so we get vec4(coord, dref, proj), special case.
6027. // Other shading languages split up the coord and compare value arguments like SPIR-V does.
  6028. // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
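// e.g. for a 2D shadow proj lookup the argument list becomes
// (s, vec4(coord.xy, dref, coord.z)) (names illustrative).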
  6029. farg_str += ", vec4(";
  6030. if (imgtype.image.dim == Dim1D)
  6031. {
  6032. // Could reuse coord_expr, but we will mess up the temporary usage checking.
  6033. farg_str += to_enclosed_expression(args.coord) + ".x";
  6034. farg_str += ", ";
  6035. farg_str += "0.0, ";
  6036. farg_str += to_expression(args.dref);
  6037. farg_str += ", ";
  6038. farg_str += to_enclosed_expression(args.coord) + ".y)";
  6039. }
  6040. else if (imgtype.image.dim == Dim2D)
  6041. {
  6042. // Could reuse coord_expr, but we will mess up the temporary usage checking.
  6043. farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
  6044. farg_str += ", ";
  6045. farg_str += to_expression(args.dref);
  6046. farg_str += ", ";
  6047. farg_str += to_enclosed_expression(args.coord) + ".z)";
  6048. }
  6049. else
  6050. SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
  6051. }
  6052. else
  6053. {
  6054. // Create a composite which merges coord/dref into a single vector.
  6055. auto type = expression_type(args.coord);
  6056. type.vecsize = args.coord_components + 1;
  6057. farg_str += ", ";
  6058. farg_str += type_to_glsl_constructor(type);
  6059. farg_str += "(";
  6060. farg_str += coord_expr;
  6061. farg_str += ", ";
  6062. farg_str += to_expression(args.dref);
  6063. farg_str += ")";
  6064. }
  6065. }
  6066. else
  6067. {
  6068. farg_str += ", ";
  6069. farg_str += coord_expr;
  6070. }
  6071. if (args.grad_x || args.grad_y)
  6072. {
  6073. forward = forward && should_forward(args.grad_x);
  6074. forward = forward && should_forward(args.grad_y);
  6075. farg_str += ", ";
  6076. farg_str += to_expression(args.grad_x);
  6077. farg_str += ", ";
  6078. farg_str += to_expression(args.grad_y);
  6079. }
  6080. if (args.lod)
  6081. {
  6082. if (workaround_lod_array_shadow_as_grad)
  6083. {
  6084. // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
  6085. // Implementing this as plain texture() is not safe on some implementations.
  6086. if (imgtype.image.dim == Dim2D)
  6087. farg_str += ", vec2(0.0), vec2(0.0)";
  6088. else if (imgtype.image.dim == DimCube)
  6089. farg_str += ", vec3(0.0), vec3(0.0)";
  6090. }
  6091. else
  6092. {
  6093. forward = forward && should_forward(args.lod);
  6094. farg_str += ", ";
  6095. auto &lod_expr_type = expression_type(args.lod);
  6096. // Lod expression for TexelFetch in GLSL must be int, and only int.
  6097. if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
  6098. lod_expr_type.basetype != SPIRType::Int)
  6099. {
  6100. farg_str += join("int(", to_expression(args.lod), ")");
  6101. }
  6102. else
  6103. {
  6104. farg_str += to_expression(args.lod);
  6105. }
  6106. }
  6107. }
  6108. else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
  6109. {
6110. // The Lod argument is optional in OpImageFetch, but GLSL texelFetch requires a LOD value, so pick 0 as the default.
  6111. farg_str += ", 0";
  6112. }
  6113. if (args.coffset)
  6114. {
  6115. forward = forward && should_forward(args.coffset);
  6116. farg_str += ", ";
  6117. farg_str += to_expression(args.coffset);
  6118. }
  6119. else if (args.offset)
  6120. {
  6121. forward = forward && should_forward(args.offset);
  6122. farg_str += ", ";
  6123. farg_str += to_expression(args.offset);
  6124. }
  6125. if (args.sample)
  6126. {
  6127. farg_str += ", ";
  6128. farg_str += to_expression(args.sample);
  6129. }
  6130. if (args.min_lod)
  6131. {
  6132. farg_str += ", ";
  6133. farg_str += to_expression(args.min_lod);
  6134. }
  6135. if (args.sparse_texel)
  6136. {
6137. // The sparse texel output parameter comes after everything else, except that it goes before the optional component/bias arguments.
  6138. farg_str += ", ";
  6139. farg_str += to_expression(args.sparse_texel);
  6140. }
  6141. if (args.bias)
  6142. {
  6143. forward = forward && should_forward(args.bias);
  6144. farg_str += ", ";
  6145. farg_str += to_expression(args.bias);
  6146. }
  6147. if (args.component && !expression_is_constant_null(args.component))
  6148. {
  6149. forward = forward && should_forward(args.component);
  6150. farg_str += ", ";
  6151. farg_str += to_expression(args.component);
  6152. }
  6153. *p_forward = forward;
  6154. return farg_str;
  6155. }
  6156. void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
  6157. {
  6158. auto op = static_cast<GLSLstd450>(eop);
  6159. if (is_legacy() && is_unsigned_glsl_opcode(op))
  6160. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
  6161. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  6162. uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
  6163. auto int_type = to_signed_basetype(integer_width);
  6164. auto uint_type = to_unsigned_basetype(integer_width);
  6165. switch (op)
  6166. {
  6167. // FP fiddling
  6168. case GLSLstd450Round:
  6169. if (!is_legacy())
  6170. emit_unary_func_op(result_type, id, args[0], "round");
  6171. else
  6172. {
  6173. auto op0 = to_enclosed_expression(args[0]);
  6174. auto &op0_type = expression_type(args[0]);
  6175. auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
  6176. bool forward = should_forward(args[0]);
  6177. emit_op(result_type, id, expr, forward);
  6178. inherit_expression_dependencies(id, args[0]);
  6179. }
  6180. break;
  6181. case GLSLstd450RoundEven:
  6182. if (!is_legacy())
  6183. emit_unary_func_op(result_type, id, args[0], "roundEven");
  6184. else if (!options.es)
  6185. {
  6186. // This extension provides round() with round-to-even semantics.
  6187. require_extension_internal("GL_EXT_gpu_shader4");
  6188. emit_unary_func_op(result_type, id, args[0], "round");
  6189. }
  6190. else
  6191. SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
  6192. break;
  6193. case GLSLstd450Trunc:
  6194. emit_unary_func_op(result_type, id, args[0], "trunc");
  6195. break;
  6196. case GLSLstd450SAbs:
  6197. emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
  6198. break;
  6199. case GLSLstd450FAbs:
  6200. emit_unary_func_op(result_type, id, args[0], "abs");
  6201. break;
  6202. case GLSLstd450SSign:
  6203. emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
  6204. break;
  6205. case GLSLstd450FSign:
  6206. emit_unary_func_op(result_type, id, args[0], "sign");
  6207. break;
  6208. case GLSLstd450Floor:
  6209. emit_unary_func_op(result_type, id, args[0], "floor");
  6210. break;
  6211. case GLSLstd450Ceil:
  6212. emit_unary_func_op(result_type, id, args[0], "ceil");
  6213. break;
  6214. case GLSLstd450Fract:
  6215. emit_unary_func_op(result_type, id, args[0], "fract");
  6216. break;
  6217. case GLSLstd450Radians:
  6218. emit_unary_func_op(result_type, id, args[0], "radians");
  6219. break;
  6220. case GLSLstd450Degrees:
  6221. emit_unary_func_op(result_type, id, args[0], "degrees");
  6222. break;
  6223. case GLSLstd450Fma:
  6224. if ((!options.es && options.version < 400) || (options.es && options.version < 320))
  6225. {
  6226. auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
  6227. to_enclosed_expression(args[2]));
  6228. emit_op(result_type, id, expr,
  6229. should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
  6230. for (uint32_t i = 0; i < 3; i++)
  6231. inherit_expression_dependencies(id, args[i]);
  6232. }
  6233. else
  6234. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
  6235. break;
  6236. case GLSLstd450Modf:
  6237. register_call_out_argument(args[1]);
  6238. forced_temporaries.insert(id);
  6239. emit_binary_func_op(result_type, id, args[0], args[1], "modf");
  6240. break;
  6241. case GLSLstd450ModfStruct:
  6242. {
  6243. auto &type = get<SPIRType>(result_type);
  6244. emit_uninitialized_temporary_expression(result_type, id);
  6245. statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
  6246. to_expression(id), ".", to_member_name(type, 1), ");");
  6247. break;
  6248. }
  6249. // Minmax
  6250. case GLSLstd450UMin:
  6251. emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
  6252. break;
  6253. case GLSLstd450SMin:
  6254. emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
  6255. break;
  6256. case GLSLstd450FMin:
  6257. emit_binary_func_op(result_type, id, args[0], args[1], "min");
  6258. break;
  6259. case GLSLstd450FMax:
  6260. emit_binary_func_op(result_type, id, args[0], args[1], "max");
  6261. break;
  6262. case GLSLstd450UMax:
  6263. emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
  6264. break;
  6265. case GLSLstd450SMax:
  6266. emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
  6267. break;
  6268. case GLSLstd450FClamp:
  6269. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
  6270. break;
  6271. case GLSLstd450UClamp:
  6272. emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
  6273. break;
  6274. case GLSLstd450SClamp:
  6275. emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
  6276. break;
  6277. // Trig
  6278. case GLSLstd450Sin:
  6279. emit_unary_func_op(result_type, id, args[0], "sin");
  6280. break;
  6281. case GLSLstd450Cos:
  6282. emit_unary_func_op(result_type, id, args[0], "cos");
  6283. break;
  6284. case GLSLstd450Tan:
  6285. emit_unary_func_op(result_type, id, args[0], "tan");
  6286. break;
  6287. case GLSLstd450Asin:
  6288. emit_unary_func_op(result_type, id, args[0], "asin");
  6289. break;
  6290. case GLSLstd450Acos:
  6291. emit_unary_func_op(result_type, id, args[0], "acos");
  6292. break;
  6293. case GLSLstd450Atan:
  6294. emit_unary_func_op(result_type, id, args[0], "atan");
  6295. break;
  6296. case GLSLstd450Sinh:
  6297. emit_unary_func_op(result_type, id, args[0], "sinh");
  6298. break;
  6299. case GLSLstd450Cosh:
  6300. emit_unary_func_op(result_type, id, args[0], "cosh");
  6301. break;
  6302. case GLSLstd450Tanh:
  6303. emit_unary_func_op(result_type, id, args[0], "tanh");
  6304. break;
  6305. case GLSLstd450Asinh:
  6306. emit_unary_func_op(result_type, id, args[0], "asinh");
  6307. break;
  6308. case GLSLstd450Acosh:
  6309. emit_unary_func_op(result_type, id, args[0], "acosh");
  6310. break;
  6311. case GLSLstd450Atanh:
  6312. emit_unary_func_op(result_type, id, args[0], "atanh");
  6313. break;
  6314. case GLSLstd450Atan2:
  6315. emit_binary_func_op(result_type, id, args[0], args[1], "atan");
  6316. break;
  6317. // Exponentials
  6318. case GLSLstd450Pow:
  6319. emit_binary_func_op(result_type, id, args[0], args[1], "pow");
  6320. break;
  6321. case GLSLstd450Exp:
  6322. emit_unary_func_op(result_type, id, args[0], "exp");
  6323. break;
  6324. case GLSLstd450Log:
  6325. emit_unary_func_op(result_type, id, args[0], "log");
  6326. break;
  6327. case GLSLstd450Exp2:
  6328. emit_unary_func_op(result_type, id, args[0], "exp2");
  6329. break;
  6330. case GLSLstd450Log2:
  6331. emit_unary_func_op(result_type, id, args[0], "log2");
  6332. break;
  6333. case GLSLstd450Sqrt:
  6334. emit_unary_func_op(result_type, id, args[0], "sqrt");
  6335. break;
  6336. case GLSLstd450InverseSqrt:
  6337. emit_unary_func_op(result_type, id, args[0], "inversesqrt");
  6338. break;
  6339. // Matrix math
  6340. case GLSLstd450Determinant:
  6341. emit_unary_func_op(result_type, id, args[0], "determinant");
  6342. break;
  6343. case GLSLstd450MatrixInverse:
  6344. emit_unary_func_op(result_type, id, args[0], "inverse");
  6345. break;
  6346. // Lerping
  6347. case GLSLstd450FMix:
  6348. case GLSLstd450IMix:
  6349. {
  6350. emit_mix_op(result_type, id, args[0], args[1], args[2]);
  6351. break;
  6352. }
  6353. case GLSLstd450Step:
  6354. emit_binary_func_op(result_type, id, args[0], args[1], "step");
  6355. break;
  6356. case GLSLstd450SmoothStep:
  6357. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
  6358. break;
  6359. // Packing
  6360. case GLSLstd450Frexp:
  6361. register_call_out_argument(args[1]);
  6362. forced_temporaries.insert(id);
  6363. emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
  6364. break;
  6365. case GLSLstd450FrexpStruct:
  6366. {
  6367. auto &type = get<SPIRType>(result_type);
  6368. emit_uninitialized_temporary_expression(result_type, id);
  6369. statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
  6370. to_expression(id), ".", to_member_name(type, 1), ");");
  6371. break;
  6372. }
  6373. case GLSLstd450Ldexp:
  6374. {
  6375. bool forward = should_forward(args[0]) && should_forward(args[1]);
  6376. auto op0 = to_unpacked_expression(args[0]);
  6377. auto op1 = to_unpacked_expression(args[1]);
  6378. auto &op1_type = expression_type(args[1]);
  6379. if (op1_type.basetype != SPIRType::Int)
  6380. {
  6381. // Need a value cast here.
  6382. auto target_type = op1_type;
  6383. target_type.basetype = SPIRType::Int;
  6384. op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
  6385. }
  6386. auto expr = join("ldexp(", op0, ", ", op1, ")");
  6387. emit_op(result_type, id, expr, forward);
  6388. inherit_expression_dependencies(id, args[0]);
  6389. inherit_expression_dependencies(id, args[1]);
  6390. break;
  6391. }
  6392. case GLSLstd450PackSnorm4x8:
  6393. emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
  6394. break;
  6395. case GLSLstd450PackUnorm4x8:
  6396. emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
  6397. break;
  6398. case GLSLstd450PackSnorm2x16:
  6399. emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
  6400. break;
  6401. case GLSLstd450PackUnorm2x16:
  6402. emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
  6403. break;
  6404. case GLSLstd450PackHalf2x16:
  6405. emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
  6406. break;
  6407. case GLSLstd450UnpackSnorm4x8:
  6408. emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
  6409. break;
  6410. case GLSLstd450UnpackUnorm4x8:
  6411. emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
  6412. break;
  6413. case GLSLstd450UnpackSnorm2x16:
  6414. emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
  6415. break;
  6416. case GLSLstd450UnpackUnorm2x16:
  6417. emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
  6418. break;
  6419. case GLSLstd450UnpackHalf2x16:
  6420. emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
  6421. break;
  6422. case GLSLstd450PackDouble2x32:
  6423. emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
  6424. break;
  6425. case GLSLstd450UnpackDouble2x32:
  6426. emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
  6427. break;
  6428. // Vector math
  6429. case GLSLstd450Length:
  6430. emit_unary_func_op(result_type, id, args[0], "length");
  6431. break;
  6432. case GLSLstd450Distance:
  6433. emit_binary_func_op(result_type, id, args[0], args[1], "distance");
  6434. break;
  6435. case GLSLstd450Cross:
  6436. emit_binary_func_op(result_type, id, args[0], args[1], "cross");
  6437. break;
  6438. case GLSLstd450Normalize:
  6439. emit_unary_func_op(result_type, id, args[0], "normalize");
  6440. break;
  6441. case GLSLstd450FaceForward:
  6442. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
  6443. break;
  6444. case GLSLstd450Reflect:
  6445. emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
  6446. break;
  6447. case GLSLstd450Refract:
  6448. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
  6449. break;
  6450. // Bit-fiddling
  6451. case GLSLstd450FindILsb:
  6452. // findLSB always returns int.
  6453. emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
  6454. break;
  6455. case GLSLstd450FindSMsb:
  6456. emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
  6457. break;
  6458. case GLSLstd450FindUMsb:
  6459. emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
  6460. int_type); // findMSB always returns int.
  6461. break;
  6462. // Multisampled varying
  6463. case GLSLstd450InterpolateAtCentroid:
  6464. emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
  6465. break;
  6466. case GLSLstd450InterpolateAtSample:
  6467. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
  6468. break;
  6469. case GLSLstd450InterpolateAtOffset:
  6470. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
  6471. break;
6472. // Panos: Treat NMin/NMax as plain min/max to avoid the isnan-based emulation below.
  6473. #if 0
  6474. case GLSLstd450NMin:
  6475. case GLSLstd450NMax:
  6476. {
  6477. emit_nminmax_op(result_type, id, args[0], args[1], op);
  6478. break;
  6479. }
  6480. #else
  6481. case GLSLstd450NMin:
  6482. emit_binary_func_op(result_type, id, args[0], args[1], "min");
  6483. break;
  6484. case GLSLstd450NMax:
  6485. emit_binary_func_op(result_type, id, args[0], args[1], "max");
  6486. break;
  6487. #endif
  6488. case GLSLstd450NClamp:
  6489. {
  6490. // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
  6491. // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
  6492. uint32_t &max_id = extra_sub_expressions[id | 0x80000000u];
  6493. if (!max_id)
  6494. max_id = ir.increase_bound_by(1);
  6495. // Inherit precision qualifiers.
  6496. ir.meta[max_id] = ir.meta[id];
  6497. emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
  6498. emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
  6499. break;
  6500. }
  6501. default:
  6502. statement("// unimplemented GLSL op ", eop);
  6503. break;
  6504. }
  6505. }
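// NMin/NMax must return the non-NaN operand when exactly one operand is NaN, a guarantee
// plain GLSL min()/max() does not make. emit_nminmax_op() emulates this by emitting
// roughly the following GLSL (sketch, names illustrative only):
//   bool ln = isnan(a), rn = isnan(b);
//   T tmp = min(a, b);        // max(a, b) for NMax
//   T m0  = mix(tmp, b, ln);  // a is NaN -> take b
//   T res = mix(m0, a, rn);   // b is NaN -> take a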
  6506. void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
  6507. {
  6508. // Need to emulate this call.
  6509. uint32_t &ids = extra_sub_expressions[id];
  6510. if (!ids)
  6511. {
  6512. ids = ir.increase_bound_by(5);
  6513. auto btype = get<SPIRType>(result_type);
  6514. btype.basetype = SPIRType::Boolean;
  6515. set<SPIRType>(ids, btype);
  6516. }
  6517. uint32_t btype_id = ids + 0;
  6518. uint32_t left_nan_id = ids + 1;
  6519. uint32_t right_nan_id = ids + 2;
  6520. uint32_t tmp_id = ids + 3;
  6521. uint32_t mixed_first_id = ids + 4;
  6522. // Inherit precision qualifiers.
  6523. ir.meta[tmp_id] = ir.meta[id];
  6524. ir.meta[mixed_first_id] = ir.meta[id];
  6525. emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
  6526. emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
  6527. emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
  6528. emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
  6529. emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
  6530. }
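// SPV_AMD_shader_ballot instructions map directly onto the GL_AMD_shader_ballot built-ins
// of the same name. Their results depend on which invocations are active, so each result
// is registered as control-dependent and is not forwarded across control flow.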
  6531. void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
  6532. uint32_t)
  6533. {
  6534. require_extension_internal("GL_AMD_shader_ballot");
  6535. enum AMDShaderBallot
  6536. {
  6537. SwizzleInvocationsAMD = 1,
  6538. SwizzleInvocationsMaskedAMD = 2,
  6539. WriteInvocationAMD = 3,
  6540. MbcntAMD = 4
  6541. };
  6542. auto op = static_cast<AMDShaderBallot>(eop);
  6543. switch (op)
  6544. {
  6545. case SwizzleInvocationsAMD:
  6546. emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
  6547. register_control_dependent_expression(id);
  6548. break;
  6549. case SwizzleInvocationsMaskedAMD:
  6550. emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
  6551. register_control_dependent_expression(id);
  6552. break;
  6553. case WriteInvocationAMD:
  6554. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
  6555. register_control_dependent_expression(id);
  6556. break;
  6557. case MbcntAMD:
  6558. emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
  6559. register_control_dependent_expression(id);
  6560. break;
  6561. default:
  6562. statement("// unimplemented SPV AMD shader ballot op ", eop);
  6563. break;
  6564. }
  6565. }
  6566. void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
  6567. const uint32_t *args, uint32_t)
  6568. {
  6569. require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
  6570. enum AMDShaderExplicitVertexParameter
  6571. {
  6572. InterpolateAtVertexAMD = 1
  6573. };
  6574. auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
  6575. switch (op)
  6576. {
  6577. case InterpolateAtVertexAMD:
  6578. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
  6579. break;
  6580. default:
  6581. statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
  6582. break;
  6583. }
  6584. }
  6585. void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
  6586. const uint32_t *args, uint32_t)
  6587. {
  6588. require_extension_internal("GL_AMD_shader_trinary_minmax");
  6589. enum AMDShaderTrinaryMinMax
  6590. {
  6591. FMin3AMD = 1,
  6592. UMin3AMD = 2,
  6593. SMin3AMD = 3,
  6594. FMax3AMD = 4,
  6595. UMax3AMD = 5,
  6596. SMax3AMD = 6,
  6597. FMid3AMD = 7,
  6598. UMid3AMD = 8,
  6599. SMid3AMD = 9
  6600. };
  6601. auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
  6602. switch (op)
  6603. {
  6604. case FMin3AMD:
  6605. case UMin3AMD:
  6606. case SMin3AMD:
  6607. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
  6608. break;
  6609. case FMax3AMD:
  6610. case UMax3AMD:
  6611. case SMax3AMD:
  6612. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
  6613. break;
  6614. case FMid3AMD:
  6615. case UMid3AMD:
  6616. case SMid3AMD:
  6617. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
  6618. break;
  6619. default:
  6620. statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
  6621. break;
  6622. }
  6623. }
  6624. void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
  6625. uint32_t)
  6626. {
  6627. require_extension_internal("GL_AMD_gcn_shader");
  6628. enum AMDGCNShader
  6629. {
  6630. CubeFaceIndexAMD = 1,
  6631. CubeFaceCoordAMD = 2,
  6632. TimeAMD = 3
  6633. };
  6634. auto op = static_cast<AMDGCNShader>(eop);
  6635. switch (op)
  6636. {
  6637. case CubeFaceIndexAMD:
  6638. emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
  6639. break;
  6640. case CubeFaceCoordAMD:
  6641. emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
  6642. break;
  6643. case TimeAMD:
  6644. {
  6645. string expr = "timeAMD()";
  6646. emit_op(result_type, id, expr, true);
  6647. register_control_dependent_expression(id);
  6648. break;
  6649. }
  6650. default:
  6651. statement("// unimplemented SPV AMD gcn shader op ", eop);
  6652. break;
  6653. }
  6654. }
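// Subgroup handling is done in two passes over the opcode: the first switch only requests
// the required subgroup feature or GL_KHR_shader_subgroup_* extension (used to select
// extensions or workaround paths on plain GL), and the second switch emits the actual
// subgroup*() built-in call.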
  6655. void CompilerGLSL::emit_subgroup_op(const Instruction &i)
  6656. {
  6657. const uint32_t *ops = stream(i);
  6658. auto op = static_cast<Op>(i.op);
  6659. if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
  6660. SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
  6661. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  6662. uint32_t integer_width = get_integer_width_for_instruction(i);
  6663. auto int_type = to_signed_basetype(integer_width);
  6664. auto uint_type = to_unsigned_basetype(integer_width);
  6665. switch (op)
  6666. {
  6667. case OpGroupNonUniformElect:
  6668. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
  6669. break;
  6670. case OpGroupNonUniformBallotBitCount:
  6671. {
  6672. const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
  6673. if (operation == GroupOperationReduce)
  6674. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
  6675. else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
  6676. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
  6677. }
  6678. break;
  6679. case OpGroupNonUniformBallotBitExtract:
  6680. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
  6681. break;
  6682. case OpGroupNonUniformInverseBallot:
  6683. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
  6684. break;
  6685. case OpGroupNonUniformBallot:
  6686. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
  6687. break;
  6688. case OpGroupNonUniformBallotFindLSB:
  6689. case OpGroupNonUniformBallotFindMSB:
  6690. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
  6691. break;
  6692. case OpGroupNonUniformBroadcast:
  6693. case OpGroupNonUniformBroadcastFirst:
  6694. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
  6695. break;
  6696. case OpGroupNonUniformShuffle:
  6697. case OpGroupNonUniformShuffleXor:
  6698. require_extension_internal("GL_KHR_shader_subgroup_shuffle");
  6699. break;
  6700. case OpGroupNonUniformShuffleUp:
  6701. case OpGroupNonUniformShuffleDown:
  6702. require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
  6703. break;
  6704. case OpGroupNonUniformAll:
  6705. case OpGroupNonUniformAny:
  6706. case OpGroupNonUniformAllEqual:
  6707. {
  6708. const SPIRType &type = expression_type(ops[3]);
  6709. if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
  6710. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
  6711. else
  6712. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
  6713. }
  6714. break;
  6715. case OpGroupNonUniformFAdd:
  6716. case OpGroupNonUniformFMul:
  6717. case OpGroupNonUniformFMin:
  6718. case OpGroupNonUniformFMax:
  6719. case OpGroupNonUniformIAdd:
  6720. case OpGroupNonUniformIMul:
  6721. case OpGroupNonUniformSMin:
  6722. case OpGroupNonUniformSMax:
  6723. case OpGroupNonUniformUMin:
  6724. case OpGroupNonUniformUMax:
  6725. case OpGroupNonUniformBitwiseAnd:
  6726. case OpGroupNonUniformBitwiseOr:
  6727. case OpGroupNonUniformBitwiseXor:
  6728. {
  6729. auto operation = static_cast<GroupOperation>(ops[3]);
  6730. if (operation == GroupOperationClusteredReduce)
  6731. {
  6732. require_extension_internal("GL_KHR_shader_subgroup_clustered");
  6733. }
  6734. else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
  6735. operation == GroupOperationReduce)
  6736. {
  6737. require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
  6738. }
  6739. else
  6740. SPIRV_CROSS_THROW("Invalid group operation.");
  6741. break;
  6742. }
  6743. case OpGroupNonUniformQuadSwap:
  6744. case OpGroupNonUniformQuadBroadcast:
  6745. require_extension_internal("GL_KHR_shader_subgroup_quad");
  6746. break;
  6747. default:
  6748. SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
  6749. }
  6750. uint32_t result_type = ops[0];
  6751. uint32_t id = ops[1];
  6752. auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
  6753. if (scope != ScopeSubgroup)
  6754. SPIRV_CROSS_THROW("Only subgroup scope is supported.");
  6755. switch (op)
  6756. {
  6757. case OpGroupNonUniformElect:
  6758. emit_op(result_type, id, "subgroupElect()", true);
  6759. break;
  6760. case OpGroupNonUniformBroadcast:
  6761. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
  6762. break;
  6763. case OpGroupNonUniformBroadcastFirst:
  6764. emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
  6765. break;
  6766. case OpGroupNonUniformBallot:
  6767. emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
  6768. break;
  6769. case OpGroupNonUniformInverseBallot:
  6770. emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
  6771. break;
  6772. case OpGroupNonUniformBallotBitExtract:
  6773. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
  6774. break;
  6775. case OpGroupNonUniformBallotFindLSB:
  6776. emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
  6777. break;
  6778. case OpGroupNonUniformBallotFindMSB:
  6779. emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
  6780. break;
  6781. case OpGroupNonUniformBallotBitCount:
  6782. {
  6783. auto operation = static_cast<GroupOperation>(ops[3]);
  6784. if (operation == GroupOperationReduce)
  6785. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
  6786. else if (operation == GroupOperationInclusiveScan)
  6787. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
  6788. else if (operation == GroupOperationExclusiveScan)
  6789. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
  6790. else
  6791. SPIRV_CROSS_THROW("Invalid BitCount operation.");
  6792. break;
  6793. }
  6794. case OpGroupNonUniformShuffle:
  6795. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
  6796. break;
  6797. case OpGroupNonUniformShuffleXor:
  6798. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
  6799. break;
  6800. case OpGroupNonUniformShuffleUp:
  6801. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
  6802. break;
  6803. case OpGroupNonUniformShuffleDown:
  6804. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
  6805. break;
  6806. case OpGroupNonUniformAll:
  6807. emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
  6808. break;
  6809. case OpGroupNonUniformAny:
  6810. emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
  6811. break;
  6812. case OpGroupNonUniformAllEqual:
  6813. emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
  6814. break;
  6815. // clang-format off
  6816. #define GLSL_GROUP_OP(op, glsl_op) \
  6817. case OpGroupNonUniform##op: \
  6818. { \
  6819. auto operation = static_cast<GroupOperation>(ops[3]); \
  6820. if (operation == GroupOperationReduce) \
  6821. emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
  6822. else if (operation == GroupOperationInclusiveScan) \
  6823. emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
  6824. else if (operation == GroupOperationExclusiveScan) \
  6825. emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
  6826. else if (operation == GroupOperationClusteredReduce) \
  6827. emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
  6828. else \
  6829. SPIRV_CROSS_THROW("Invalid group operation."); \
  6830. break; \
  6831. }
  6832. #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
  6833. case OpGroupNonUniform##op: \
  6834. { \
  6835. auto operation = static_cast<GroupOperation>(ops[3]); \
  6836. if (operation == GroupOperationReduce) \
  6837. emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
  6838. else if (operation == GroupOperationInclusiveScan) \
  6839. emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
  6840. else if (operation == GroupOperationExclusiveScan) \
  6841. emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
  6842. else if (operation == GroupOperationClusteredReduce) \
  6843. emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
  6844. else \
  6845. SPIRV_CROSS_THROW("Invalid group operation."); \
  6846. break; \
  6847. }
  6848. GLSL_GROUP_OP(FAdd, Add)
  6849. GLSL_GROUP_OP(FMul, Mul)
  6850. GLSL_GROUP_OP(FMin, Min)
  6851. GLSL_GROUP_OP(FMax, Max)
  6852. GLSL_GROUP_OP(IAdd, Add)
  6853. GLSL_GROUP_OP(IMul, Mul)
  6854. GLSL_GROUP_OP_CAST(SMin, Min, int_type)
  6855. GLSL_GROUP_OP_CAST(SMax, Max, int_type)
  6856. GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
  6857. GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
  6858. GLSL_GROUP_OP(BitwiseAnd, And)
  6859. GLSL_GROUP_OP(BitwiseOr, Or)
  6860. GLSL_GROUP_OP(BitwiseXor, Xor)
  6861. #undef GLSL_GROUP_OP
  6862. #undef GLSL_GROUP_OP_CAST
  6863. // clang-format on
  6864. case OpGroupNonUniformQuadSwap:
  6865. {
  6866. uint32_t direction = evaluate_constant_u32(ops[4]);
  6867. if (direction == 0)
  6868. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
  6869. else if (direction == 1)
  6870. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
  6871. else if (direction == 2)
  6872. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
  6873. else
  6874. SPIRV_CROSS_THROW("Invalid quad swap direction.");
  6875. break;
  6876. }
  6877. case OpGroupNonUniformQuadBroadcast:
  6878. {
  6879. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
  6880. break;
  6881. }
  6882. default:
  6883. SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
  6884. }
  6885. register_control_dependent_expression(id);
  6886. }
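// Returns the GLSL function or constructor that reinterprets in_type as out_type:
// a plain constructor for same-width integer<->integer casts, the *BitsTo* family for
// same-width float<->int casts, pack*/unpack* helpers for width-changing casts, and an
// empty string when the base types already match.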
  6887. string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
  6888. {
  6889. // OpBitcast can deal with pointers.
  6890. if (out_type.pointer || in_type.pointer)
  6891. return type_to_glsl(out_type);
  6892. if (out_type.basetype == in_type.basetype)
  6893. return "";
  6894. assert(out_type.basetype != SPIRType::Boolean);
  6895. assert(in_type.basetype != SPIRType::Boolean);
  6896. bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
  6897. bool same_size_cast = out_type.width == in_type.width;
  6898. // Trivial bitcast case, casts between integers.
  6899. if (integral_cast && same_size_cast)
  6900. return type_to_glsl(out_type);
  6901. // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
  6902. if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
  6903. return "unpack8";
  6904. else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
  6905. return "pack16";
  6906. else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
  6907. return "pack32";
  6908. // Floating <-> Integer special casts. Just have to enumerate all cases. :(
  6909. // 16-bit, 32-bit and 64-bit floats.
  6910. if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
  6911. {
  6912. if (is_legacy_es())
  6913. SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
  6914. else if (!options.es && options.version < 330)
  6915. require_extension_internal("GL_ARB_shader_bit_encoding");
  6916. return "floatBitsToUint";
  6917. }
  6918. else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
  6919. {
  6920. if (is_legacy_es())
  6921. SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
  6922. else if (!options.es && options.version < 330)
  6923. require_extension_internal("GL_ARB_shader_bit_encoding");
  6924. return "floatBitsToInt";
  6925. }
  6926. else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
  6927. {
  6928. if (is_legacy_es())
  6929. SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
  6930. else if (!options.es && options.version < 330)
  6931. require_extension_internal("GL_ARB_shader_bit_encoding");
  6932. return "uintBitsToFloat";
  6933. }
  6934. else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
  6935. {
  6936. if (is_legacy_es())
  6937. SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
  6938. else if (!options.es && options.version < 330)
  6939. require_extension_internal("GL_ARB_shader_bit_encoding");
  6940. return "intBitsToFloat";
  6941. }
  6942. else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
  6943. return "doubleBitsToInt64";
  6944. else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
  6945. return "doubleBitsToUint64";
  6946. else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
  6947. return "int64BitsToDouble";
  6948. else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
  6949. return "uint64BitsToDouble";
  6950. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
  6951. return "float16BitsToInt16";
  6952. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
  6953. return "float16BitsToUint16";
  6954. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
  6955. return "int16BitsToFloat16";
  6956. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
  6957. return "uint16BitsToFloat16";
  6958. // And finally, some even more special purpose casts.
  6959. if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
  6960. return "packUint2x32";
  6961. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
  6962. return "unpackUint2x32";
  6963. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
  6964. return "unpackFloat2x16";
  6965. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
  6966. return "packFloat2x16";
  6967. else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
  6968. return "packInt2x16";
  6969. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
  6970. return "unpackInt2x16";
  6971. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
  6972. return "packUint2x16";
  6973. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
  6974. return "unpackUint2x16";
  6975. else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
  6976. return "packInt4x16";
  6977. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
  6978. return "unpackInt4x16";
  6979. else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
  6980. return "packUint4x16";
  6981. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
  6982. return "unpackUint4x16";
  6983. return "";
  6984. }
  6985. string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
  6986. {
  6987. auto op = bitcast_glsl_op(result_type, expression_type(argument));
  6988. if (op.empty())
  6989. return to_enclosed_unpacked_expression(argument);
  6990. else
  6991. return join(op, "(", to_unpacked_expression(argument), ")");
  6992. }
  6993. std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
  6994. {
  6995. auto expr = to_expression(arg);
  6996. auto &src_type = expression_type(arg);
  6997. if (src_type.basetype != target_type)
  6998. {
  6999. auto target = src_type;
  7000. target.basetype = target_type;
  7001. expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
  7002. }
  7003. return expr;
  7004. }
  7005. std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
  7006. const std::string &expr)
  7007. {
  7008. if (target_type.basetype == expr_type)
  7009. return expr;
  7010. auto src_type = target_type;
  7011. src_type.basetype = expr_type;
  7012. return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
  7013. }
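// Maps a SPIR-V BuiltIn to the GLSL variable that backs it, enabling whatever extension
// the target needs (or throwing when the requested version cannot express it). The
// storage class disambiguates built-ins whose name differs between input and output,
// e.g. gl_PrimitiveIDIn vs gl_PrimitiveID and gl_SampleMaskIn vs gl_SampleMask.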
  7014. string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
  7015. {
  7016. switch (builtin)
  7017. {
  7018. case BuiltInPosition:
  7019. return "gl_Position";
  7020. case BuiltInPointSize:
  7021. return "gl_PointSize";
  7022. case BuiltInClipDistance:
  7023. return "gl_ClipDistance";
  7024. case BuiltInCullDistance:
  7025. return "gl_CullDistance";
  7026. case BuiltInVertexId:
  7027. if (options.vulkan_semantics)
  7028. SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
  7029. "with GL semantics.");
  7030. return "gl_VertexID";
  7031. case BuiltInInstanceId:
  7032. if (options.vulkan_semantics)
  7033. {
  7034. auto model = get_entry_point().model;
  7035. switch (model)
  7036. {
  7037. case spv::ExecutionModelIntersectionKHR:
  7038. case spv::ExecutionModelAnyHitKHR:
  7039. case spv::ExecutionModelClosestHitKHR:
  7040. // gl_InstanceID is allowed in these shaders.
  7041. break;
  7042. default:
  7043. SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
  7044. "created with GL semantics.");
  7045. }
  7046. }
  7047. if (!options.es && options.version < 140)
  7048. {
  7049. require_extension_internal("GL_ARB_draw_instanced");
  7050. }
  7051. return "gl_InstanceID";
  7052. case BuiltInVertexIndex:
  7053. if (options.vulkan_semantics)
  7054. return "gl_VertexIndex";
  7055. else
  7056. return "gl_VertexID"; // gl_VertexID already has the base offset applied.
  7057. case BuiltInInstanceIndex:
  7058. if (options.vulkan_semantics)
  7059. return "gl_InstanceIndex";
  7060. if (!options.es && options.version < 140)
  7061. {
  7062. require_extension_internal("GL_ARB_draw_instanced");
  7063. }
  7064. if (options.vertex.support_nonzero_base_instance)
  7065. {
  7066. if (!options.vulkan_semantics)
  7067. {
  7068. // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
  7069. require_extension_internal("GL_ARB_shader_draw_parameters");
  7070. }
  7071. return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
  7072. }
  7073. else
  7074. return "gl_InstanceID";
  7075. case BuiltInPrimitiveId:
  7076. if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
  7077. return "gl_PrimitiveIDIn";
  7078. else
  7079. return "gl_PrimitiveID";
  7080. case BuiltInInvocationId:
  7081. return "gl_InvocationID";
  7082. case BuiltInLayer:
  7083. return "gl_Layer";
  7084. case BuiltInViewportIndex:
  7085. return "gl_ViewportIndex";
  7086. case BuiltInTessLevelOuter:
  7087. return "gl_TessLevelOuter";
  7088. case BuiltInTessLevelInner:
  7089. return "gl_TessLevelInner";
  7090. case BuiltInTessCoord:
  7091. return "gl_TessCoord";
  7092. case BuiltInFragCoord:
  7093. return "gl_FragCoord";
  7094. case BuiltInPointCoord:
  7095. return "gl_PointCoord";
  7096. case BuiltInFrontFacing:
  7097. return "gl_FrontFacing";
  7098. case BuiltInFragDepth:
  7099. return "gl_FragDepth";
  7100. case BuiltInNumWorkgroups:
  7101. return "gl_NumWorkGroups";
  7102. case BuiltInWorkgroupSize:
  7103. return "gl_WorkGroupSize";
  7104. case BuiltInWorkgroupId:
  7105. return "gl_WorkGroupID";
  7106. case BuiltInLocalInvocationId:
  7107. return "gl_LocalInvocationID";
  7108. case BuiltInGlobalInvocationId:
  7109. return "gl_GlobalInvocationID";
  7110. case BuiltInLocalInvocationIndex:
  7111. return "gl_LocalInvocationIndex";
  7112. case BuiltInHelperInvocation:
  7113. return "gl_HelperInvocation";
  7114. case BuiltInBaseVertex:
  7115. if (options.es)
  7116. SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
  7117. if (options.vulkan_semantics)
  7118. {
  7119. if (options.version < 460)
  7120. {
  7121. require_extension_internal("GL_ARB_shader_draw_parameters");
  7122. return "gl_BaseVertexARB";
  7123. }
  7124. return "gl_BaseVertex";
  7125. }
  7126. else
  7127. {
  7128. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  7129. require_extension_internal("GL_ARB_shader_draw_parameters");
  7130. return "SPIRV_Cross_BaseVertex";
  7131. }
  7132. break;
  7133. case BuiltInBaseInstance:
  7134. if (options.es)
  7135. SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
  7136. if (options.vulkan_semantics)
  7137. {
  7138. if (options.version < 460)
  7139. {
  7140. require_extension_internal("GL_ARB_shader_draw_parameters");
  7141. return "gl_BaseInstanceARB";
  7142. }
  7143. return "gl_BaseInstance";
  7144. }
  7145. else
  7146. {
  7147. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  7148. require_extension_internal("GL_ARB_shader_draw_parameters");
  7149. return "SPIRV_Cross_BaseInstance";
  7150. }
  7151. break;
  7152. case BuiltInDrawIndex:
  7153. if (options.es)
  7154. SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
  7155. if (options.vulkan_semantics)
  7156. {
  7157. if (options.version < 460)
  7158. {
  7159. require_extension_internal("GL_ARB_shader_draw_parameters");
  7160. return "gl_DrawIDARB";
  7161. }
  7162. return "gl_DrawID";
  7163. }
  7164. else
  7165. {
  7166. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  7167. require_extension_internal("GL_ARB_shader_draw_parameters");
  7168. return "gl_DrawIDARB";
  7169. }
  7170. break;
  7171. case BuiltInSampleId:
  7172. if (options.es && options.version < 320)
  7173. require_extension_internal("GL_OES_sample_variables");
  7174. if (!options.es && options.version < 400)
  7175. SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
  7176. return "gl_SampleID";
  7177. case BuiltInSampleMask:
  7178. if (options.es && options.version < 320)
  7179. require_extension_internal("GL_OES_sample_variables");
  7180. if (!options.es && options.version < 400)
  7181. SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
  7182. if (storage == StorageClassInput)
  7183. return "gl_SampleMaskIn";
  7184. else
  7185. return "gl_SampleMask";
  7186. case BuiltInSamplePosition:
  7187. if (options.es && options.version < 320)
  7188. require_extension_internal("GL_OES_sample_variables");
  7189. if (!options.es && options.version < 400)
  7190. SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
  7191. return "gl_SamplePosition";
  7192. case BuiltInViewIndex:
  7193. if (options.vulkan_semantics)
  7194. {
  7195. require_extension_internal("GL_EXT_multiview");
  7196. return "gl_ViewIndex";
  7197. }
  7198. else
  7199. {
  7200. require_extension_internal("GL_OVR_multiview2");
  7201. return "gl_ViewID_OVR";
  7202. }
  7203. case BuiltInNumSubgroups:
  7204. request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
  7205. return "gl_NumSubgroups";
  7206. case BuiltInSubgroupId:
  7207. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
  7208. return "gl_SubgroupID";
  7209. case BuiltInSubgroupSize:
  7210. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
  7211. return "gl_SubgroupSize";
  7212. case BuiltInSubgroupLocalInvocationId:
  7213. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
  7214. return "gl_SubgroupInvocationID";
  7215. case BuiltInSubgroupEqMask:
  7216. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  7217. return "gl_SubgroupEqMask";
  7218. case BuiltInSubgroupGeMask:
  7219. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  7220. return "gl_SubgroupGeMask";
  7221. case BuiltInSubgroupGtMask:
  7222. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  7223. return "gl_SubgroupGtMask";
  7224. case BuiltInSubgroupLeMask:
  7225. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  7226. return "gl_SubgroupLeMask";
  7227. case BuiltInSubgroupLtMask:
  7228. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  7229. return "gl_SubgroupLtMask";
  7230. case BuiltInLaunchIdKHR:
  7231. return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
  7232. case BuiltInLaunchSizeKHR:
  7233. return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
  7234. case BuiltInWorldRayOriginKHR:
  7235. return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
  7236. case BuiltInWorldRayDirectionKHR:
  7237. return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
  7238. case BuiltInObjectRayOriginKHR:
  7239. return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
  7240. case BuiltInObjectRayDirectionKHR:
  7241. return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
  7242. case BuiltInRayTminKHR:
  7243. return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
  7244. case BuiltInRayTmaxKHR:
  7245. return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
  7246. case BuiltInInstanceCustomIndexKHR:
  7247. return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
  7248. case BuiltInObjectToWorldKHR:
  7249. return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
  7250. case BuiltInWorldToObjectKHR:
  7251. return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
  7252. case BuiltInHitTNV:
  7253. // gl_HitTEXT is an alias of RayTMax in KHR.
  7254. return "gl_HitTNV";
  7255. case BuiltInHitKindKHR:
  7256. return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
  7257. case BuiltInIncomingRayFlagsKHR:
  7258. return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
  7259. case BuiltInBaryCoordNV:
  7260. {
  7261. if (options.es && options.version < 320)
  7262. SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
  7263. else if (!options.es && options.version < 450)
  7264. SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
  7265. require_extension_internal("GL_NV_fragment_shader_barycentric");
  7266. return "gl_BaryCoordNV";
  7267. }
  7268. case BuiltInBaryCoordNoPerspNV:
  7269. {
  7270. if (options.es && options.version < 320)
  7271. SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
  7272. else if (!options.es && options.version < 450)
  7273. SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
  7274. require_extension_internal("GL_NV_fragment_shader_barycentric");
  7275. return "gl_BaryCoordNoPerspNV";
  7276. }
  7277. case BuiltInFragStencilRefEXT:
  7278. {
  7279. if (!options.es)
  7280. {
  7281. require_extension_internal("GL_ARB_shader_stencil_export");
  7282. return "gl_FragStencilRefARB";
  7283. }
  7284. else
  7285. SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
  7286. }
  7287. case BuiltInDeviceIndex:
  7288. if (!options.vulkan_semantics)
  7289. SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
  7290. require_extension_internal("GL_EXT_device_group");
  7291. return "gl_DeviceIndex";
  7292. default:
  7293. return join("gl_BuiltIn_", convert_to_string(builtin));
  7294. }
  7295. }
  7296. const char *CompilerGLSL::index_to_swizzle(uint32_t index)
  7297. {
  7298. switch (index)
  7299. {
  7300. case 0:
  7301. return "x";
  7302. case 1:
  7303. return "y";
  7304. case 2:
  7305. return "z";
  7306. case 3:
  7307. return "w";
  7308. default:
  7309. SPIRV_CROSS_THROW("Swizzle index out of range");
  7310. }
  7311. }
  7312. void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType *type,
  7313. AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
  7314. uint32_t index)
  7315. {
  7316. bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
  7317. bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
  7318. expr += "[";
  7319. // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier.
  7320. bool nonuniform_index =
  7321. has_decoration(index, DecorationNonUniformEXT) &&
  7322. (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock));
  7323. if (nonuniform_index)
  7324. {
  7325. expr += backend.nonuniform_qualifier;
  7326. expr += "(";
  7327. }
  7328. if (index_is_literal)
  7329. expr += convert_to_string(index);
  7330. else
  7331. expr += to_expression(index, register_expression_read);
  7332. if (nonuniform_index)
  7333. expr += ")";
  7334. expr += "]";
  7335. }
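// Core access-chain lowering: walks the SPIR-V index list and appends the matching GLSL
// syntax for each level ([] for arrays, matrices and pointer chains, .member or the
// built-in name for structs, a swizzle or subscript for vector components), while
// tracking row-major transpose state, packed/physical types and invariance, which are
// reported back through AccessChainMeta.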
  7336. string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
  7337. AccessChainFlags flags, AccessChainMeta *meta)
  7338. {
  7339. string expr;
  7340. bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
  7341. bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
  7342. bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
  7343. bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
  7344. bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
  7345. bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
  7346. if (!chain_only)
  7347. {
  7348. // We handle transpose explicitly, so don't resolve that here.
  7349. auto *e = maybe_get<SPIRExpression>(base);
  7350. bool old_transpose = e && e->need_transpose;
  7351. if (e)
  7352. e->need_transpose = false;
  7353. expr = to_enclosed_expression(base, register_expression_read);
  7354. if (e)
  7355. e->need_transpose = old_transpose;
  7356. }
  7357. // Start traversing type hierarchy at the proper non-pointer types,
  7358. // but keep type_id referencing the original pointer for use below.
  7359. uint32_t type_id = expression_type_id(base);
  7360. if (!backend.native_pointers)
  7361. {
  7362. if (ptr_chain)
  7363. SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
  7364. // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
  7365. // continuing the access chain.
  7366. if (should_dereference(base))
  7367. {
  7368. auto &type = get<SPIRType>(type_id);
  7369. expr = dereference_expression(type, expr);
  7370. }
  7371. }
  7372. const auto *type = &get_pointee_type(type_id);
  7373. bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
  7374. bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
  7375. bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
  7376. uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
  7377. bool is_invariant = has_decoration(base, DecorationInvariant);
  7378. bool pending_array_enclose = false;
  7379. bool dimension_flatten = false;
  7380. const auto append_index = [&](uint32_t index, bool is_literal) {
  7381. AccessChainFlags mod_flags = flags;
  7382. if (!is_literal)
  7383. mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
  7384. access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
  7385. };
  7386. for (uint32_t i = 0; i < count; i++)
  7387. {
  7388. uint32_t index = indices[i];
  7389. bool is_literal = index_is_literal;
  7390. if (is_literal && msb_is_id && (index >> 31u) != 0u)
  7391. {
  7392. is_literal = false;
  7393. index &= 0x7fffffffu;
  7394. }
  7395. // Pointer chains
  7396. if (ptr_chain && i == 0)
  7397. {
  7398. // If we are flattening multidimensional arrays, only create opening bracket on first
  7399. // array index.
  7400. if (options.flatten_multidimensional_arrays)
  7401. {
  7402. dimension_flatten = type->array.size() >= 1;
  7403. pending_array_enclose = dimension_flatten;
  7404. if (pending_array_enclose)
  7405. expr += "[";
  7406. }
  7407. if (options.flatten_multidimensional_arrays && dimension_flatten)
  7408. {
  7409. // If we are flattening multidimensional arrays, do manual stride computation.
  7410. if (is_literal)
  7411. expr += convert_to_string(index);
  7412. else
  7413. expr += to_enclosed_expression(index, register_expression_read);
  7414. for (auto j = uint32_t(type->array.size()); j; j--)
  7415. {
  7416. expr += " * ";
  7417. expr += enclose_expression(to_array_size(*type, j - 1));
  7418. }
  7419. if (type->array.empty())
  7420. pending_array_enclose = false;
  7421. else
  7422. expr += " + ";
  7423. if (!pending_array_enclose)
  7424. expr += "]";
  7425. }
  7426. else
  7427. {
  7428. append_index(index, is_literal);
  7429. }
  7430. if (type->basetype == SPIRType::ControlPointArray)
  7431. {
  7432. type_id = type->parent_type;
  7433. type = &get<SPIRType>(type_id);
  7434. }
  7435. access_chain_is_arrayed = true;
  7436. }
  7437. // Arrays
  7438. else if (!type->array.empty())
  7439. {
  7440. // If we are flattening multidimensional arrays, only create opening bracket on first
  7441. // array index.
  7442. if (options.flatten_multidimensional_arrays && !pending_array_enclose)
  7443. {
  7444. dimension_flatten = type->array.size() > 1;
  7445. pending_array_enclose = dimension_flatten;
  7446. if (pending_array_enclose)
  7447. expr += "[";
  7448. }
  7449. assert(type->parent_type);
  7450. auto *var = maybe_get<SPIRVariable>(base);
  7451. if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
  7452. !has_decoration(type->self, DecorationBlock))
  7453. {
  7454. // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
  7455. // Normally, these variables live in blocks when compiled from GLSL,
  7456. // but HLSL seems to just emit straight arrays here.
  7457. // We must pretend this access goes through gl_in/gl_out arrays
  7458. // to be able to access certain builtins as arrays.
  7459. auto builtin = ir.meta[base].decoration.builtin_type;
  7460. switch (builtin)
  7461. {
  7462. // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
  7463. // case BuiltInClipDistance:
  7464. case BuiltInPosition:
  7465. case BuiltInPointSize:
  7466. if (var->storage == StorageClassInput)
  7467. expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
  7468. else if (var->storage == StorageClassOutput)
  7469. expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
  7470. else
  7471. append_index(index, is_literal);
  7472. break;
  7473. default:
  7474. append_index(index, is_literal);
  7475. break;
  7476. }
  7477. }
  7478. else if (options.flatten_multidimensional_arrays && dimension_flatten)
  7479. {
  7480. // If we are flattening multidimensional arrays, do manual stride computation.
  7481. auto &parent_type = get<SPIRType>(type->parent_type);
  7482. if (is_literal)
  7483. expr += convert_to_string(index);
  7484. else
  7485. expr += to_enclosed_expression(index, register_expression_read);
  7486. for (auto j = uint32_t(parent_type.array.size()); j; j--)
  7487. {
  7488. expr += " * ";
  7489. expr += enclose_expression(to_array_size(parent_type, j - 1));
  7490. }
  7491. if (parent_type.array.empty())
  7492. pending_array_enclose = false;
  7493. else
  7494. expr += " + ";
  7495. if (!pending_array_enclose)
  7496. expr += "]";
  7497. }
  7498. // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
  7499. // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
  7500. else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
  7501. {
  7502. append_index(index, is_literal);
  7503. }
  7504. type_id = type->parent_type;
  7505. type = &get<SPIRType>(type_id);
  7506. access_chain_is_arrayed = true;
  7507. }
  7508. // For structs, the index refers to a constant, which indexes into the members.
  7509. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
  7510. else if (type->basetype == SPIRType::Struct)
  7511. {
  7512. if (!is_literal)
  7513. index = evaluate_constant_u32(index);
  7514. if (index >= type->member_types.size())
  7515. SPIRV_CROSS_THROW("Member index is out of bounds!");
  7516. BuiltIn builtin;
  7517. if (is_member_builtin(*type, index, &builtin))
  7518. {
  7519. if (access_chain_is_arrayed)
  7520. {
  7521. expr += ".";
  7522. expr += builtin_to_glsl(builtin, type->storage);
  7523. }
  7524. else
  7525. expr = builtin_to_glsl(builtin, type->storage);
  7526. }
  7527. else
  7528. {
  7529. // If the member has a qualified name, use it as the entire chain
  7530. string qual_mbr_name = get_member_qualified_name(type_id, index);
  7531. if (!qual_mbr_name.empty())
  7532. expr = qual_mbr_name;
  7533. else if (flatten_member_reference)
  7534. expr += join("_", to_member_name(*type, index));
  7535. else
  7536. expr += to_member_reference(base, *type, index, ptr_chain);
  7537. }
  7538. if (has_member_decoration(type->self, index, DecorationInvariant))
  7539. is_invariant = true;
  7540. is_packed = member_is_packed_physical_type(*type, index);
  7541. if (member_is_remapped_physical_type(*type, index))
  7542. physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
  7543. else
  7544. physical_type = 0;
  7545. row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
  7546. type = &get<SPIRType>(type->member_types[index]);
  7547. }
  7548. // Matrix -> Vector
  7549. else if (type->columns > 1)
  7550. {
  7551. // If we have a row-major matrix here, we need to defer any transpose in case this access chain
  7552. // is used to store a column. We can resolve it right here and now if we access a scalar directly,
  7553. // by flipping indexing order of the matrix.
  7554. expr += "[";
  7555. if (is_literal)
  7556. expr += convert_to_string(index);
  7557. else
  7558. expr += to_expression(index, register_expression_read);
  7559. expr += "]";
  7560. type_id = type->parent_type;
  7561. type = &get<SPIRType>(type_id);
  7562. }
  7563. // Vector -> Scalar
  7564. else if (type->vecsize > 1)
  7565. {
  7566. string deferred_index;
  7567. if (row_major_matrix_needs_conversion)
  7568. {
  7569. // Flip indexing order.
  7570. auto column_index = expr.find_last_of('[');
  7571. if (column_index != string::npos)
  7572. {
  7573. deferred_index = expr.substr(column_index);
  7574. expr.resize(column_index);
  7575. }
  7576. }
  7577. // Internally, access chain implementation can also be used on composites,
  7578. // ignore scalar access workarounds in this case.
  7579. StorageClass effective_storage;
  7580. if (expression_type(base).pointer)
  7581. effective_storage = get_expression_effective_storage_class(base);
  7582. else
  7583. effective_storage = StorageClassGeneric;
  7584. if (!row_major_matrix_needs_conversion)
  7585. {
  7586. // On some backends, we might not be able to safely access individual scalars in a vector.
  7587. // To work around this, we might have to cast the access chain reference to something which can,
  7588. // like a pointer to scalar, which we can then index into.
  7589. prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
  7590. is_packed);
  7591. }
  7592. if (is_literal && !is_packed && !row_major_matrix_needs_conversion)
  7593. {
  7594. expr += ".";
  7595. expr += index_to_swizzle(index);
  7596. }
  7597. else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
  7598. {
  7599. auto &c = get<SPIRConstant>(index);
  7600. if (c.specialization)
  7601. {
  7602. // If the index is a spec constant, we cannot turn extract into a swizzle.
  7603. expr += join("[", to_expression(index), "]");
  7604. }
  7605. else
  7606. {
  7607. expr += ".";
  7608. expr += index_to_swizzle(c.scalar());
  7609. }
  7610. }
  7611. else if (is_literal)
  7612. {
  7613. // For packed vectors, we can only access them as an array, not by swizzle.
  7614. expr += join("[", index, "]");
  7615. }
  7616. else
  7617. {
  7618. expr += "[";
  7619. expr += to_expression(index, register_expression_read);
  7620. expr += "]";
  7621. }
  7622. if (row_major_matrix_needs_conversion)
  7623. {
  7624. prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
  7625. is_packed);
  7626. }
  7627. expr += deferred_index;
  7628. row_major_matrix_needs_conversion = false;
  7629. is_packed = false;
  7630. physical_type = 0;
  7631. type_id = type->parent_type;
  7632. type = &get<SPIRType>(type_id);
  7633. }
  7634. else if (!backend.allow_truncated_access_chain)
  7635. SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
  7636. }
  7637. if (pending_array_enclose)
  7638. {
  7639. SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, "
  7640. "but the access chain was terminated in the middle of a multidimensional array. "
  7641. "This is not supported.");
  7642. }
  7643. if (meta)
  7644. {
  7645. meta->need_transpose = row_major_matrix_needs_conversion;
  7646. meta->storage_is_packed = is_packed;
  7647. meta->storage_is_invariant = is_invariant;
  7648. meta->storage_physical_type = physical_type;
  7649. }
  7650. return expr;
  7651. }
  7652. void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
  7653. {
  7654. }
  7655. string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
  7656. {
  7657. auto ret = join(basename, "_", to_member_name(type, index));
  7658. ParsedIR::sanitize_underscores(ret);
  7659. return ret;
  7660. }
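// Public access-chain entry point: flattened buffer blocks are lowered through the
// offset-based flattened_access_chain* path, flattened I/O structs are rewritten to
// their per-member variables, and everything else falls through to access_chain_internal().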
  7661. string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
  7662. AccessChainMeta *meta, bool ptr_chain)
  7663. {
  7664. if (flattened_buffer_blocks.count(base))
  7665. {
  7666. uint32_t matrix_stride = 0;
  7667. uint32_t array_stride = 0;
  7668. bool need_transpose = false;
  7669. flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
  7670. &array_stride, ptr_chain);
  7671. if (meta)
  7672. {
  7673. meta->need_transpose = target_type.columns > 1 && need_transpose;
  7674. meta->storage_is_packed = false;
  7675. }
  7676. return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
  7677. need_transpose);
  7678. }
  7679. else if (flattened_structs.count(base) && count > 0)
  7680. {
  7681. AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
  7682. if (ptr_chain)
  7683. flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
  7684. if (flattened_structs[base])
  7685. {
  7686. flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
  7687. if (meta)
  7688. meta->flattened_struct = target_type.basetype == SPIRType::Struct;
  7689. }
  7690. auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
  7691. if (meta)
  7692. {
  7693. meta->need_transpose = false;
  7694. meta->storage_is_packed = false;
  7695. }
  7696. auto basename = to_flattened_access_chain_expression(base);
  7697. auto ret = join(basename, "_", chain);
  7698. ParsedIR::sanitize_underscores(ret);
  7699. return ret;
  7700. }
  7701. else
  7702. {
  7703. AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
  7704. if (ptr_chain)
  7705. flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
  7706. return access_chain_internal(base, indices, count, flags, meta);
  7707. }
  7708. }
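// Reconstructs a struct rvalue from its flattened per-member variables by emitting a
// constructor call, recursing into nested structs; e.g. (sketch) a flattened block "UBO"
// with members a and b would be loaded as "T(UBO_a, UBO_b)".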
  7709. string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
  7710. {
  7711. auto expr = type_to_glsl_constructor(type);
  7712. expr += '(';
  7713. for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
  7714. {
  7715. if (i)
  7716. expr += ", ";
  7717. auto &member_type = get<SPIRType>(type.member_types[i]);
  7718. if (member_type.basetype == SPIRType::Struct)
  7719. expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
  7720. else
  7721. expr += to_flattened_struct_member(basename, type, i);
  7722. }
  7723. expr += ')';
  7724. return expr;
  7725. }
  7726. std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
  7727. {
  7728. // Do not use to_expression as that will unflatten access chains.
  7729. string basename;
  7730. if (const auto *var = maybe_get<SPIRVariable>(id))
  7731. basename = to_name(var->self);
  7732. else if (const auto *expr = maybe_get<SPIRExpression>(id))
  7733. basename = expr->expression;
  7734. else
  7735. basename = to_expression(id);
  7736. return basename;
  7737. }
  7738. void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
  7739. const SmallVector<uint32_t> &indices)
  7740. {
  7741. SmallVector<uint32_t> sub_indices = indices;
  7742. sub_indices.push_back(0);
  7743. auto *member_type = &type;
  7744. for (auto &index : indices)
  7745. member_type = &get<SPIRType>(member_type->member_types[index]);
  7746. for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
  7747. {
  7748. sub_indices.back() = i;
  7749. auto lhs = join(basename, "_", to_member_name(*member_type, i));
  7750. ParsedIR::sanitize_underscores(lhs);
  7751. if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
  7752. {
  7753. store_flattened_struct(lhs, rhs_id, type, sub_indices);
  7754. }
  7755. else
  7756. {
  7757. auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
  7758. statement(lhs, " = ", rhs, ";");
  7759. }
  7760. }
  7761. }
  7762. void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
  7763. {
  7764. auto &type = expression_type(lhs_id);
  7765. auto basename = to_flattened_access_chain_expression(lhs_id);
  7766. store_flattened_struct(basename, value, type, {});
  7767. }
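// For buffer blocks that were flattened into a plain array of vec4s, loads are
// reconstructed arithmetically: the byte offset of the accessed element selects a vec4
// index and a swizzle, and structs/matrices are rebuilt member by member and column by
// column via the helpers below.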
  7768. std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
  7769. const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
  7770. uint32_t /* array_stride */, bool need_transpose)
  7771. {
  7772. if (!target_type.array.empty())
  7773. SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
  7774. else if (target_type.basetype == SPIRType::Struct)
  7775. return flattened_access_chain_struct(base, indices, count, target_type, offset);
  7776. else if (target_type.columns > 1)
  7777. return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
  7778. else
  7779. return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
  7780. }
  7781. std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
  7782. const SPIRType &target_type, uint32_t offset)
  7783. {
  7784. std::string expr;
  7785. expr += type_to_glsl_constructor(target_type);
  7786. expr += "(";
  7787. for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
  7788. {
  7789. if (i != 0)
  7790. expr += ", ";
  7791. const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
  7792. uint32_t member_offset = type_struct_member_offset(target_type, i);
  7793. // The access chain terminates at the struct, so we need to find matrix strides and row-major information
  7794. // ahead of time.
  7795. bool need_transpose = false;
  7796. uint32_t matrix_stride = 0;
  7797. if (member_type.columns > 1)
  7798. {
  7799. need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
  7800. matrix_stride = type_struct_member_matrix_stride(target_type, i);
  7801. }
  7802. auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
  7803. 0 /* array_stride */, need_transpose);
  7804. // Cannot forward transpositions, so resolve them here.
  7805. if (need_transpose)
  7806. expr += convert_row_major_matrix(tmp, member_type, 0, false);
  7807. else
  7808. expr += tmp;
  7809. }
  7810. expr += ")";
  7811. return expr;
  7812. }
  7813. std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
  7814. const SPIRType &target_type, uint32_t offset,
  7815. uint32_t matrix_stride, bool need_transpose)
  7816. {
  7817. assert(matrix_stride);
  7818. SPIRType tmp_type = target_type;
  7819. if (need_transpose)
  7820. swap(tmp_type.vecsize, tmp_type.columns);
  7821. std::string expr;
  7822. expr += type_to_glsl_constructor(tmp_type);
  7823. expr += "(";
  7824. for (uint32_t i = 0; i < tmp_type.columns; i++)
  7825. {
  7826. if (i != 0)
  7827. expr += ", ";
  7828. expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
  7829. /* need_transpose= */ false);
  7830. }
  7831. expr += ")";
  7832. return expr;
  7833. }
  7834. std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
  7835. const SPIRType &target_type, uint32_t offset,
  7836. uint32_t matrix_stride, bool need_transpose)
  7837. {
  7838. auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
  7839. auto buffer_name = to_name(expression_type(base).self);
  7840. if (need_transpose)
  7841. {
  7842. std::string expr;
  7843. if (target_type.vecsize > 1)
  7844. {
  7845. expr += type_to_glsl_constructor(target_type);
  7846. expr += "(";
  7847. }
  7848. for (uint32_t i = 0; i < target_type.vecsize; ++i)
  7849. {
  7850. if (i != 0)
  7851. expr += ", ";
  7852. uint32_t component_offset = result.second + i * matrix_stride;
  7853. assert(component_offset % (target_type.width / 8) == 0);
  7854. uint32_t index = component_offset / (target_type.width / 8);
  7855. expr += buffer_name;
  7856. expr += "[";
  7857. expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
  7858. expr += convert_to_string(index / 4);
  7859. expr += "]";
  7860. expr += vector_swizzle(1, index % 4);
  7861. }
  7862. if (target_type.vecsize > 1)
  7863. {
  7864. expr += ")";
  7865. }
  7866. return expr;
  7867. }
  7868. else
  7869. {
  7870. assert(result.second % (target_type.width / 8) == 0);
  7871. uint32_t index = result.second / (target_type.width / 8);
  7872. std::string expr;
  7873. expr += buffer_name;
  7874. expr += "[";
  7875. expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
  7876. expr += convert_to_string(index / 4);
  7877. expr += "]";
  7878. expr += vector_swizzle(target_type.vecsize, index % 4);
  7879. return expr;
  7880. }
  7881. }
  7882. std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
  7883. const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
  7884. bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
  7885. {
  7886. // Start traversing type hierarchy at the proper non-pointer types.
  7887. const auto *type = &get_pointee_type(basetype);
  7888. std::string expr;
  7889. // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
  7890. bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
  7891. uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
  7892. uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
  7893. for (uint32_t i = 0; i < count; i++)
  7894. {
  7895. uint32_t index = indices[i];
  7896. // Pointers
  7897. if (ptr_chain && i == 0)
  7898. {
  7899. // Here, the pointer type will be decorated with an array stride.
  7900. array_stride = get_decoration(basetype.self, DecorationArrayStride);
  7901. if (!array_stride)
  7902. SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
  7903. auto *constant = maybe_get<SPIRConstant>(index);
  7904. if (constant)
  7905. {
  7906. // Constant array access.
  7907. offset += constant->scalar() * array_stride;
  7908. }
  7909. else
  7910. {
  7911. // Dynamic array access.
  7912. if (array_stride % word_stride)
  7913. {
  7914. SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
  7915. "of a 4-component vector. "
  7916. "Likely culprit here is a float or vec2 array inside a push "
  7917. "constant block which is std430. "
  7918. "This cannot be flattened. Try using std140 layout instead.");
  7919. }
  7920. expr += to_enclosed_expression(index);
  7921. expr += " * ";
  7922. expr += convert_to_string(array_stride / word_stride);
  7923. expr += " + ";
  7924. }
  7925. }
  7926. // Arrays
  7927. else if (!type->array.empty())
  7928. {
  7929. auto *constant = maybe_get<SPIRConstant>(index);
  7930. if (constant)
  7931. {
  7932. // Constant array access.
  7933. offset += constant->scalar() * array_stride;
  7934. }
  7935. else
  7936. {
  7937. // Dynamic array access.
  7938. if (array_stride % word_stride)
  7939. {
  7940. SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
  7941. "of a 4-component vector. "
  7942. "Likely culprit here is a float or vec2 array inside a push "
  7943. "constant block which is std430. "
  7944. "This cannot be flattened. Try using std140 layout instead.");
  7945. }
  7946. expr += to_enclosed_expression(index, false);
  7947. expr += " * ";
  7948. expr += convert_to_string(array_stride / word_stride);
  7949. expr += " + ";
  7950. }
  7951. uint32_t parent_type = type->parent_type;
  7952. type = &get<SPIRType>(parent_type);
  7953. if (!type->array.empty())
  7954. array_stride = get_decoration(parent_type, DecorationArrayStride);
  7955. }
  7956. // For structs, the index refers to a constant, which indexes into the members.
  7957. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
  7958. else if (type->basetype == SPIRType::Struct)
  7959. {
  7960. index = evaluate_constant_u32(index);
  7961. if (index >= type->member_types.size())
  7962. SPIRV_CROSS_THROW("Member index is out of bounds!");
  7963. offset += type_struct_member_offset(*type, index);
  7964. auto &struct_type = *type;
  7965. type = &get<SPIRType>(type->member_types[index]);
  7966. if (type->columns > 1)
  7967. {
  7968. matrix_stride = type_struct_member_matrix_stride(struct_type, index);
  7969. row_major_matrix_needs_conversion =
  7970. combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
  7971. }
  7972. else
  7973. row_major_matrix_needs_conversion = false;
  7974. if (!type->array.empty())
  7975. array_stride = type_struct_member_array_stride(struct_type, index);
  7976. }
  7977. // Matrix -> Vector
  7978. else if (type->columns > 1)
  7979. {
  7980. auto *constant = maybe_get<SPIRConstant>(index);
  7981. if (constant)
  7982. {
  7983. index = evaluate_constant_u32(index);
  7984. offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
  7985. }
  7986. else
  7987. {
  7988. uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
  7989. // Dynamic array access.
  7990. if (indexing_stride % word_stride)
  7991. {
  7992. SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
  7993. "4-component vector. "
  7994. "Likely culprit here is a row-major matrix being accessed dynamically. "
  7995. "This cannot be flattened. Try using std140 layout instead.");
  7996. }
  7997. expr += to_enclosed_expression(index, false);
  7998. expr += " * ";
  7999. expr += convert_to_string(indexing_stride / word_stride);
  8000. expr += " + ";
  8001. }
  8002. type = &get<SPIRType>(type->parent_type);
  8003. }
  8004. // Vector -> Scalar
  8005. else if (type->vecsize > 1)
  8006. {
  8007. auto *constant = maybe_get<SPIRConstant>(index);
  8008. if (constant)
  8009. {
  8010. index = evaluate_constant_u32(index);
  8011. offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
  8012. }
  8013. else
  8014. {
  8015. uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
  8016. // Dynamic array access.
  8017. if (indexing_stride % word_stride)
  8018. {
  8019. SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
  8020. "size of a 4-component vector. "
  8021. "This cannot be flattened in legacy targets.");
  8022. }
  8023. expr += to_enclosed_expression(index, false);
  8024. expr += " * ";
  8025. expr += convert_to_string(indexing_stride / word_stride);
  8026. expr += " + ";
  8027. }
  8028. type = &get<SPIRType>(type->parent_type);
  8029. }
  8030. else
  8031. SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
  8032. }
  8033. if (need_transpose)
  8034. *need_transpose = row_major_matrix_needs_conversion;
  8035. if (out_matrix_stride)
  8036. *out_matrix_stride = matrix_stride;
  8037. if (out_array_stride)
  8038. *out_array_stride = array_stride;
  8039. return std::make_pair(expr, offset);
  8040. }
  8041. bool CompilerGLSL::should_dereference(uint32_t id)
  8042. {
  8043. const auto &type = expression_type(id);
  8044. // Non-pointer expressions don't need to be dereferenced.
  8045. if (!type.pointer)
  8046. return false;
  8047. // Handles shouldn't be dereferenced either.
  8048. if (!expression_is_lvalue(id))
  8049. return false;
  8050. // If id is a variable but not a phi variable, we should not dereference it.
  8051. if (auto *var = maybe_get<SPIRVariable>(id))
  8052. return var->phi_variable;
  8053. // If id is an access chain, we should not dereference it.
  8054. if (auto *expr = maybe_get<SPIRExpression>(id))
  8055. return !expr->access_chain;
  8056. // Otherwise, we should dereference this pointer expression.
  8057. return true;
  8058. }
  8059. bool CompilerGLSL::should_forward(uint32_t id) const
  8060. {
  8061. // If id is a variable we will try to forward it regardless of force_temporary check below
  8062. // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL
  8063. auto *var = maybe_get<SPIRVariable>(id);
  8064. if (var && var->forwardable)
  8065. return true;
  8066. // For debugging emit temporary variables for all expressions
  8067. if (options.force_temporary)
  8068. return false;
  8069. // Immutable expression can always be forwarded.
  8070. if (is_immutable(id))
  8071. return true;
  8072. return false;
  8073. }
  8074. bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
  8075. {
  8076. // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
  8077. return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
  8078. }
  8079. void CompilerGLSL::track_expression_read(uint32_t id)
  8080. {
  8081. switch (ir.ids[id].get_type())
  8082. {
  8083. case TypeExpression:
  8084. {
  8085. auto &e = get<SPIRExpression>(id);
  8086. for (auto implied_read : e.implied_read_expressions)
  8087. track_expression_read(implied_read);
  8088. break;
  8089. }
  8090. case TypeAccessChain:
  8091. {
  8092. auto &e = get<SPIRAccessChain>(id);
  8093. for (auto implied_read : e.implied_read_expressions)
  8094. track_expression_read(implied_read);
  8095. break;
  8096. }
  8097. default:
  8098. break;
  8099. }
  8100. // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
  8101. // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
  8102. if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
  8103. {
  8104. auto &v = expression_usage_counts[id];
  8105. v++;
  8106. // If we create an expression outside a loop,
  8107. // but access it inside a loop, we're implicitly reading it multiple times.
  8108. // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
  8109. // working inside the backend compiler.
  8110. if (expression_read_implies_multiple_reads(id))
  8111. v++;
  8112. if (v >= 2)
  8113. {
  8114. //if (v == 2)
  8115. // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
  8116. forced_temporaries.insert(id);
  8117. // Force a recompile after this pass to avoid forwarding this variable.
  8118. force_recompile();
  8119. }
  8120. }
  8121. }
  8122. bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
  8123. {
  8124. if (forced_temporaries.find(id) != end(forced_temporaries))
  8125. return false;
  8126. for (uint32_t i = 0; i < num_args; i++)
  8127. if (!should_forward(args[i]))
  8128. return false;
  8129. // We need to forward globals as well.
  8130. if (!pure)
  8131. {
  8132. for (auto global : global_variables)
  8133. if (!should_forward(global))
  8134. return false;
  8135. for (auto aliased : aliased_variables)
  8136. if (!should_forward(aliased))
  8137. return false;
  8138. }
  8139. return true;
  8140. }
  8141. void CompilerGLSL::register_impure_function_call()
  8142. {
  8143. // Impure functions can modify globals and aliased variables, so invalidate them as well.
  8144. for (auto global : global_variables)
  8145. flush_dependees(get<SPIRVariable>(global));
  8146. for (auto aliased : aliased_variables)
  8147. flush_dependees(get<SPIRVariable>(aliased));
  8148. }
  8149. void CompilerGLSL::register_call_out_argument(uint32_t id)
  8150. {
  8151. register_write(id);
  8152. auto *var = maybe_get<SPIRVariable>(id);
  8153. if (var)
  8154. flush_variable_declaration(var->self);
  8155. }
  8156. string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
  8157. {
  8158. // These variables are always function local,
  8159. // so make sure we emit the variable without storage qualifiers.
  8160. // Some backends will inject custom variables locally in a function
  8161. // with a storage qualifier which is not function-local.
  8162. auto old_storage = var.storage;
  8163. var.storage = StorageClassFunction;
  8164. auto expr = variable_decl(var);
  8165. var.storage = old_storage;
  8166. return expr;
  8167. }
  8168. void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
  8169. {
  8170. // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
  8171. if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
  8172. {
  8173. auto &type = get<SPIRType>(var.basetype);
  8174. auto &flags = get_decoration_bitset(var.self);
  8175. statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
  8176. flushed_phi_variables.insert(var.self);
  8177. }
  8178. }
  8179. void CompilerGLSL::flush_variable_declaration(uint32_t id)
  8180. {
  8181. // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
  8182. auto *var = maybe_get<SPIRVariable>(id);
  8183. if (var && var->deferred_declaration)
  8184. {
  8185. string initializer;
  8186. if (options.force_zero_initialized_variables &&
  8187. (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
  8188. var->storage == StorageClassPrivate) &&
  8189. !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
  8190. {
  8191. initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
  8192. }
  8193. statement(variable_decl_function_local(*var), initializer, ";");
  8194. var->deferred_declaration = false;
  8195. }
  8196. if (var)
  8197. {
  8198. emit_variable_temporary_copies(*var);
  8199. }
  8200. }
  8201. bool CompilerGLSL::remove_duplicate_swizzle(string &op)
  8202. {
  8203. auto pos = op.find_last_of('.');
  8204. if (pos == string::npos || pos == 0)
  8205. return false;
  8206. string final_swiz = op.substr(pos + 1, string::npos);
  8207. if (backend.swizzle_is_function)
  8208. {
  8209. if (final_swiz.size() < 2)
  8210. return false;
  8211. if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
  8212. final_swiz.erase(final_swiz.size() - 2, string::npos);
  8213. else
  8214. return false;
  8215. }
  8216. // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
  8217. // If so, and previous swizzle is of same length,
  8218. // we can drop the final swizzle altogether.
  8219. for (uint32_t i = 0; i < final_swiz.size(); i++)
  8220. {
  8221. static const char expected[] = { 'x', 'y', 'z', 'w' };
  8222. if (i >= 4 || final_swiz[i] != expected[i])
  8223. return false;
  8224. }
  8225. auto prevpos = op.find_last_of('.', pos - 1);
  8226. if (prevpos == string::npos)
  8227. return false;
  8228. prevpos++;
  8229. // Make sure there are only swizzles here ...
  8230. for (auto i = prevpos; i < pos; i++)
  8231. {
  8232. if (op[i] < 'w' || op[i] > 'z')
  8233. {
  8234. // If swizzles are foo.xyz() like in C++ backend for example, check for that.
  8235. if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
  8236. break;
  8237. return false;
  8238. }
  8239. }
  8240. // If original swizzle is large enough, just carve out the components we need.
  8241. // E.g. foobar.wyx.xy will turn into foobar.wy.
  8242. if (pos - prevpos >= final_swiz.size())
  8243. {
  8244. op.erase(prevpos + final_swiz.size(), string::npos);
  8245. // Add back the function call ...
  8246. if (backend.swizzle_is_function)
  8247. op += "()";
  8248. }
  8249. return true;
  8250. }
  8251. // Optimizes away vector swizzles where we have something like
  8252. // vec3 foo;
  8253. // foo.xyz <-- swizzle expression does nothing.
  8254. // This is a very common pattern after OpCompositeCombine.
  8255. bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
  8256. {
  8257. auto pos = op.find_last_of('.');
  8258. if (pos == string::npos || pos == 0)
  8259. return false;
  8260. string final_swiz = op.substr(pos + 1, string::npos);
  8261. if (backend.swizzle_is_function)
  8262. {
  8263. if (final_swiz.size() < 2)
  8264. return false;
  8265. if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
  8266. final_swiz.erase(final_swiz.size() - 2, string::npos);
  8267. else
  8268. return false;
  8269. }
  8270. // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
  8271. // If so, and previous swizzle is of same length,
  8272. // we can drop the final swizzle altogether.
  8273. for (uint32_t i = 0; i < final_swiz.size(); i++)
  8274. {
  8275. static const char expected[] = { 'x', 'y', 'z', 'w' };
  8276. if (i >= 4 || final_swiz[i] != expected[i])
  8277. return false;
  8278. }
  8279. auto &type = expression_type(base);
  8280. // Sanity checking ...
  8281. assert(type.columns == 1 && type.array.empty());
  8282. if (type.vecsize == final_swiz.size())
  8283. op.erase(pos, string::npos);
  8284. return true;
  8285. }
  8286. string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
  8287. {
  8288. ID base = 0;
  8289. string op;
  8290. string subop;
  8291. // Can only merge swizzles for vectors.
  8292. auto &type = get<SPIRType>(return_type);
  8293. bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
  8294. bool swizzle_optimization = false;
  8295. for (uint32_t i = 0; i < length; i++)
  8296. {
  8297. auto *e = maybe_get<SPIRExpression>(elems[i]);
  8298. // If we're merging another scalar which belongs to the same base
  8299. // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible!
  8300. if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
  8301. {
  8302. // Only supposed to be used for vector swizzle -> scalar.
  8303. assert(!e->expression.empty() && e->expression.front() == '.');
  8304. subop += e->expression.substr(1, string::npos);
  8305. swizzle_optimization = true;
  8306. }
  8307. else
  8308. {
  8309. // We'll likely end up with duplicated swizzles, e.g.
  8310. // foobar.xyz.xyz from patterns like
  8311. // OpVectorShuffle
  8312. // OpCompositeExtract x 3
  8313. // OpCompositeConstruct 3x + other scalar.
  8314. // Just modify op in-place.
  8315. if (swizzle_optimization)
  8316. {
  8317. if (backend.swizzle_is_function)
  8318. subop += "()";
  8319. // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
  8320. // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
  8321. // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
  8322. // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
  8323. // Case 1:
  8324. // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
  8325. // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
  8326. // Case 2:
  8327. // foo.xyz: Duplicate swizzle won't kick in.
  8328. // If foo is vec3, we can remove xyz, giving just foo.
  8329. if (!remove_duplicate_swizzle(subop))
  8330. remove_unity_swizzle(base, subop);
  8331. // Strips away redundant parens if we created them during component extraction.
  8332. strip_enclosed_expression(subop);
  8333. swizzle_optimization = false;
  8334. op += subop;
  8335. }
  8336. else
  8337. op += subop;
  8338. if (i)
  8339. op += ", ";
  8340. bool uses_buffer_offset =
  8341. type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
  8342. subop = to_composite_constructor_expression(elems[i], uses_buffer_offset);
  8343. }
  8344. base = e ? e->base_expression : ID(0);
  8345. }
  8346. if (swizzle_optimization)
  8347. {
  8348. if (backend.swizzle_is_function)
  8349. subop += "()";
  8350. if (!remove_duplicate_swizzle(subop))
  8351. remove_unity_swizzle(base, subop);
  8352. // Strips away redundant parens if we created them during component extraction.
  8353. strip_enclosed_expression(subop);
  8354. }
  8355. op += subop;
  8356. return op;
  8357. }
  8358. bool CompilerGLSL::skip_argument(uint32_t id) const
  8359. {
  8360. if (!combined_image_samplers.empty() || !options.vulkan_semantics)
  8361. {
  8362. auto &type = expression_type(id);
  8363. if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
  8364. return true;
  8365. }
  8366. return false;
  8367. }
  8368. bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
  8369. {
  8370. // Do this with strings because we have a very clear pattern we can check for and it avoids
  8371. // adding lots of special cases to the code emission.
  8372. if (rhs.size() < lhs.size() + 3)
  8373. return false;
  8374. // Do not optimize matrices. They are a bit awkward to reason about in general
  8375. // (in which order does operation happen?), and it does not work on MSL anyways.
  8376. if (type.vecsize > 1 && type.columns > 1)
  8377. return false;
  8378. auto index = rhs.find(lhs);
  8379. if (index != 0)
  8380. return false;
  8381. // TODO: Shift operators, but it's not important for now.
  8382. auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
  8383. if (op != lhs.size() + 1)
  8384. return false;
  8385. // Check that the op is followed by space. This excludes && and ||.
  8386. if (rhs[op + 1] != ' ')
  8387. return false;
  8388. char bop = rhs[op];
  8389. auto expr = rhs.substr(lhs.size() + 3);
  8390. // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
  8391. // Find some common patterns which are equivalent.
  8392. if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
  8393. statement(lhs, bop, bop, ";");
  8394. else
  8395. statement(lhs, " ", bop, "= ", expr, ";");
  8396. return true;
  8397. }
  8398. void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
  8399. {
  8400. if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
  8401. return;
  8402. assert(current_emitting_block);
  8403. current_emitting_block->invalidate_expressions.push_back(expr);
  8404. }
  8405. void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
  8406. {
  8407. current_emitting_block = &block;
  8408. for (auto &op : block.ops)
  8409. emit_instruction(op);
  8410. current_emitting_block = nullptr;
  8411. }
  8412. void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
  8413. {
  8414. // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
  8415. // these will be marked as having suppressed usage tracking.
  8416. // Our only concern is to make sure arithmetic operations are done in similar ways.
  8417. if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
  8418. forced_invariant_temporaries.count(expr.self) == 0)
  8419. {
  8420. forced_temporaries.insert(expr.self);
  8421. forced_invariant_temporaries.insert(expr.self);
  8422. force_recompile();
  8423. for (auto &dependent : expr.expression_dependencies)
  8424. disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
  8425. }
  8426. }
  8427. void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
  8428. {
  8429. // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
  8430. // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
  8431. // in one translation unit, but not another, e.g. due to multiple use of an expression.
  8432. // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
  8433. // expressions to be temporaries.
  8434. // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
  8435. // for all reasonable uses of invariant.
  8436. if (!has_decoration(store_id, DecorationInvariant))
  8437. return;
  8438. auto *expr = maybe_get<SPIRExpression>(value_id);
  8439. if (!expr)
  8440. return;
  8441. disallow_forwarding_in_expression_chain(*expr);
  8442. }
  8443. void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
  8444. {
  8445. auto rhs = to_pointer_expression(rhs_expression);
  8446. // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
  8447. if (!rhs.empty())
  8448. {
  8449. handle_store_to_invariant_variable(lhs_expression, rhs_expression);
  8450. auto lhs = to_dereferenced_expression(lhs_expression);
  8451. // We might need to cast in order to store to a builtin.
  8452. cast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
  8453. // Tries to optimize assignments like "<lhs> = <lhs> op expr".
  8454. // While this is purely cosmetic, this is important for legacy ESSL where loop
  8455. // variable increments must be in either i++ or i += const-expr.
  8456. // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
  8457. if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
  8458. statement(lhs, " = ", rhs, ";");
  8459. register_write(lhs_expression);
  8460. }
  8461. }
  8462. uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
  8463. {
  8464. if (instr.length < 3)
  8465. return 32;
  8466. auto *ops = stream(instr);
  8467. switch (instr.op)
  8468. {
  8469. case OpSConvert:
  8470. case OpConvertSToF:
  8471. case OpUConvert:
  8472. case OpConvertUToF:
  8473. case OpIEqual:
  8474. case OpINotEqual:
  8475. case OpSLessThan:
  8476. case OpSLessThanEqual:
  8477. case OpSGreaterThan:
  8478. case OpSGreaterThanEqual:
  8479. case OpULessThan:
  8480. case OpULessThanEqual:
  8481. case OpUGreaterThan:
  8482. case OpUGreaterThanEqual:
  8483. return expression_type(ops[2]).width;
  8484. default:
  8485. {
  8486. // We can look at result type which is more robust.
  8487. auto *type = maybe_get<SPIRType>(ops[0]);
  8488. if (type && type_is_integral(*type))
  8489. return type->width;
  8490. else
  8491. return 32;
  8492. }
  8493. }
  8494. }
  8495. uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
  8496. {
  8497. if (length < 1)
  8498. return 32;
  8499. switch (op)
  8500. {
  8501. case GLSLstd450SAbs:
  8502. case GLSLstd450SSign:
  8503. case GLSLstd450UMin:
  8504. case GLSLstd450SMin:
  8505. case GLSLstd450UMax:
  8506. case GLSLstd450SMax:
  8507. case GLSLstd450UClamp:
  8508. case GLSLstd450SClamp:
  8509. case GLSLstd450FindSMsb:
  8510. case GLSLstd450FindUMsb:
  8511. return expression_type(ops[0]).width;
  8512. default:
  8513. {
  8514. // We don't need to care about other opcodes, just return 32.
  8515. return 32;
  8516. }
  8517. }
  8518. }
  8519. void CompilerGLSL::emit_instruction(const Instruction &instruction)
  8520. {
  8521. auto ops = stream(instruction);
  8522. auto opcode = static_cast<Op>(instruction.op);
  8523. uint32_t length = instruction.length;
  8524. #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
  8525. #define GLSL_BOP_CAST(op, type) \
  8526. emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
  8527. #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
  8528. #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
  8529. #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
  8530. #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
  8531. #define GLSL_BFOP_CAST(op, type) \
  8532. emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
  8533. #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
  8534. #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
  8535. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  8536. uint32_t integer_width = get_integer_width_for_instruction(instruction);
  8537. auto int_type = to_signed_basetype(integer_width);
  8538. auto uint_type = to_unsigned_basetype(integer_width);
  8539. switch (opcode)
  8540. {
  8541. // Dealing with memory
  8542. case OpLoad:
  8543. {
  8544. uint32_t result_type = ops[0];
  8545. uint32_t id = ops[1];
  8546. uint32_t ptr = ops[2];
  8547. flush_variable_declaration(ptr);
  8548. // If we're loading from memory that cannot be changed by the shader,
  8549. // just forward the expression directly to avoid needless temporaries.
  8550. // If an expression is mutable and forwardable, we speculate that it is immutable.
  8551. bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
  8552. // If loading a non-native row-major matrix, mark the expression as need_transpose.
  8553. bool need_transpose = false;
  8554. bool old_need_transpose = false;
  8555. auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
  8556. if (forward)
  8557. {
  8558. // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
  8559. // taking the expression.
  8560. if (ptr_expression && ptr_expression->need_transpose)
  8561. {
  8562. old_need_transpose = true;
  8563. ptr_expression->need_transpose = false;
  8564. need_transpose = true;
  8565. }
  8566. else if (is_non_native_row_major_matrix(ptr))
  8567. need_transpose = true;
  8568. }
  8569. // If we are forwarding this load,
  8570. // don't register the read to access chain here, defer that to when we actually use the expression,
  8571. // using the add_implied_read_expression mechanism.
  8572. string expr;
  8573. bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
  8574. bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
  8575. if (forward || (!is_packed && !is_remapped))
  8576. {
  8577. // For the simple case, we do not need to deal with repacking.
  8578. expr = to_dereferenced_expression(ptr, false);
  8579. }
  8580. else
  8581. {
  8582. // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
  8583. // storing the expression to a temporary.
  8584. expr = to_unpacked_expression(ptr);
  8585. }
  8586. auto &type = get<SPIRType>(result_type);
  8587. auto &expr_type = expression_type(ptr);
  8588. // If the expression has more vector components than the result type, insert
  8589. // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
  8590. // happen with e.g. the MSL backend replacing the type of an input variable.
  8591. if (expr_type.vecsize > type.vecsize)
  8592. expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
  8593. // We might need to cast in order to load from a builtin.
  8594. cast_from_builtin_load(ptr, expr, type);
  8595. // We might be trying to load a gl_Position[N], where we should be
  8596. // doing float4[](gl_in[i].gl_Position, ...) instead.
  8597. // Similar workarounds are required for input arrays in tessellation.
  8598. unroll_array_from_complex_load(id, ptr, expr);
  8599. // Shouldn't need to check for ID, but current glslang codegen requires it in some cases
  8600. // when loading Image/Sampler descriptors. It does not hurt to check ID as well.
  8601. if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
  8602. {
  8603. propagate_nonuniform_qualifier(ptr);
  8604. convert_non_uniform_expression(type, expr);
  8605. }
  8606. if (forward && ptr_expression)
  8607. ptr_expression->need_transpose = old_need_transpose;
  8608. bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
  8609. if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
  8610. rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
  8611. // By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
  8612. // However, if we try to load a complex, composite object from a flattened buffer,
  8613. // we should avoid emitting the same code over and over and lower the result to a temporary.
  8614. bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
  8615. SPIRExpression *e = nullptr;
  8616. if (!forward && expression_is_non_value_type_array(ptr))
  8617. {
  8618. // Complicated load case where we need to make a copy of ptr, but we cannot, because
  8619. // it is an array, and our backend does not support arrays as value types.
  8620. // Emit the temporary, and copy it explicitly.
  8621. e = &emit_uninitialized_temporary_expression(result_type, id);
  8622. emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
  8623. }
  8624. else
  8625. e = &emit_op(result_type, id, expr, forward, !usage_tracking);
  8626. e->need_transpose = need_transpose;
  8627. register_read(id, ptr, forward);
  8628. if (forward)
  8629. {
  8630. // Pass through whether the result is of a packed type and the physical type ID.
  8631. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
  8632. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  8633. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
  8634. {
  8635. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
  8636. get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
  8637. }
  8638. }
  8639. else
  8640. {
  8641. // This might have been set on an earlier compilation iteration, force it to be unset.
  8642. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  8643. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  8644. }
  8645. inherit_expression_dependencies(id, ptr);
  8646. if (forward)
  8647. add_implied_read_expression(*e, ptr);
  8648. break;
  8649. }
  8650. case OpInBoundsAccessChain:
  8651. case OpAccessChain:
  8652. case OpPtrAccessChain:
  8653. {
  8654. auto *var = maybe_get<SPIRVariable>(ops[2]);
  8655. if (var)
  8656. flush_variable_declaration(var->self);
  8657. // If the base is immutable, the access chain pointer must also be.
  8658. // If an expression is mutable and forwardable, we speculate that it is immutable.
  8659. AccessChainMeta meta;
  8660. bool ptr_chain = opcode == OpPtrAccessChain;
  8661. auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain);
  8662. auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
  8663. auto *backing_variable = maybe_get_backing_variable(ops[2]);
  8664. expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
  8665. expr.need_transpose = meta.need_transpose;
  8666. expr.access_chain = true;
  8667. // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed.
  8668. if (meta.storage_is_packed)
  8669. set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
  8670. if (meta.storage_physical_type != 0)
  8671. set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
  8672. if (meta.storage_is_invariant)
  8673. set_decoration(ops[1], DecorationInvariant);
  8674. if (meta.flattened_struct)
  8675. flattened_structs[ops[1]] = true;
  8676. // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
  8677. // temporary which could be subject to invalidation.
  8678. // Need to assume we're forwarded while calling inherit_expression_depdendencies.
  8679. forwarded_temporaries.insert(ops[1]);
  8680. // The access chain itself is never forced to a temporary, but its dependencies might.
  8681. suppressed_usage_tracking.insert(ops[1]);
  8682. for (uint32_t i = 2; i < length; i++)
  8683. {
  8684. inherit_expression_dependencies(ops[1], ops[i]);
  8685. add_implied_read_expression(expr, ops[i]);
  8686. }
  8687. // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
  8688. // we're not forwarded after all.
  8689. if (expr.expression_dependencies.empty())
  8690. forwarded_temporaries.erase(ops[1]);
  8691. if (has_decoration(ops[1], DecorationNonUniformEXT))
  8692. propagate_nonuniform_qualifier(ops[1]);
  8693. break;
  8694. }
  8695. case OpStore:
  8696. {
  8697. auto *var = maybe_get<SPIRVariable>(ops[0]);
  8698. if (var && var->statically_assigned)
  8699. var->static_expression = ops[1];
  8700. else if (var && var->loop_variable && !var->loop_variable_enable)
  8701. var->static_expression = ops[1];
  8702. else if (var && var->remapped_variable && var->static_expression)
  8703. {
  8704. // Skip the write.
  8705. }
  8706. else if (flattened_structs.count(ops[0]))
  8707. {
  8708. store_flattened_struct(ops[0], ops[1]);
  8709. register_write(ops[0]);
  8710. }
  8711. else
  8712. {
  8713. emit_store_statement(ops[0], ops[1]);
  8714. }
  8715. // Storing a pointer results in a variable pointer, so we must conservatively assume
  8716. // we can write through it.
  8717. if (expression_type(ops[1]).pointer)
  8718. register_write(ops[1]);
  8719. break;
  8720. }
  8721. case OpArrayLength:
  8722. {
  8723. uint32_t result_type = ops[0];
  8724. uint32_t id = ops[1];
  8725. auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  8726. set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
  8727. true);
  8728. break;
  8729. }
  8730. // Function calls
  8731. case OpFunctionCall:
  8732. {
  8733. uint32_t result_type = ops[0];
  8734. uint32_t id = ops[1];
  8735. uint32_t func = ops[2];
  8736. const auto *arg = &ops[3];
  8737. length -= 3;
  8738. auto &callee = get<SPIRFunction>(func);
  8739. auto &return_type = get<SPIRType>(callee.return_type);
  8740. bool pure = function_is_pure(callee);
  8741. bool callee_has_out_variables = false;
  8742. bool emit_return_value_as_argument = false;
  8743. // Invalidate out variables passed to functions since they can be OpStore'd to.
  8744. for (uint32_t i = 0; i < length; i++)
  8745. {
  8746. if (callee.arguments[i].write_count)
  8747. {
  8748. register_call_out_argument(arg[i]);
  8749. callee_has_out_variables = true;
  8750. }
  8751. flush_variable_declaration(arg[i]);
  8752. }
  8753. if (!return_type.array.empty() && !backend.can_return_array)
  8754. {
  8755. callee_has_out_variables = true;
  8756. emit_return_value_as_argument = true;
  8757. }
  8758. if (!pure)
  8759. register_impure_function_call();
  8760. string funexpr;
  8761. SmallVector<string> arglist;
  8762. funexpr += to_name(func) + "(";
  8763. if (emit_return_value_as_argument)
  8764. {
  8765. statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
  8766. arglist.push_back(to_name(id));
  8767. }
  8768. for (uint32_t i = 0; i < length; i++)
  8769. {
  8770. // Do not pass in separate images or samplers if we're remapping
  8771. // to combined image samplers.
  8772. if (skip_argument(arg[i]))
  8773. continue;
  8774. arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
  8775. }
  8776. for (auto &combined : callee.combined_parameters)
  8777. {
  8778. auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
  8779. auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
  8780. arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
  8781. }
  8782. append_global_func_args(callee, length, arglist);
  8783. funexpr += merge(arglist);
  8784. funexpr += ")";
  8785. // Check for function call constraints.
  8786. check_function_call_constraints(arg, length);
  8787. if (return_type.basetype != SPIRType::Void)
  8788. {
  8789. // If the function actually writes to an out variable,
  8790. // take the conservative route and do not forward.
  8791. // The problem is that we might not read the function
  8792. // result (and emit the function) before an out variable
  8793. // is read (common case when return value is ignored!
  8794. // In order to avoid start tracking invalid variables,
  8795. // just avoid the forwarding problem altogether.
  8796. bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
  8797. (forced_temporaries.find(id) == end(forced_temporaries));
  8798. if (emit_return_value_as_argument)
  8799. {
  8800. statement(funexpr, ";");
  8801. set<SPIRExpression>(id, to_name(id), result_type, true);
  8802. }
  8803. else
  8804. emit_op(result_type, id, funexpr, forward);
  8805. // Function calls are implicit loads from all variables in question.
  8806. // Set dependencies for them.
  8807. for (uint32_t i = 0; i < length; i++)
  8808. register_read(id, arg[i], forward);
  8809. // If we're going to forward the temporary result,
  8810. // put dependencies on every variable that must not change.
  8811. if (forward)
  8812. register_global_read_dependencies(callee, id);
  8813. }
  8814. else
  8815. statement(funexpr, ";");
  8816. break;
  8817. }
  8818. // Composite munging
  8819. case OpCompositeConstruct:
  8820. {
  8821. uint32_t result_type = ops[0];
  8822. uint32_t id = ops[1];
  8823. const auto *const elems = &ops[2];
  8824. length -= 2;
  8825. bool forward = true;
  8826. for (uint32_t i = 0; i < length; i++)
  8827. forward = forward && should_forward(elems[i]);
  8828. auto &out_type = get<SPIRType>(result_type);
  8829. auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
  8830. // Only splat if we have vector constructors.
  8831. // Arrays and structs must be initialized properly in full.
  8832. bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
  8833. bool splat = false;
  8834. bool swizzle_splat = false;
  8835. if (in_type)
  8836. {
  8837. splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
  8838. swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
  8839. if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
  8840. {
  8841. // Cannot swizzle literal integers as a special case.
  8842. swizzle_splat = false;
  8843. }
  8844. }
  8845. if (splat || swizzle_splat)
  8846. {
  8847. uint32_t input = elems[0];
  8848. for (uint32_t i = 0; i < length; i++)
  8849. {
  8850. if (input != elems[i])
  8851. {
  8852. splat = false;
  8853. swizzle_splat = false;
  8854. }
  8855. }
  8856. }
  8857. if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
  8858. forward = false;
  8859. if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
  8860. forward = false;
  8861. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  8862. forward = false;
  8863. string constructor_op;
  8864. if (backend.use_initializer_list && composite)
  8865. {
  8866. bool needs_trailing_tracket = false;
  8867. // Only use this path if we are building composites.
  8868. // This path cannot be used for arithmetic.
  8869. if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
  8870. constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
  8871. else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
  8872. {
  8873. // MSL path. Array constructor is baked into type here, do not use _constructor variant.
  8874. constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
  8875. needs_trailing_tracket = true;
  8876. }
  8877. constructor_op += "{ ";
  8878. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  8879. constructor_op += "0";
  8880. else if (splat)
  8881. constructor_op += to_unpacked_expression(elems[0]);
  8882. else
  8883. constructor_op += build_composite_combiner(result_type, elems, length);
  8884. constructor_op += " }";
  8885. if (needs_trailing_tracket)
  8886. constructor_op += ")";
  8887. }
  8888. else if (swizzle_splat && !composite)
  8889. {
  8890. constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
  8891. }
  8892. else
  8893. {
  8894. constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
  8895. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  8896. constructor_op += "0";
  8897. else if (splat)
  8898. constructor_op += to_unpacked_expression(elems[0]);
  8899. else
  8900. constructor_op += build_composite_combiner(result_type, elems, length);
  8901. constructor_op += ")";
  8902. }
  8903. if (!constructor_op.empty())
  8904. {
  8905. emit_op(result_type, id, constructor_op, forward);
  8906. for (uint32_t i = 0; i < length; i++)
  8907. inherit_expression_dependencies(id, elems[i]);
  8908. }
  8909. break;
  8910. }
  8911. case OpVectorInsertDynamic:
  8912. {
  8913. uint32_t result_type = ops[0];
  8914. uint32_t id = ops[1];
  8915. uint32_t vec = ops[2];
  8916. uint32_t comp = ops[3];
  8917. uint32_t index = ops[4];
  8918. flush_variable_declaration(vec);
  8919. // Make a copy, then use access chain to store the variable.
  8920. statement(declare_temporary(result_type, id), to_expression(vec), ";");
  8921. set<SPIRExpression>(id, to_name(id), result_type, true);
  8922. auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
  8923. statement(chain, " = ", to_unpacked_expression(comp), ";");
  8924. break;
  8925. }
  8926. case OpVectorExtractDynamic:
  8927. {
  8928. uint32_t result_type = ops[0];
  8929. uint32_t id = ops[1];
  8930. auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
  8931. emit_op(result_type, id, expr, should_forward(ops[2]));
  8932. inherit_expression_dependencies(id, ops[2]);
  8933. inherit_expression_dependencies(id, ops[3]);
  8934. break;
  8935. }
  8936. case OpCompositeExtract:
  8937. {
  8938. uint32_t result_type = ops[0];
  8939. uint32_t id = ops[1];
  8940. length -= 3;
  8941. auto &type = get<SPIRType>(result_type);
  8942. // We can only split the expression here if our expression is forwarded as a temporary.
  8943. bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
  8944. // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
  8945. auto &composite_type = expression_type(ops[2]);
  8946. bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
  8947. if (composite_type_is_complex)
  8948. allow_base_expression = false;
  8949. // Packed expressions or physical ID mapped expressions cannot be split up.
  8950. if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
  8951. has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
  8952. allow_base_expression = false;
  8953. // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
  8954. // into the base expression.
  8955. if (is_non_native_row_major_matrix(ops[2]))
  8956. allow_base_expression = false;
  8957. AccessChainMeta meta;
  8958. SPIRExpression *e = nullptr;
  8959. auto *c = maybe_get<SPIRConstant>(ops[2]);
  8960. if (c && !c->specialization && !composite_type_is_complex)
  8961. {
  8962. auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length);
  8963. e = &emit_op(result_type, id, expr, true, true);
  8964. }
  8965. else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
  8966. {
  8967. // Only apply this optimization if result is scalar.
  8968. // We want to split the access chain from the base.
  8969. // This is so we can later combine different CompositeExtract results
  8970. // with CompositeConstruct without emitting code like
  8971. //
  8972. // vec3 temp = texture(...).xyz
  8973. // vec4(temp.x, temp.y, temp.z, 1.0).
  8974. //
  8975. // when we actually wanted to emit this
  8976. // vec4(texture(...).xyz, 1.0).
  8977. //
  8978. // Including the base will prevent this and would trigger multiple reads
  8979. // from expression causing it to be forced to an actual temporary in GLSL.
  8980. auto expr = access_chain_internal(ops[2], &ops[3], length,
  8981. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta);
  8982. e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
  8983. inherit_expression_dependencies(id, ops[2]);
  8984. e->base_expression = ops[2];
  8985. }
  8986. else
  8987. {
  8988. auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  8989. e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
  8990. inherit_expression_dependencies(id, ops[2]);
  8991. }
  8992. // Pass through some meta information to the loaded expression.
  8993. // We can still end up loading a buffer type to a variable, then CompositeExtract from it
  8994. // instead of loading everything through an access chain.
  8995. e->need_transpose = meta.need_transpose;
  8996. if (meta.storage_is_packed)
  8997. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  8998. if (meta.storage_physical_type != 0)
  8999. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
  9000. if (meta.storage_is_invariant)
  9001. set_decoration(id, DecorationInvariant);
  9002. break;
  9003. }
  9004. case OpCompositeInsert:
  9005. {
  9006. uint32_t result_type = ops[0];
  9007. uint32_t id = ops[1];
  9008. uint32_t obj = ops[2];
  9009. uint32_t composite = ops[3];
  9010. const auto *elems = &ops[4];
  9011. length -= 4;
  9012. flush_variable_declaration(composite);
  9013. // Make a copy, then use access chain to store the variable.
  9014. statement(declare_temporary(result_type, id), to_expression(composite), ";");
  9015. set<SPIRExpression>(id, to_name(id), result_type, true);
  9016. auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  9017. statement(chain, " = ", to_unpacked_expression(obj), ";");
  9018. break;
  9019. }
  9020. case OpCopyMemory:
  9021. {
  9022. uint32_t lhs = ops[0];
  9023. uint32_t rhs = ops[1];
  9024. if (lhs != rhs)
  9025. {
  9026. flush_variable_declaration(lhs);
  9027. flush_variable_declaration(rhs);
  9028. statement(to_expression(lhs), " = ", to_unpacked_expression(rhs), ";");
  9029. register_write(lhs);
  9030. }
  9031. break;
  9032. }
  9033. case OpCopyLogical:
  9034. {
  9035. // This is used for copying object of different types, arrays and structs.
  9036. // We need to unroll the copy, element-by-element.
  9037. uint32_t result_type = ops[0];
  9038. uint32_t id = ops[1];
  9039. uint32_t rhs = ops[2];
  9040. emit_uninitialized_temporary_expression(result_type, id);
  9041. emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
  9042. break;
  9043. }
  9044. case OpCopyObject:
  9045. {
  9046. uint32_t result_type = ops[0];
  9047. uint32_t id = ops[1];
  9048. uint32_t rhs = ops[2];
  9049. bool pointer = get<SPIRType>(result_type).pointer;
  9050. auto *chain = maybe_get<SPIRAccessChain>(rhs);
  9051. auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
  9052. if (chain)
  9053. {
  9054. // Cannot lower to a SPIRExpression, just copy the object.
  9055. auto &e = set<SPIRAccessChain>(id, *chain);
  9056. e.self = id;
  9057. }
  9058. else if (imgsamp)
  9059. {
  9060. // Cannot lower to a SPIRExpression, just copy the object.
  9061. // GLSL does not currently use this type and will never get here, but MSL does.
  9062. // Handled here instead of CompilerMSL for better integration and general handling,
  9063. // and in case GLSL or other subclasses require it in the future.
  9064. auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
  9065. e.self = id;
  9066. }
  9067. else if (expression_is_lvalue(rhs) && !pointer)
  9068. {
  9069. // Need a copy.
  9070. // For pointer types, we copy the pointer itself.
  9071. statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
  9072. set<SPIRExpression>(id, to_name(id), result_type, true);
  9073. }
  9074. else
  9075. {
  9076. // RHS expression is immutable, so just forward it.
  9077. // Copying these things really make no sense, but
  9078. // seems to be allowed anyways.
  9079. auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
  9080. if (pointer)
  9081. {
  9082. auto *var = maybe_get_backing_variable(rhs);
  9083. e.loaded_from = var ? var->self : ID(0);
  9084. }
  9085. // If we're copying an access chain, need to inherit the read expressions.
  9086. auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
  9087. if (rhs_expr)
  9088. {
  9089. e.implied_read_expressions = rhs_expr->implied_read_expressions;
  9090. e.expression_dependencies = rhs_expr->expression_dependencies;
  9091. }
  9092. }
  9093. break;
  9094. }
  9095. case OpVectorShuffle:
  9096. {
  9097. uint32_t result_type = ops[0];
  9098. uint32_t id = ops[1];
  9099. uint32_t vec0 = ops[2];
  9100. uint32_t vec1 = ops[3];
  9101. const auto *elems = &ops[4];
  9102. length -= 4;
  9103. auto &type0 = expression_type(vec0);
  9104. // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
  9105. // or in our case, T(0).
  9106. bool shuffle = false;
  9107. for (uint32_t i = 0; i < length; i++)
  9108. if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
  9109. shuffle = true;
  9110. // Cannot use swizzles with packed expressions, force shuffle path.
  9111. if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
  9112. shuffle = true;
  9113. string expr;
  9114. bool should_fwd, trivial_forward;
  9115. if (shuffle)
  9116. {
  9117. should_fwd = should_forward(vec0) && should_forward(vec1);
  9118. trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
  9119. // Constructor style and shuffling from two different vectors.
  9120. SmallVector<string> args;
  9121. for (uint32_t i = 0; i < length; i++)
  9122. {
  9123. if (elems[i] == 0xffffffffu)
  9124. {
  9125. // Use a constant 0 here.
  9126. // We could use the first component or similar, but then we risk propagating
  9127. // a value we might not need, and bog down codegen.
  9128. SPIRConstant c;
  9129. c.constant_type = type0.parent_type;
  9130. assert(type0.parent_type != ID(0));
  9131. args.push_back(constant_expression(c));
  9132. }
  9133. else if (elems[i] >= type0.vecsize)
  9134. args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
  9135. else
  9136. args.push_back(to_extract_component_expression(vec0, elems[i]));
  9137. }
  9138. expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
  9139. }
  9140. else
  9141. {
  9142. should_fwd = should_forward(vec0);
  9143. trivial_forward = should_suppress_usage_tracking(vec0);
  9144. // We only source from first vector, so can use swizzle.
  9145. // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
  9146. expr += to_enclosed_unpacked_expression(vec0);
  9147. expr += ".";
  9148. for (uint32_t i = 0; i < length; i++)
  9149. {
  9150. assert(elems[i] != 0xffffffffu);
  9151. expr += index_to_swizzle(elems[i]);
  9152. }
  9153. if (backend.swizzle_is_function && length > 1)
  9154. expr += "()";
  9155. }
  9156. // A shuffle is trivial in that it doesn't actually *do* anything.
  9157. // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
  9158. emit_op(result_type, id, expr, should_fwd, trivial_forward);
  9159. inherit_expression_dependencies(id, vec0);
  9160. if (vec0 != vec1)
  9161. inherit_expression_dependencies(id, vec1);
  9162. break;
  9163. }
  9164. // ALU
  9165. case OpIsNan:
  9166. GLSL_UFOP(isnan);
  9167. break;
  9168. case OpIsInf:
  9169. GLSL_UFOP(isinf);
  9170. break;
  9171. case OpSNegate:
  9172. case OpFNegate:
  9173. GLSL_UOP(-);
  9174. break;
  9175. case OpIAdd:
  9176. {
  9177. // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
  9178. auto type = get<SPIRType>(ops[0]).basetype;
  9179. GLSL_BOP_CAST(+, type);
  9180. break;
  9181. }
  9182. case OpFAdd:
  9183. GLSL_BOP(+);
  9184. break;
  9185. case OpISub:
  9186. {
  9187. auto type = get<SPIRType>(ops[0]).basetype;
  9188. GLSL_BOP_CAST(-, type);
  9189. break;
  9190. }
  9191. case OpFSub:
  9192. GLSL_BOP(-);
  9193. break;
  9194. case OpIMul:
  9195. {
  9196. auto type = get<SPIRType>(ops[0]).basetype;
  9197. GLSL_BOP_CAST(*, type);
  9198. break;
  9199. }
  9200. case OpVectorTimesMatrix:
  9201. case OpMatrixTimesVector:
  9202. {
  9203. // If the matrix needs transpose, just flip the multiply order.
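// e.g. if M is tagged as needing a transpose, transpose(M) * v can be emitted as v * M instead,
// since GLSL treats vector * matrix as multiplication by the transposed matrix (illustrative note).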
  9204. auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
  9205. if (e && e->need_transpose)
  9206. {
  9207. e->need_transpose = false;
  9208. string expr;
  9209. if (opcode == OpMatrixTimesVector)
  9210. expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
  9211. enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
  9212. else
  9213. expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
  9214. to_enclosed_unpacked_expression(ops[2]));
  9215. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  9216. emit_op(ops[0], ops[1], expr, forward);
  9217. e->need_transpose = true;
  9218. inherit_expression_dependencies(ops[1], ops[2]);
  9219. inherit_expression_dependencies(ops[1], ops[3]);
  9220. }
  9221. else
  9222. GLSL_BOP(*);
  9223. break;
  9224. }
  9225. case OpMatrixTimesMatrix:
  9226. {
  9227. auto *a = maybe_get<SPIRExpression>(ops[2]);
  9228. auto *b = maybe_get<SPIRExpression>(ops[3]);
  9229. // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
  9230. // a^T * b^T = (b * a)^T.
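// Illustrative sketch: rather than emitting transpose(A) * transpose(B), emit (B * A) and tag the
// result as transposed, so convert_row_major_matrix() is only applied if a consumer really needs it.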
  9231. if (a && b && a->need_transpose && b->need_transpose)
  9232. {
  9233. a->need_transpose = false;
  9234. b->need_transpose = false;
  9235. auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
  9236. enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
  9237. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  9238. auto &e = emit_op(ops[0], ops[1], expr, forward);
  9239. e.need_transpose = true;
  9240. a->need_transpose = true;
  9241. b->need_transpose = true;
  9242. inherit_expression_dependencies(ops[1], ops[2]);
  9243. inherit_expression_dependencies(ops[1], ops[3]);
  9244. }
  9245. else
  9246. GLSL_BOP(*);
  9247. break;
  9248. }
  9249. case OpFMul:
  9250. case OpMatrixTimesScalar:
  9251. case OpVectorTimesScalar:
  9252. GLSL_BOP(*);
  9253. break;
  9254. case OpOuterProduct:
  9255. GLSL_BFOP(outerProduct);
  9256. break;
  9257. case OpDot:
  9258. GLSL_BFOP(dot);
  9259. break;
  9260. case OpTranspose:
  9261. if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
  9262. {
  9263. // transpose() is not available, so instead, flip need_transpose,
  9264. // which can later be turned into an emulated transpose op by
  9265. // convert_row_major_matrix(), if necessary.
  9266. uint32_t result_type = ops[0];
  9267. uint32_t result_id = ops[1];
  9268. uint32_t input = ops[2];
  9269. // Force need_transpose to false temporarily to prevent
  9270. // to_expression() from doing the transpose.
  9271. bool need_transpose = false;
  9272. auto *input_e = maybe_get<SPIRExpression>(input);
  9273. if (input_e)
  9274. swap(need_transpose, input_e->need_transpose);
  9275. bool forward = should_forward(input);
  9276. auto &e = emit_op(result_type, result_id, to_expression(input), forward);
  9277. e.need_transpose = !need_transpose;
  9278. // Restore the old need_transpose flag.
  9279. if (input_e)
  9280. input_e->need_transpose = need_transpose;
  9281. }
  9282. else
  9283. GLSL_UFOP(transpose);
  9284. break;
  9285. case OpSRem:
  9286. {
  9287. uint32_t result_type = ops[0];
  9288. uint32_t result_id = ops[1];
  9289. uint32_t op0 = ops[2];
  9290. uint32_t op1 = ops[3];
  9291. // Needs special handling.
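// OpSRem takes the sign of the dividend, so emulate it as op0 - op1 * (op0 / op1), assuming
// C-style truncating integer division. For example, (-7, 3) yields -7 - 3 * (-2) = -1 (illustrative).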
  9292. bool forward = should_forward(op0) && should_forward(op1);
  9293. auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
  9294. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
  9295. emit_op(result_type, result_id, expr, forward);
  9296. inherit_expression_dependencies(result_id, op0);
  9297. inherit_expression_dependencies(result_id, op1);
  9298. break;
  9299. }
  9300. case OpSDiv:
  9301. GLSL_BOP_CAST(/, int_type);
  9302. break;
  9303. case OpUDiv:
  9304. GLSL_BOP_CAST(/, uint_type);
  9305. break;
  9306. case OpIAddCarry:
  9307. case OpISubBorrow:
  9308. {
  9309. if (options.es && options.version < 310)
  9310. SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
  9311. else if (!options.es && options.version < 400)
  9312. SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
  9313. uint32_t result_type = ops[0];
  9314. uint32_t result_id = ops[1];
  9315. uint32_t op0 = ops[2];
  9316. uint32_t op1 = ops[3];
  9317. auto &type = get<SPIRType>(result_type);
  9318. emit_uninitialized_temporary_expression(result_type, result_id);
  9319. const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
  9320. statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
  9321. to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
  9322. break;
  9323. }
  9324. case OpUMulExtended:
  9325. case OpSMulExtended:
  9326. {
  9327. if (options.es && options.version < 310)
  9328. SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
  9329. else if (!options.es && options.version < 400)
  9330. SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000.");
  9331. uint32_t result_type = ops[0];
  9332. uint32_t result_id = ops[1];
  9333. uint32_t op0 = ops[2];
  9334. uint32_t op1 = ops[3];
  9335. auto &type = get<SPIRType>(result_type);
  9336. emit_uninitialized_temporary_expression(result_type, result_id);
  9337. const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
  9338. statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
  9339. to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
  9340. break;
  9341. }
  9342. case OpFDiv:
  9343. GLSL_BOP(/);
  9344. break;
  9345. case OpShiftRightLogical:
  9346. GLSL_BOP_CAST(>>, uint_type);
  9347. break;
  9348. case OpShiftRightArithmetic:
  9349. GLSL_BOP_CAST(>>, int_type);
  9350. break;
  9351. case OpShiftLeftLogical:
  9352. {
  9353. auto type = get<SPIRType>(ops[0]).basetype;
  9354. GLSL_BOP_CAST(<<, type);
  9355. break;
  9356. }
  9357. case OpBitwiseOr:
  9358. {
  9359. auto type = get<SPIRType>(ops[0]).basetype;
  9360. GLSL_BOP_CAST(|, type);
  9361. break;
  9362. }
  9363. case OpBitwiseXor:
  9364. {
  9365. auto type = get<SPIRType>(ops[0]).basetype;
  9366. GLSL_BOP_CAST(^, type);
  9367. break;
  9368. }
  9369. case OpBitwiseAnd:
  9370. {
  9371. auto type = get<SPIRType>(ops[0]).basetype;
  9372. GLSL_BOP_CAST(&, type);
  9373. break;
  9374. }
  9375. case OpNot:
  9376. GLSL_UOP(~);
  9377. break;
  9378. case OpUMod:
  9379. GLSL_BOP_CAST(%, uint_type);
  9380. break;
  9381. case OpSMod:
  9382. GLSL_BOP_CAST(%, int_type);
  9383. break;
  9384. case OpFMod:
  9385. GLSL_BFOP(mod);
  9386. break;
  9387. case OpFRem:
  9388. {
  9389. if (is_legacy())
  9390. SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
  9391. "needed for legacy.");
  9392. uint32_t result_type = ops[0];
  9393. uint32_t result_id = ops[1];
  9394. uint32_t op0 = ops[2];
  9395. uint32_t op1 = ops[3];
  9396. // Needs special handling.
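// OpFRem takes the sign of the dividend, whereas GLSL's mod() follows the divisor, hence the
// trunc()-based emulation: the emitted GLSL looks roughly like a - b * trunc(a / b) (illustrative).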
  9397. bool forward = should_forward(op0) && should_forward(op1);
  9398. auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
  9399. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
  9400. emit_op(result_type, result_id, expr, forward);
  9401. inherit_expression_dependencies(result_id, op0);
  9402. inherit_expression_dependencies(result_id, op1);
  9403. break;
  9404. }
  9405. // Relational
  9406. case OpAny:
  9407. GLSL_UFOP(any);
  9408. break;
  9409. case OpAll:
  9410. GLSL_UFOP(all);
  9411. break;
  9412. case OpSelect:
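// OpSelect(cond, true_val, false_val) maps onto a mix()/ternary select: GLSL's mix(x, y, a) with a
// boolean selector returns y where a is true, hence the reversed operand order below (descriptive note).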
  9413. emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
  9414. break;
  9415. case OpLogicalOr:
  9416. {
  9417. // No vector variant in GLSL for logical OR.
  9418. auto result_type = ops[0];
  9419. auto id = ops[1];
  9420. auto &type = get<SPIRType>(result_type);
  9421. if (type.vecsize > 1)
  9422. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
  9423. else
  9424. GLSL_BOP(||);
  9425. break;
  9426. }
  9427. case OpLogicalAnd:
  9428. {
  9429. // No vector variant in GLSL for logical AND.
  9430. auto result_type = ops[0];
  9431. auto id = ops[1];
  9432. auto &type = get<SPIRType>(result_type);
  9433. if (type.vecsize > 1)
  9434. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
  9435. else
  9436. GLSL_BOP(&&);
  9437. break;
  9438. }
  9439. case OpLogicalNot:
  9440. {
  9441. auto &type = get<SPIRType>(ops[0]);
  9442. if (type.vecsize > 1)
  9443. GLSL_UFOP(not );
  9444. else
  9445. GLSL_UOP(!);
  9446. break;
  9447. }
  9448. case OpIEqual:
  9449. {
  9450. if (expression_type(ops[2]).vecsize > 1)
  9451. GLSL_BFOP_CAST(equal, int_type);
  9452. else
  9453. GLSL_BOP_CAST(==, int_type);
  9454. break;
  9455. }
  9456. case OpLogicalEqual:
  9457. case OpFOrdEqual:
  9458. {
  9459. if (expression_type(ops[2]).vecsize > 1)
  9460. GLSL_BFOP(equal);
  9461. else
  9462. GLSL_BOP(==);
  9463. break;
  9464. }
  9465. case OpINotEqual:
  9466. {
  9467. if (expression_type(ops[2]).vecsize > 1)
  9468. GLSL_BFOP_CAST(notEqual, int_type);
  9469. else
  9470. GLSL_BOP_CAST(!=, int_type);
  9471. break;
  9472. }
  9473. case OpLogicalNotEqual:
  9474. case OpFOrdNotEqual:
  9475. {
  9476. if (expression_type(ops[2]).vecsize > 1)
  9477. GLSL_BFOP(notEqual);
  9478. else
  9479. GLSL_BOP(!=);
  9480. break;
  9481. }
  9482. case OpUGreaterThan:
  9483. case OpSGreaterThan:
  9484. {
  9485. auto type = opcode == OpUGreaterThan ? uint_type : int_type;
  9486. if (expression_type(ops[2]).vecsize > 1)
  9487. GLSL_BFOP_CAST(greaterThan, type);
  9488. else
  9489. GLSL_BOP_CAST(>, type);
  9490. break;
  9491. }
  9492. case OpFOrdGreaterThan:
  9493. {
  9494. if (expression_type(ops[2]).vecsize > 1)
  9495. GLSL_BFOP(greaterThan);
  9496. else
  9497. GLSL_BOP(>);
  9498. break;
  9499. }
  9500. case OpUGreaterThanEqual:
  9501. case OpSGreaterThanEqual:
  9502. {
  9503. auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
  9504. if (expression_type(ops[2]).vecsize > 1)
  9505. GLSL_BFOP_CAST(greaterThanEqual, type);
  9506. else
  9507. GLSL_BOP_CAST(>=, type);
  9508. break;
  9509. }
  9510. case OpFOrdGreaterThanEqual:
  9511. {
  9512. if (expression_type(ops[2]).vecsize > 1)
  9513. GLSL_BFOP(greaterThanEqual);
  9514. else
  9515. GLSL_BOP(>=);
  9516. break;
  9517. }
  9518. case OpULessThan:
  9519. case OpSLessThan:
  9520. {
  9521. auto type = opcode == OpULessThan ? uint_type : int_type;
  9522. if (expression_type(ops[2]).vecsize > 1)
  9523. GLSL_BFOP_CAST(lessThan, type);
  9524. else
  9525. GLSL_BOP_CAST(<, type);
  9526. break;
  9527. }
  9528. case OpFOrdLessThan:
  9529. {
  9530. if (expression_type(ops[2]).vecsize > 1)
  9531. GLSL_BFOP(lessThan);
  9532. else
  9533. GLSL_BOP(<);
  9534. break;
  9535. }
  9536. case OpULessThanEqual:
  9537. case OpSLessThanEqual:
  9538. {
  9539. auto type = opcode == OpULessThanEqual ? uint_type : int_type;
  9540. if (expression_type(ops[2]).vecsize > 1)
  9541. GLSL_BFOP_CAST(lessThanEqual, type);
  9542. else
  9543. GLSL_BOP_CAST(<=, type);
  9544. break;
  9545. }
  9546. case OpFOrdLessThanEqual:
  9547. {
  9548. if (expression_type(ops[2]).vecsize > 1)
  9549. GLSL_BFOP(lessThanEqual);
  9550. else
  9551. GLSL_BOP(<=);
  9552. break;
  9553. }
  9554. // Conversion
  9555. case OpSConvert:
  9556. case OpConvertSToF:
  9557. case OpUConvert:
  9558. case OpConvertUToF:
  9559. {
  9560. auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
  9561. uint32_t result_type = ops[0];
  9562. uint32_t id = ops[1];
  9563. auto &type = get<SPIRType>(result_type);
  9564. auto &arg_type = expression_type(ops[2]);
  9565. auto func = type_to_glsl_constructor(type);
  9566. if (arg_type.width < type.width || type_is_floating_point(type))
  9567. emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
  9568. else
  9569. emit_unary_func_op(result_type, id, ops[2], func.c_str());
  9570. break;
  9571. }
  9572. case OpConvertFToU:
  9573. case OpConvertFToS:
  9574. {
  9575. // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
  9576. uint32_t result_type = ops[0];
  9577. uint32_t id = ops[1];
  9578. auto &type = get<SPIRType>(result_type);
  9579. auto expected_type = type;
  9580. auto &float_type = expression_type(ops[2]);
  9581. expected_type.basetype =
  9582. opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
  9583. auto func = type_to_glsl_constructor(expected_type);
  9584. emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
  9585. break;
  9586. }
  9587. case OpFConvert:
  9588. {
  9589. uint32_t result_type = ops[0];
  9590. uint32_t id = ops[1];
  9591. auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
  9592. emit_unary_func_op(result_type, id, ops[2], func.c_str());
  9593. break;
  9594. }
  9595. case OpBitcast:
  9596. {
  9597. uint32_t result_type = ops[0];
  9598. uint32_t id = ops[1];
  9599. uint32_t arg = ops[2];
  9600. if (!emit_complex_bitcast(result_type, id, arg))
  9601. {
  9602. auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
  9603. emit_unary_func_op(result_type, id, arg, op.c_str());
  9604. }
  9605. break;
  9606. }
  9607. case OpQuantizeToF16:
  9608. {
  9609. uint32_t result_type = ops[0];
  9610. uint32_t id = ops[1];
  9611. uint32_t arg = ops[2];
  9612. string op;
  9613. auto &type = get<SPIRType>(result_type);
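// Quantization is emulated by a round-trip through 16-bit floats, packHalf2x16() followed by
// unpackHalf2x16(), built up per component count below (descriptive note on the existing code).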
  9614. switch (type.vecsize)
  9615. {
  9616. case 1:
  9617. op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
  9618. break;
  9619. case 2:
  9620. op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
  9621. break;
  9622. case 3:
  9623. {
  9624. auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
  9625. auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
  9626. op = join("vec3(", op0, ", ", op1, ")");
  9627. break;
  9628. }
  9629. case 4:
  9630. {
  9631. auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
  9632. auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
  9633. op = join("vec4(", op0, ", ", op1, ")");
  9634. break;
  9635. }
  9636. default:
  9637. SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
  9638. }
  9639. emit_op(result_type, id, op, should_forward(arg));
  9640. inherit_expression_dependencies(id, arg);
  9641. break;
  9642. }
  9643. // Derivatives
  9644. case OpDPdx:
  9645. GLSL_UFOP(dFdx);
  9646. if (is_legacy_es())
  9647. require_extension_internal("GL_OES_standard_derivatives");
  9648. register_control_dependent_expression(ops[1]);
  9649. break;
  9650. case OpDPdy:
  9651. GLSL_UFOP(dFdy);
  9652. if (is_legacy_es())
  9653. require_extension_internal("GL_OES_standard_derivatives");
  9654. register_control_dependent_expression(ops[1]);
  9655. break;
  9656. case OpDPdxFine:
  9657. GLSL_UFOP(dFdxFine);
  9658. if (options.es)
  9659. {
  9660. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9661. }
  9662. if (options.version < 450)
  9663. require_extension_internal("GL_ARB_derivative_control");
  9664. register_control_dependent_expression(ops[1]);
  9665. break;
  9666. case OpDPdyFine:
  9667. GLSL_UFOP(dFdyFine);
  9668. if (options.es)
  9669. {
  9670. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9671. }
  9672. if (options.version < 450)
  9673. require_extension_internal("GL_ARB_derivative_control");
  9674. register_control_dependent_expression(ops[1]);
  9675. break;
  9676. case OpDPdxCoarse:
  9677. if (options.es)
  9678. {
  9679. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9680. }
  9681. GLSL_UFOP(dFdxCoarse);
  9682. if (options.version < 450)
  9683. require_extension_internal("GL_ARB_derivative_control");
  9684. register_control_dependent_expression(ops[1]);
  9685. break;
  9686. case OpDPdyCoarse:
  9687. GLSL_UFOP(dFdyCoarse);
  9688. if (options.es)
  9689. {
  9690. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9691. }
  9692. if (options.version < 450)
  9693. require_extension_internal("GL_ARB_derivative_control");
  9694. register_control_dependent_expression(ops[1]);
  9695. break;
  9696. case OpFwidth:
  9697. GLSL_UFOP(fwidth);
  9698. if (is_legacy_es())
  9699. require_extension_internal("GL_OES_standard_derivatives");
  9700. register_control_dependent_expression(ops[1]);
  9701. break;
  9702. case OpFwidthCoarse:
  9703. GLSL_UFOP(fwidthCoarse);
  9704. if (options.es)
  9705. {
  9706. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9707. }
  9708. if (options.version < 450)
  9709. require_extension_internal("GL_ARB_derivative_control");
  9710. register_control_dependent_expression(ops[1]);
  9711. break;
  9712. case OpFwidthFine:
  9713. GLSL_UFOP(fwidthFine);
  9714. if (options.es)
  9715. {
  9716. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9717. }
  9718. if (options.version < 450)
  9719. require_extension_internal("GL_ARB_derivative_control");
  9720. register_control_dependent_expression(ops[1]);
  9721. break;
  9722. // Bitfield
  9723. case OpBitFieldInsert:
  9724. {
  9725. emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
  9726. break;
  9727. }
  9728. case OpBitFieldSExtract:
  9729. {
  9730. emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
  9731. SPIRType::Int, SPIRType::Int);
  9732. break;
  9733. }
  9734. case OpBitFieldUExtract:
  9735. {
  9736. emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
  9737. SPIRType::Int, SPIRType::Int);
  9738. break;
  9739. }
  9740. case OpBitReverse:
  9741. // BitReverse does not have issues with sign since result type must match input type.
  9742. GLSL_UFOP(bitfieldReverse);
  9743. break;
  9744. case OpBitCount:
  9745. {
  9746. auto basetype = expression_type(ops[2]).basetype;
  9747. emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
  9748. break;
  9749. }
  9750. // Atomics
  9751. case OpAtomicExchange:
  9752. {
  9753. uint32_t result_type = ops[0];
  9754. uint32_t id = ops[1];
  9755. uint32_t ptr = ops[2];
  9756. // Ignore semantics for now, probably only relevant to CL.
  9757. uint32_t val = ops[5];
  9758. const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
  9759. forced_temporaries.insert(id);
  9760. emit_binary_func_op(result_type, id, ptr, val, op);
  9761. flush_all_atomic_capable_variables();
  9762. break;
  9763. }
  9764. case OpAtomicCompareExchange:
  9765. {
  9766. uint32_t result_type = ops[0];
  9767. uint32_t id = ops[1];
  9768. uint32_t ptr = ops[2];
  9769. uint32_t val = ops[6];
  9770. uint32_t comp = ops[7];
  9771. const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
  9772. forced_temporaries.insert(id);
  9773. emit_trinary_func_op(result_type, id, ptr, comp, val, op);
  9774. flush_all_atomic_capable_variables();
  9775. break;
  9776. }
  9777. case OpAtomicLoad:
  9778. {
// In plain GLSL, we have no atomic loads, so emulate this by atomically adding 0 and hope the compiler figures it out.
  9780. // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
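// e.g. an atomic load of a uint ends up as something like atomicAdd(counter, 0u) in the emitted
// GLSL (illustrative; the exact expression depends on whether the target is a buffer or an image).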
  9781. auto &type = expression_type(ops[2]);
  9782. forced_temporaries.insert(ops[1]);
  9783. bool atomic_image = check_atomic_image(ops[2]);
  9784. bool unsigned_type = (type.basetype == SPIRType::UInt) ||
  9785. (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
  9786. const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
  9787. const char *increment = unsigned_type ? "0u" : "0";
  9788. emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
  9789. flush_all_atomic_capable_variables();
  9790. break;
  9791. }
  9792. case OpAtomicStore:
  9793. {
  9794. // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
  9795. // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
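// e.g. emitted roughly as atomicExchange(ptr, value); with the returned previous value simply
// discarded (illustrative).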
  9796. uint32_t ptr = ops[0];
  9797. // Ignore semantics for now, probably only relevant to CL.
  9798. uint32_t val = ops[3];
  9799. const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
  9800. statement(op, "(", to_expression(ptr), ", ", to_expression(val), ");");
  9801. flush_all_atomic_capable_variables();
  9802. break;
  9803. }
  9804. case OpAtomicIIncrement:
  9805. case OpAtomicIDecrement:
  9806. {
  9807. forced_temporaries.insert(ops[1]);
  9808. auto &type = expression_type(ops[2]);
  9809. if (type.storage == StorageClassAtomicCounter)
  9810. {
  9811. // Legacy GLSL stuff, not sure if this is relevant to support.
  9812. if (opcode == OpAtomicIIncrement)
  9813. GLSL_UFOP(atomicCounterIncrement);
  9814. else
  9815. GLSL_UFOP(atomicCounterDecrement);
  9816. }
  9817. else
  9818. {
  9819. bool atomic_image = check_atomic_image(ops[2]);
  9820. bool unsigned_type = (type.basetype == SPIRType::UInt) ||
  9821. (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
  9822. const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
  9823. const char *increment = nullptr;
  9824. if (opcode == OpAtomicIIncrement && unsigned_type)
  9825. increment = "1u";
  9826. else if (opcode == OpAtomicIIncrement)
  9827. increment = "1";
  9828. else if (unsigned_type)
  9829. increment = "uint(-1)";
  9830. else
  9831. increment = "-1";
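// e.g. an OpAtomicIDecrement on a uint image ends up roughly as imageAtomicAdd(img, coord, uint(-1)) (illustrative).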
  9832. emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
  9833. }
  9834. flush_all_atomic_capable_variables();
  9835. break;
  9836. }
  9837. case OpAtomicIAdd:
  9838. {
  9839. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
  9840. forced_temporaries.insert(ops[1]);
  9841. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9842. flush_all_atomic_capable_variables();
  9843. break;
  9844. }
  9845. case OpAtomicISub:
  9846. {
  9847. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
  9848. forced_temporaries.insert(ops[1]);
  9849. auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
  9850. emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
  9851. flush_all_atomic_capable_variables();
  9852. break;
  9853. }
  9854. case OpAtomicSMin:
  9855. case OpAtomicUMin:
  9856. {
  9857. const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
  9858. forced_temporaries.insert(ops[1]);
  9859. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9860. flush_all_atomic_capable_variables();
  9861. break;
  9862. }
  9863. case OpAtomicSMax:
  9864. case OpAtomicUMax:
  9865. {
  9866. const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
  9867. forced_temporaries.insert(ops[1]);
  9868. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9869. flush_all_atomic_capable_variables();
  9870. break;
  9871. }
  9872. case OpAtomicAnd:
  9873. {
  9874. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
  9875. forced_temporaries.insert(ops[1]);
  9876. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9877. flush_all_atomic_capable_variables();
  9878. break;
  9879. }
  9880. case OpAtomicOr:
  9881. {
  9882. const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
  9883. forced_temporaries.insert(ops[1]);
  9884. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9885. flush_all_atomic_capable_variables();
  9886. break;
  9887. }
  9888. case OpAtomicXor:
  9889. {
  9890. const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
  9891. forced_temporaries.insert(ops[1]);
  9892. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9893. flush_all_atomic_capable_variables();
  9894. break;
  9895. }
  9896. // Geometry shaders
  9897. case OpEmitVertex:
  9898. statement("EmitVertex();");
  9899. break;
  9900. case OpEndPrimitive:
  9901. statement("EndPrimitive();");
  9902. break;
  9903. case OpEmitStreamVertex:
  9904. {
  9905. if (options.es)
  9906. SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
  9907. else if (!options.es && options.version < 400)
  9908. SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
  9909. auto stream_expr = to_expression(ops[0]);
  9910. if (expression_type(ops[0]).basetype != SPIRType::Int)
  9911. stream_expr = join("int(", stream_expr, ")");
  9912. statement("EmitStreamVertex(", stream_expr, ");");
  9913. break;
  9914. }
  9915. case OpEndStreamPrimitive:
  9916. {
  9917. if (options.es)
  9918. SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
  9919. else if (!options.es && options.version < 400)
  9920. SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
  9921. auto stream_expr = to_expression(ops[0]);
  9922. if (expression_type(ops[0]).basetype != SPIRType::Int)
  9923. stream_expr = join("int(", stream_expr, ")");
  9924. statement("EndStreamPrimitive(", stream_expr, ");");
  9925. break;
  9926. }
  9927. // Textures
  9928. case OpImageSampleExplicitLod:
  9929. case OpImageSampleProjExplicitLod:
  9930. case OpImageSampleDrefExplicitLod:
  9931. case OpImageSampleProjDrefExplicitLod:
  9932. case OpImageSampleImplicitLod:
  9933. case OpImageSampleProjImplicitLod:
  9934. case OpImageSampleDrefImplicitLod:
  9935. case OpImageSampleProjDrefImplicitLod:
  9936. case OpImageFetch:
  9937. case OpImageGather:
  9938. case OpImageDrefGather:
// Gets a bit hairy, so move this to a separate function.
  9940. emit_texture_op(instruction, false);
  9941. break;
  9942. case OpImageSparseSampleExplicitLod:
  9943. case OpImageSparseSampleProjExplicitLod:
  9944. case OpImageSparseSampleDrefExplicitLod:
  9945. case OpImageSparseSampleProjDrefExplicitLod:
  9946. case OpImageSparseSampleImplicitLod:
  9947. case OpImageSparseSampleProjImplicitLod:
  9948. case OpImageSparseSampleDrefImplicitLod:
  9949. case OpImageSparseSampleProjDrefImplicitLod:
  9950. case OpImageSparseFetch:
  9951. case OpImageSparseGather:
  9952. case OpImageSparseDrefGather:
// Gets a bit hairy, so move this to a separate function.
  9954. emit_texture_op(instruction, true);
  9955. break;
  9956. case OpImageSparseTexelsResident:
  9957. if (options.es)
  9958. SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL.");
  9959. require_extension_internal("GL_ARB_sparse_texture2");
  9960. emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
  9961. break;
  9962. case OpImage:
  9963. {
  9964. uint32_t result_type = ops[0];
  9965. uint32_t id = ops[1];
  9966. // Suppress usage tracking.
  9967. auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
  9968. // When using the image, we need to know which variable it is actually loaded from.
  9969. auto *var = maybe_get_backing_variable(ops[2]);
  9970. e.loaded_from = var ? var->self : ID(0);
  9971. break;
  9972. }
  9973. case OpImageQueryLod:
  9974. {
  9975. if (!options.es && options.version < 400)
  9976. {
  9977. require_extension_internal("GL_ARB_texture_query_lod");
  9978. // For some reason, the ARB spec is all-caps.
  9979. GLSL_BFOP(textureQueryLOD);
  9980. }
  9981. else if (options.es)
  9982. SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
  9983. else
  9984. GLSL_BFOP(textureQueryLod);
  9985. register_control_dependent_expression(ops[1]);
  9986. break;
  9987. }
  9988. case OpImageQueryLevels:
  9989. {
  9990. uint32_t result_type = ops[0];
  9991. uint32_t id = ops[1];
  9992. if (!options.es && options.version < 430)
  9993. require_extension_internal("GL_ARB_texture_query_levels");
  9994. if (options.es)
  9995. SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
  9996. auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
  9997. auto &restype = get<SPIRType>(ops[0]);
  9998. expr = bitcast_expression(restype, SPIRType::Int, expr);
  9999. emit_op(result_type, id, expr, true);
  10000. break;
  10001. }
  10002. case OpImageQuerySamples:
  10003. {
  10004. auto &type = expression_type(ops[2]);
  10005. uint32_t result_type = ops[0];
  10006. uint32_t id = ops[1];
  10007. string expr;
  10008. if (type.image.sampled == 2)
  10009. expr = join("imageSamples(", to_expression(ops[2]), ")");
  10010. else
  10011. expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
  10012. auto &restype = get<SPIRType>(ops[0]);
  10013. expr = bitcast_expression(restype, SPIRType::Int, expr);
  10014. emit_op(result_type, id, expr, true);
  10015. break;
  10016. }
  10017. case OpSampledImage:
  10018. {
  10019. uint32_t result_type = ops[0];
  10020. uint32_t id = ops[1];
  10021. emit_sampled_image_op(result_type, id, ops[2], ops[3]);
  10022. inherit_expression_dependencies(id, ops[2]);
  10023. inherit_expression_dependencies(id, ops[3]);
  10024. break;
  10025. }
  10026. case OpImageQuerySizeLod:
  10027. {
  10028. uint32_t result_type = ops[0];
  10029. uint32_t id = ops[1];
  10030. uint32_t img = ops[2];
  10031. std::string fname = "textureSize";
  10032. if (is_legacy_desktop())
  10033. {
  10034. auto &type = expression_type(img);
  10035. auto &imgtype = get<SPIRType>(type.self);
  10036. fname = legacy_tex_op(fname, imgtype, img);
  10037. }
  10038. else if (is_legacy_es())
  10039. SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
  10040. auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
  10041. bitcast_expression(SPIRType::Int, ops[3]), ")");
  10042. auto &restype = get<SPIRType>(ops[0]);
  10043. expr = bitcast_expression(restype, SPIRType::Int, expr);
  10044. emit_op(result_type, id, expr, true);
  10045. break;
  10046. }
  10047. // Image load/store
  10048. case OpImageRead:
  10049. case OpImageSparseRead:
  10050. {
  10051. // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
  10052. // not adding the proper qualifiers.
  10053. // If it turns out we need to read the image after all, remove the qualifier and recompile.
  10054. auto *var = maybe_get_backing_variable(ops[2]);
  10055. if (var)
  10056. {
  10057. auto &flags = ir.meta[var->self].decoration.decoration_flags;
  10058. if (flags.get(DecorationNonReadable))
  10059. {
  10060. flags.clear(DecorationNonReadable);
  10061. force_recompile();
  10062. }
  10063. }
  10064. uint32_t result_type = ops[0];
  10065. uint32_t id = ops[1];
  10066. bool pure;
  10067. string imgexpr;
  10068. auto &type = expression_type(ops[2]);
  10069. if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
  10070. {
  10071. if (type.image.ms)
  10072. SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
  10073. auto itr =
  10074. find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
  10075. if (itr == end(pls_inputs))
  10076. {
  10077. // For non-PLS inputs, we rely on subpass type remapping information to get it right
  10078. // since ImageRead always returns 4-component vectors and the backing type is opaque.
  10079. if (!var->remapped_components)
  10080. SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
  10081. imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
  10082. }
  10083. else
  10084. {
  10085. // PLS input could have different number of components than what the SPIR expects, swizzle to
  10086. // the appropriate vector size.
  10087. uint32_t components = pls_format_to_components(itr->format);
  10088. imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
  10089. }
  10090. pure = true;
  10091. }
  10092. else if (type.image.dim == DimSubpassData)
  10093. {
  10094. if (var && subpass_input_is_framebuffer_fetch(var->self))
  10095. {
  10096. imgexpr = to_expression(var->self);
  10097. }
  10098. else if (options.vulkan_semantics)
  10099. {
  10100. // With Vulkan semantics, use the proper Vulkan GLSL construct.
  10101. if (type.image.ms)
  10102. {
  10103. uint32_t operands = ops[4];
  10104. if (operands != ImageOperandsSampleMask || length != 6)
  10105. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  10106. "operand mask was used.");
  10107. uint32_t samples = ops[5];
  10108. imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")");
  10109. }
  10110. else
  10111. imgexpr = join("subpassLoad(", to_expression(ops[2]), ")");
  10112. }
  10113. else
  10114. {
  10115. if (type.image.ms)
  10116. {
  10117. uint32_t operands = ops[4];
  10118. if (operands != ImageOperandsSampleMask || length != 6)
  10119. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  10120. "operand mask was used.");
  10121. uint32_t samples = ops[5];
  10122. imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
  10123. to_expression(samples), ")");
  10124. }
  10125. else
  10126. {
  10127. // Implement subpass loads via texture barrier style sampling.
  10128. imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
  10129. }
  10130. }
  10131. imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
  10132. pure = true;
  10133. }
  10134. else
  10135. {
  10136. bool sparse = opcode == OpImageSparseRead;
  10137. uint32_t sparse_code_id = 0;
  10138. uint32_t sparse_texel_id = 0;
  10139. if (sparse)
  10140. emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
  10141. // imageLoad only accepts int coords, not uint.
  10142. auto coord_expr = to_expression(ops[3]);
  10143. auto target_coord_type = expression_type(ops[3]);
  10144. target_coord_type.basetype = SPIRType::Int;
  10145. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
  10146. // Plain image load/store.
  10147. if (sparse)
  10148. {
  10149. if (type.image.ms)
  10150. {
  10151. uint32_t operands = ops[4];
  10152. if (operands != ImageOperandsSampleMask || length != 6)
  10153. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  10154. "operand mask was used.");
  10155. uint32_t samples = ops[5];
  10156. statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ",
  10157. coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
  10158. }
  10159. else
  10160. {
  10161. statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ",
  10162. coord_expr, ", ", to_expression(sparse_texel_id), ");");
  10163. }
  10164. imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
  10165. to_expression(sparse_texel_id), ")");
  10166. }
  10167. else
  10168. {
  10169. if (type.image.ms)
  10170. {
  10171. uint32_t operands = ops[4];
  10172. if (operands != ImageOperandsSampleMask || length != 6)
  10173. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  10174. "operand mask was used.");
  10175. uint32_t samples = ops[5];
  10176. imgexpr =
  10177. join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
  10178. }
  10179. else
  10180. imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")");
  10181. }
  10182. if (!sparse)
  10183. imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
  10184. pure = false;
  10185. }
  10186. if (var && var->forwardable)
  10187. {
  10188. bool forward = forced_temporaries.find(id) == end(forced_temporaries);
  10189. auto &e = emit_op(result_type, id, imgexpr, forward);
  10190. // We only need to track dependencies if we're reading from image load/store.
  10191. if (!pure)
  10192. {
  10193. e.loaded_from = var->self;
  10194. if (forward)
  10195. var->dependees.push_back(id);
  10196. }
  10197. }
  10198. else
  10199. emit_op(result_type, id, imgexpr, false);
  10200. inherit_expression_dependencies(id, ops[2]);
  10201. if (type.image.ms)
  10202. inherit_expression_dependencies(id, ops[5]);
  10203. break;
  10204. }
  10205. case OpImageTexelPointer:
  10206. {
  10207. uint32_t result_type = ops[0];
  10208. uint32_t id = ops[1];
  10209. auto coord_expr = to_expression(ops[3]);
  10210. auto target_coord_type = expression_type(ops[3]);
  10211. target_coord_type.basetype = SPIRType::Int;
  10212. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
  10213. auto expr = join(to_expression(ops[2]), ", ", coord_expr);
  10214. if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ops[2], DecorationNonUniformEXT))
  10215. convert_non_uniform_expression(expression_type(ops[2]), expr);
  10216. auto &e = set<SPIRExpression>(id, expr, result_type, true);
  10217. // When using the pointer, we need to know which variable it is actually loaded from.
  10218. auto *var = maybe_get_backing_variable(ops[2]);
  10219. e.loaded_from = var ? var->self : ID(0);
  10220. inherit_expression_dependencies(id, ops[3]);
  10221. break;
  10222. }
  10223. case OpImageWrite:
  10224. {
  10225. // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
  10226. // not adding the proper qualifiers.
  10227. // If it turns out we need to write to the image after all, remove the qualifier and recompile.
  10228. auto *var = maybe_get_backing_variable(ops[0]);
  10229. if (var)
  10230. {
  10231. auto &flags = ir.meta[var->self].decoration.decoration_flags;
  10232. if (flags.get(DecorationNonWritable))
  10233. {
  10234. flags.clear(DecorationNonWritable);
  10235. force_recompile();
  10236. }
  10237. }
  10238. auto &type = expression_type(ops[0]);
  10239. auto &value_type = expression_type(ops[2]);
  10240. auto store_type = value_type;
  10241. store_type.vecsize = 4;
  10242. // imageStore only accepts int coords, not uint.
  10243. auto coord_expr = to_expression(ops[1]);
  10244. auto target_coord_type = expression_type(ops[1]);
  10245. target_coord_type.basetype = SPIRType::Int;
  10246. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
  10247. if (type.image.ms)
  10248. {
  10249. uint32_t operands = ops[3];
  10250. if (operands != ImageOperandsSampleMask || length != 5)
  10251. SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
  10252. uint32_t samples = ops[4];
  10253. statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
  10254. remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
  10255. }
  10256. else
  10257. statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ",
  10258. remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
  10259. if (var && variable_storage_is_aliased(*var))
  10260. flush_all_aliased_variables();
  10261. break;
  10262. }
  10263. case OpImageQuerySize:
  10264. {
  10265. auto &type = expression_type(ops[2]);
  10266. uint32_t result_type = ops[0];
  10267. uint32_t id = ops[1];
  10268. if (type.basetype == SPIRType::Image)
  10269. {
  10270. string expr;
  10271. if (type.image.sampled == 2)
  10272. {
  10273. if (!options.es && options.version < 430)
  10274. require_extension_internal("GL_ARB_shader_image_size");
  10275. else if (options.es && options.version < 310)
  10276. SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
  10277. // The size of an image is always constant.
  10278. expr = join("imageSize(", to_expression(ops[2]), ")");
  10279. }
  10280. else
  10281. {
  10282. // This path is hit for samplerBuffers and multisampled images which do not have LOD.
  10283. std::string fname = "textureSize";
  10284. if (is_legacy())
  10285. {
  10286. auto &imgtype = get<SPIRType>(type.self);
  10287. fname = legacy_tex_op(fname, imgtype, ops[2]);
  10288. }
  10289. expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
  10290. }
  10291. auto &restype = get<SPIRType>(ops[0]);
  10292. expr = bitcast_expression(restype, SPIRType::Int, expr);
  10293. emit_op(result_type, id, expr, true);
  10294. }
  10295. else
  10296. SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
  10297. break;
  10298. }
  10299. // Compute
  10300. case OpControlBarrier:
  10301. case OpMemoryBarrier:
  10302. {
  10303. uint32_t execution_scope = 0;
  10304. uint32_t memory;
  10305. uint32_t semantics;
  10306. if (opcode == OpMemoryBarrier)
  10307. {
  10308. memory = evaluate_constant_u32(ops[0]);
  10309. semantics = evaluate_constant_u32(ops[1]);
  10310. }
  10311. else
  10312. {
  10313. execution_scope = evaluate_constant_u32(ops[0]);
  10314. memory = evaluate_constant_u32(ops[1]);
  10315. semantics = evaluate_constant_u32(ops[2]);
  10316. }
  10317. if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
  10318. {
  10319. // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
  10320. if (opcode != OpControlBarrier)
  10321. {
  10322. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
  10323. }
  10324. else
  10325. {
  10326. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
  10327. }
  10328. }
  10329. if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
  10330. {
// Tessellation control shaders only have barrier(), and it implies the needed memory barriers.
  10332. if (opcode == OpControlBarrier)
  10333. statement("barrier();");
  10334. break;
  10335. }
  10336. // We only care about these flags, acquire/release and friends are not relevant to GLSL.
  10337. semantics = mask_relevant_memory_semantics(semantics);
  10338. if (opcode == OpMemoryBarrier)
  10339. {
  10340. // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
  10341. // does what we need, so we avoid redundant barriers.
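// e.g. an OpMemoryBarrier on workgroup memory immediately followed by an OpControlBarrier whose
// semantics cover it can be dropped here, since the barrier emitted for the OpControlBarrier
// already provides the same guarantees (illustrative).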
  10342. const Instruction *next = get_next_instruction_in_block(instruction);
  10343. if (next && next->op == OpControlBarrier)
  10344. {
  10345. auto *next_ops = stream(*next);
  10346. uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
  10347. uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
  10348. next_semantics = mask_relevant_memory_semantics(next_semantics);
  10349. bool memory_scope_covered = false;
  10350. if (next_memory == memory)
  10351. memory_scope_covered = true;
  10352. else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
  10353. {
  10354. // If we only care about workgroup memory, either Device or Workgroup scope is fine,
  10355. // scope does not have to match.
  10356. if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
  10357. (memory == ScopeDevice || memory == ScopeWorkgroup))
  10358. {
  10359. memory_scope_covered = true;
  10360. }
  10361. }
  10362. else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
  10363. {
  10364. // The control barrier has device scope, but the memory barrier just has workgroup scope.
  10365. memory_scope_covered = true;
  10366. }
  10367. // If we have the same memory scope, and all memory types are covered, we're good.
  10368. if (memory_scope_covered && (semantics & next_semantics) == semantics)
  10369. break;
  10370. }
  10371. }
  10372. // We are synchronizing some memory or syncing execution,
  10373. // so we cannot forward any loads beyond the memory barrier.
  10374. if (semantics || opcode == OpControlBarrier)
  10375. {
  10376. assert(current_emitting_block);
  10377. flush_control_dependent_expressions(current_emitting_block->self);
  10378. flush_all_active_variables();
  10379. }
  10380. if (memory == ScopeWorkgroup) // Only need to consider memory within a group
  10381. {
  10382. if (semantics == MemorySemanticsWorkgroupMemoryMask)
  10383. {
  10384. // OpControlBarrier implies a memory barrier for shared memory as well.
  10385. bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
  10386. if (!implies_shared_barrier)
  10387. statement("memoryBarrierShared();");
  10388. }
  10389. else if (semantics != 0)
  10390. statement("groupMemoryBarrier();");
  10391. }
  10392. else if (memory == ScopeSubgroup)
  10393. {
  10394. const uint32_t all_barriers =
  10395. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
  10396. if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
  10397. {
// These are not relevant for GLSL, but assume they call for a full subgroupMemoryBarrier().
// subgroupMemoryBarrier() does everything, so no need to test anything else.
  10400. statement("subgroupMemoryBarrier();");
  10401. }
  10402. else if ((semantics & all_barriers) == all_barriers)
  10403. {
  10404. // Short-hand instead of emitting 3 barriers.
  10405. statement("subgroupMemoryBarrier();");
  10406. }
  10407. else
  10408. {
  10409. // Pick out individual barriers.
  10410. if (semantics & MemorySemanticsWorkgroupMemoryMask)
  10411. statement("subgroupMemoryBarrierShared();");
  10412. if (semantics & MemorySemanticsUniformMemoryMask)
  10413. statement("subgroupMemoryBarrierBuffer();");
  10414. if (semantics & MemorySemanticsImageMemoryMask)
  10415. statement("subgroupMemoryBarrierImage();");
  10416. }
  10417. }
  10418. else
  10419. {
  10420. const uint32_t all_barriers =
  10421. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
  10422. if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
  10423. {
  10424. // These are not relevant for GLSL, but assume it means memoryBarrier().
  10425. // memoryBarrier() does everything, so no need to test anything else.
  10426. statement("memoryBarrier();");
  10427. }
  10428. else if ((semantics & all_barriers) == all_barriers)
  10429. {
  10430. // Short-hand instead of emitting 4 barriers.
  10431. statement("memoryBarrier();");
  10432. }
  10433. else
  10434. {
  10435. // Pick out individual barriers.
  10436. if (semantics & MemorySemanticsWorkgroupMemoryMask)
  10437. statement("memoryBarrierShared();");
  10438. if (semantics & MemorySemanticsUniformMemoryMask)
  10439. statement("memoryBarrierBuffer();");
  10440. if (semantics & MemorySemanticsImageMemoryMask)
  10441. statement("memoryBarrierImage();");
  10442. }
  10443. }
  10444. if (opcode == OpControlBarrier)
  10445. {
  10446. if (execution_scope == ScopeSubgroup)
  10447. statement("subgroupBarrier();");
  10448. else
  10449. statement("barrier();");
  10450. }
  10451. break;
  10452. }
  10453. case OpExtInst:
  10454. {
  10455. uint32_t extension_set = ops[2];
  10456. if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
  10457. {
  10458. emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  10459. }
  10460. else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
  10461. {
  10462. emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  10463. }
  10464. else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
  10465. {
  10466. emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  10467. }
  10468. else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
  10469. {
  10470. emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  10471. }
  10472. else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
  10473. {
  10474. emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  10475. }
  10476. else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
  10477. {
  10478. break; // Ignore SPIR-V debug information extended instructions.
  10479. }
  10480. else
  10481. {
  10482. statement("// unimplemented ext op ", instruction.op);
  10483. break;
  10484. }
  10485. break;
  10486. }
  10487. // Legacy sub-group stuff ...
  10488. case OpSubgroupBallotKHR:
  10489. {
  10490. uint32_t result_type = ops[0];
  10491. uint32_t id = ops[1];
  10492. string expr;
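// ballotARB() returns a 64-bit mask while the SPIR-V result type is a uvec4, so unpack it into two
// 32-bit words and zero-pad the upper two components (descriptive note, assuming GL_ARB_shader_ballot semantics).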
  10493. expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
  10494. emit_op(result_type, id, expr, should_forward(ops[2]));
  10495. require_extension_internal("GL_ARB_shader_ballot");
  10496. inherit_expression_dependencies(id, ops[2]);
  10497. register_control_dependent_expression(ops[1]);
  10498. break;
  10499. }
  10500. case OpSubgroupFirstInvocationKHR:
  10501. {
  10502. uint32_t result_type = ops[0];
  10503. uint32_t id = ops[1];
  10504. emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
  10505. require_extension_internal("GL_ARB_shader_ballot");
  10506. register_control_dependent_expression(ops[1]);
  10507. break;
  10508. }
  10509. case OpSubgroupReadInvocationKHR:
  10510. {
  10511. uint32_t result_type = ops[0];
  10512. uint32_t id = ops[1];
  10513. emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
  10514. require_extension_internal("GL_ARB_shader_ballot");
  10515. register_control_dependent_expression(ops[1]);
  10516. break;
  10517. }
  10518. case OpSubgroupAllKHR:
  10519. {
  10520. uint32_t result_type = ops[0];
  10521. uint32_t id = ops[1];
  10522. emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
  10523. require_extension_internal("GL_ARB_shader_group_vote");
  10524. register_control_dependent_expression(ops[1]);
  10525. break;
  10526. }
  10527. case OpSubgroupAnyKHR:
  10528. {
  10529. uint32_t result_type = ops[0];
  10530. uint32_t id = ops[1];
  10531. emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
  10532. require_extension_internal("GL_ARB_shader_group_vote");
  10533. register_control_dependent_expression(ops[1]);
  10534. break;
  10535. }
  10536. case OpSubgroupAllEqualKHR:
  10537. {
  10538. uint32_t result_type = ops[0];
  10539. uint32_t id = ops[1];
  10540. emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
  10541. require_extension_internal("GL_ARB_shader_group_vote");
  10542. register_control_dependent_expression(ops[1]);
  10543. break;
  10544. }
  10545. case OpGroupIAddNonUniformAMD:
  10546. case OpGroupFAddNonUniformAMD:
  10547. {
  10548. uint32_t result_type = ops[0];
  10549. uint32_t id = ops[1];
  10550. emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
  10551. require_extension_internal("GL_AMD_shader_ballot");
  10552. register_control_dependent_expression(ops[1]);
  10553. break;
  10554. }
  10555. case OpGroupFMinNonUniformAMD:
  10556. case OpGroupUMinNonUniformAMD:
  10557. case OpGroupSMinNonUniformAMD:
  10558. {
  10559. uint32_t result_type = ops[0];
  10560. uint32_t id = ops[1];
  10561. emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
  10562. require_extension_internal("GL_AMD_shader_ballot");
  10563. register_control_dependent_expression(ops[1]);
  10564. break;
  10565. }
  10566. case OpGroupFMaxNonUniformAMD:
  10567. case OpGroupUMaxNonUniformAMD:
  10568. case OpGroupSMaxNonUniformAMD:
  10569. {
  10570. uint32_t result_type = ops[0];
  10571. uint32_t id = ops[1];
  10572. emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
  10573. require_extension_internal("GL_AMD_shader_ballot");
  10574. register_control_dependent_expression(ops[1]);
  10575. break;
  10576. }
  10577. case OpFragmentMaskFetchAMD:
  10578. {
  10579. auto &type = expression_type(ops[2]);
  10580. uint32_t result_type = ops[0];
  10581. uint32_t id = ops[1];
  10582. if (type.image.dim == spv::DimSubpassData)
  10583. {
  10584. emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
  10585. }
  10586. else
  10587. {
  10588. emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
  10589. }
  10590. require_extension_internal("GL_AMD_shader_fragment_mask");
  10591. break;
  10592. }
  10593. case OpFragmentFetchAMD:
  10594. {
  10595. auto &type = expression_type(ops[2]);
  10596. uint32_t result_type = ops[0];
  10597. uint32_t id = ops[1];
  10598. if (type.image.dim == spv::DimSubpassData)
  10599. {
  10600. emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
  10601. }
  10602. else
  10603. {
  10604. emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
  10605. }
  10606. require_extension_internal("GL_AMD_shader_fragment_mask");
  10607. break;
  10608. }
  10609. // Vulkan 1.1 sub-group stuff ...
  10610. case OpGroupNonUniformElect:
  10611. case OpGroupNonUniformBroadcast:
  10612. case OpGroupNonUniformBroadcastFirst:
  10613. case OpGroupNonUniformBallot:
  10614. case OpGroupNonUniformInverseBallot:
  10615. case OpGroupNonUniformBallotBitExtract:
  10616. case OpGroupNonUniformBallotBitCount:
  10617. case OpGroupNonUniformBallotFindLSB:
  10618. case OpGroupNonUniformBallotFindMSB:
  10619. case OpGroupNonUniformShuffle:
  10620. case OpGroupNonUniformShuffleXor:
  10621. case OpGroupNonUniformShuffleUp:
  10622. case OpGroupNonUniformShuffleDown:
  10623. case OpGroupNonUniformAll:
  10624. case OpGroupNonUniformAny:
  10625. case OpGroupNonUniformAllEqual:
  10626. case OpGroupNonUniformFAdd:
  10627. case OpGroupNonUniformIAdd:
  10628. case OpGroupNonUniformFMul:
  10629. case OpGroupNonUniformIMul:
  10630. case OpGroupNonUniformFMin:
  10631. case OpGroupNonUniformFMax:
  10632. case OpGroupNonUniformSMin:
  10633. case OpGroupNonUniformSMax:
  10634. case OpGroupNonUniformUMin:
  10635. case OpGroupNonUniformUMax:
  10636. case OpGroupNonUniformBitwiseAnd:
  10637. case OpGroupNonUniformBitwiseOr:
  10638. case OpGroupNonUniformBitwiseXor:
  10639. case OpGroupNonUniformQuadSwap:
  10640. case OpGroupNonUniformQuadBroadcast:
  10641. emit_subgroup_op(instruction);
  10642. break;
  10643. case OpFUnordEqual:
  10644. case OpFUnordNotEqual:
  10645. case OpFUnordLessThan:
  10646. case OpFUnordGreaterThan:
  10647. case OpFUnordLessThanEqual:
  10648. case OpFUnordGreaterThanEqual:
  10649. {
// GLSL doesn't specify if floating point comparisons are ordered or unordered,
// but glslang always emits ordered floating point compares for GLSL.
// To get unordered compares, we can test the opposite thing and invert the result.
// This way, we force true when there is any NaN present.
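// For example (illustrative): a scalar OpFUnordLessThan a, b lowers to !(a >= b), and the vector
// form lowers to not(greaterThanEqual(a, b)); both evaluate to true whenever either operand is NaN.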
  10654. uint32_t op0 = ops[2];
  10655. uint32_t op1 = ops[3];
  10656. string expr;
  10657. if (expression_type(op0).vecsize > 1)
  10658. {
  10659. const char *comp_op = nullptr;
  10660. switch (opcode)
  10661. {
  10662. case OpFUnordEqual:
  10663. comp_op = "notEqual";
  10664. break;
  10665. case OpFUnordNotEqual:
  10666. comp_op = "equal";
  10667. break;
  10668. case OpFUnordLessThan:
  10669. comp_op = "greaterThanEqual";
  10670. break;
  10671. case OpFUnordLessThanEqual:
  10672. comp_op = "greaterThan";
  10673. break;
  10674. case OpFUnordGreaterThan:
  10675. comp_op = "lessThanEqual";
  10676. break;
  10677. case OpFUnordGreaterThanEqual:
  10678. comp_op = "lessThan";
  10679. break;
  10680. default:
  10681. assert(0);
  10682. break;
  10683. }
  10684. expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
  10685. }
  10686. else
  10687. {
  10688. const char *comp_op = nullptr;
  10689. switch (opcode)
  10690. {
  10691. case OpFUnordEqual:
  10692. comp_op = " != ";
  10693. break;
  10694. case OpFUnordNotEqual:
  10695. comp_op = " == ";
  10696. break;
  10697. case OpFUnordLessThan:
  10698. comp_op = " >= ";
  10699. break;
  10700. case OpFUnordLessThanEqual:
  10701. comp_op = " > ";
  10702. break;
  10703. case OpFUnordGreaterThan:
  10704. comp_op = " <= ";
  10705. break;
  10706. case OpFUnordGreaterThanEqual:
  10707. comp_op = " < ";
  10708. break;
  10709. default:
  10710. assert(0);
  10711. break;
  10712. }
  10713. expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
  10714. }
  10715. emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
  10716. inherit_expression_dependencies(ops[1], op0);
  10717. inherit_expression_dependencies(ops[1], op1);
  10718. break;
  10719. }
  10720. case OpReportIntersectionKHR:
  10721. // NV is same opcode.
  10722. forced_temporaries.insert(ops[1]);
  10723. if (ray_tracing_is_khr)
  10724. GLSL_BFOP(reportIntersectionEXT);
  10725. else
  10726. GLSL_BFOP(reportIntersectionNV);
  10727. flush_control_dependent_expressions(current_emitting_block->self);
  10728. break;
  10729. case OpIgnoreIntersectionNV:
  10730. // KHR variant is a terminator.
  10731. statement("ignoreIntersectionNV();");
  10732. flush_control_dependent_expressions(current_emitting_block->self);
  10733. break;
  10734. case OpTerminateRayNV:
  10735. // KHR variant is a terminator.
  10736. statement("terminateRayNV();");
  10737. flush_control_dependent_expressions(current_emitting_block->self);
  10738. break;
  10739. case OpTraceNV:
  10740. if (has_decoration(ops[0], DecorationNonUniformEXT))
  10741. propagate_nonuniform_qualifier(ops[0]);
  10742. statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
  10743. to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  10744. to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
  10745. to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
  10746. flush_control_dependent_expressions(current_emitting_block->self);
  10747. break;
  10748. case OpTraceRayKHR:
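// In GLSL_EXT_ray_tracing, the last argument of traceRayEXT() is the payload's location literal
// rather than the payload variable itself, so the payload operand must carry a Location decoration.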
  10749. if (!has_decoration(ops[10], DecorationLocation))
  10750. SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
  10751. if (has_decoration(ops[0], DecorationNonUniformEXT))
  10752. propagate_nonuniform_qualifier(ops[0]);
  10753. statement("traceRayEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
  10754. to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  10755. to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
  10756. to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
  10757. flush_control_dependent_expressions(current_emitting_block->self);
  10758. break;
  10759. case OpExecuteCallableNV:
  10760. statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
  10761. flush_control_dependent_expressions(current_emitting_block->self);
  10762. break;
  10763. case OpExecuteCallableKHR:
  10764. if (!has_decoration(ops[1], DecorationLocation))
  10765. SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
  10766. statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
  10767. flush_control_dependent_expressions(current_emitting_block->self);
  10768. break;
  10769. case OpConvertUToAccelerationStructureKHR:
  10770. GLSL_UFOP(accelerationStructureEXT);
  10771. break;
  10772. case OpConvertUToPtr:
  10773. {
  10774. auto &type = get<SPIRType>(ops[0]);
  10775. if (type.storage != StorageClassPhysicalStorageBufferEXT)
  10776. SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
  10777. auto op = type_to_glsl(type);
  10778. emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
  10779. break;
  10780. }
  10781. case OpConvertPtrToU:
  10782. {
  10783. auto &type = get<SPIRType>(ops[0]);
  10784. auto &ptr_type = expression_type(ops[2]);
  10785. if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
  10786. SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
  10787. auto op = type_to_glsl(type);
  10788. emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
  10789. break;
  10790. }
  10791. case OpUndef:
  10792. // Undefined value has been declared.
  10793. break;
  10794. case OpLine:
  10795. {
  10796. emit_line_directive(ops[0], ops[1]);
  10797. break;
  10798. }
  10799. case OpNoLine:
  10800. break;
  10801. case OpDemoteToHelperInvocationEXT:
  10802. if (!options.vulkan_semantics)
  10803. SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
  10804. require_extension_internal("GL_EXT_demote_to_helper_invocation");
  10805. statement(backend.demote_literal, ";");
  10806. break;
  10807. case OpIsHelperInvocationEXT:
  10808. if (!options.vulkan_semantics)
  10809. SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
  10810. require_extension_internal("GL_EXT_demote_to_helper_invocation");
  10811. emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
  10812. break;
  10813. case OpBeginInvocationInterlockEXT:
  10814. // If the interlock is complex, we emit this elsewhere.
  10815. if (!interlocked_is_complex)
  10816. {
  10817. if (options.es)
  10818. statement("beginInvocationInterlockNV();");
  10819. else
  10820. statement("beginInvocationInterlockARB();");
  10821. flush_all_active_variables();
  10822. // Make sure forwarding doesn't propagate outside interlock region.
  10823. }
  10824. break;
  10825. case OpEndInvocationInterlockEXT:
  10826. // If the interlock is complex, we emit this elsewhere.
  10827. if (!interlocked_is_complex)
  10828. {
  10829. if (options.es)
  10830. statement("endInvocationInterlockNV();");
  10831. else
  10832. statement("endInvocationInterlockARB();");
  10833. flush_all_active_variables();
  10834. // Make sure forwarding doesn't propagate outside interlock region.
  10835. }
  10836. break;
  10837. default:
  10838. statement("// unimplemented op ", instruction.op);
  10839. break;
  10840. }
  10841. }
// Appends function arguments, mapped from global variables, beyond the specified arg index.
// This is used when a function call uses fewer arguments than the function defines.
// This situation may occur if the function signature has been dynamically modified to
// extract global variables referenced from within the function, and convert them to
// function arguments. This is necessary for shader languages that do not support global
// access to shader input content from within a function (e.g. Metal). Each additional
// function arg uses the name of its global variable. Function nesting will modify the
// functions and function calls all the way up the nesting chain.
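// Illustrative example (hypothetical names): if foo(float x) internally references a global
// sampler2D uTex that was hoisted into a parameter, a call written as foo(v) is emitted as foo(v, uTex).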
  10850. void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
  10851. {
  10852. auto &args = func.arguments;
  10853. uint32_t arg_cnt = uint32_t(args.size());
  10854. for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
  10855. {
  10856. auto &arg = args[arg_idx];
  10857. assert(arg.alias_global_variable);
  10858. // If the underlying variable needs to be declared
  10859. // (ie. a local variable with deferred declaration), do so now.
  10860. uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
  10861. if (var_id)
  10862. flush_variable_declaration(var_id);
  10863. arglist.push_back(to_func_call_arg(arg, arg.id));
  10864. }
  10865. }
  10866. string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
  10867. {
  10868. if (type.type_alias != TypeID(0) &&
  10869. !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
  10870. {
  10871. return to_member_name(get<SPIRType>(type.type_alias), index);
  10872. }
  10873. auto &memb = ir.meta[type.self].members;
  10874. if (index < memb.size() && !memb[index].alias.empty())
  10875. return memb[index].alias;
  10876. else
  10877. return join("_m", index);
  10878. }
  10879. string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
  10880. {
  10881. return join(".", to_member_name(type, index));
  10882. }
  10883. string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
  10884. {
  10885. string ret;
  10886. auto *member_type = &type;
  10887. for (auto &index : indices)
  10888. {
  10889. ret += join(".", to_member_name(*member_type, index));
  10890. member_type = &get<SPIRType>(member_type->member_types[index]);
  10891. }
  10892. return ret;
  10893. }
  10894. void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
  10895. {
  10896. auto &memb = ir.meta[type.self].members;
  10897. if (index < memb.size() && !memb[index].alias.empty())
  10898. {
  10899. auto &name = memb[index].alias;
  10900. if (name.empty())
  10901. return;
  10902. ParsedIR::sanitize_identifier(name, true, true);
  10903. update_name_cache(type.member_name_cache, name);
  10904. }
  10905. }
  10906. // Checks whether the ID is a row_major matrix that requires conversion before use
  10907. bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
  10908. {
  10909. // Natively supported row-major matrices do not need to be converted.
  10910. // Legacy targets do not support row major.
  10911. if (backend.native_row_major_matrix && !is_legacy())
  10912. return false;
  10913. auto *e = maybe_get<SPIRExpression>(id);
  10914. if (e)
  10915. return e->need_transpose;
  10916. else
  10917. return has_decoration(id, DecorationRowMajor);
  10918. }
  10919. // Checks whether the member is a row_major matrix that requires conversion before use
  10920. bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
  10921. {
  10922. // Natively supported row-major matrices do not need to be converted.
  10923. if (backend.native_row_major_matrix && !is_legacy())
  10924. return false;
  10925. // Non-matrix or column-major matrix types do not need to be converted.
  10926. if (!has_member_decoration(type.self, index, DecorationRowMajor))
  10927. return false;
// Only square row-major matrices can be converted at this time.
// Converting non-square matrices will require defining a custom GLSL function that
// swaps matrix elements while retaining the original dimensions of the matrix.
  10931. const auto mbr_type = get<SPIRType>(type.member_types[index]);
  10932. if (mbr_type.columns != mbr_type.vecsize)
  10933. SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
  10934. return true;
  10935. }
  10936. // Checks if we need to remap physical type IDs when declaring the type in a buffer.
  10937. bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
  10938. {
  10939. return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
  10940. }
  10941. // Checks whether the member is in packed data type, that might need to be unpacked.
  10942. bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
  10943. {
  10944. return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
  10945. }
// Wraps the expression string in a function call that converts the
// row_major matrix result of the expression to a column_major matrix.
// Base implementation uses the standard library transpose() function.
// Subclasses may override to use a different function.
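// For example (illustrative): a row-major matrix expression such as ubo.worldMatrix is typically
// emitted as transpose(ubo.worldMatrix) so the rest of the code can treat it as column-major.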
  10950. string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
  10951. bool /*is_packed*/)
  10952. {
  10953. strip_enclosed_expression(exp_str);
  10954. if (!is_matrix(exp_type))
  10955. {
  10956. auto column_index = exp_str.find_last_of('[');
  10957. if (column_index == string::npos)
  10958. return exp_str;
  10959. auto column_expr = exp_str.substr(column_index);
  10960. exp_str.resize(column_index);
  10961. auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
// Loading a column from a row-major matrix. Unroll the load.
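// e.g. (illustrative) a column load written as m[1] from a row-major mat3 unrolls to
// vec3(m[0][1], m[1][1], m[2][1]).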
  10963. for (uint32_t c = 0; c < exp_type.vecsize; c++)
  10964. {
  10965. transposed_expr += join(exp_str, '[', c, ']', column_expr);
  10966. if (c + 1 < exp_type.vecsize)
  10967. transposed_expr += ", ";
  10968. }
  10969. transposed_expr += ")";
  10970. return transposed_expr;
  10971. }
  10972. else if (options.version < 120)
  10973. {
// GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
// these GLSL versions do not support non-square matrices.
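// The expression is wrapped in spvTranspose() below; setting requires_transpose_NxN and forcing a
// recompile allows the matching helper function to be emitted elsewhere in the output.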
  10976. if (exp_type.vecsize == 2 && exp_type.columns == 2)
  10977. {
  10978. if (!requires_transpose_2x2)
  10979. {
  10980. requires_transpose_2x2 = true;
  10981. force_recompile();
  10982. }
  10983. }
  10984. else if (exp_type.vecsize == 3 && exp_type.columns == 3)
  10985. {
  10986. if (!requires_transpose_3x3)
  10987. {
  10988. requires_transpose_3x3 = true;
  10989. force_recompile();
  10990. }
  10991. }
  10992. else if (exp_type.vecsize == 4 && exp_type.columns == 4)
  10993. {
  10994. if (!requires_transpose_4x4)
  10995. {
  10996. requires_transpose_4x4 = true;
  10997. force_recompile();
  10998. }
  10999. }
  11000. else
  11001. SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
  11002. return join("spvTranspose(", exp_str, ")");
  11003. }
  11004. else
  11005. return join("transpose(", exp_str, ")");
  11006. }
  11007. string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
  11008. {
  11009. string type_name = type_to_glsl(type, id);
  11010. remap_variable_type_name(type, name, type_name);
  11011. return join(type_name, " ", name, type_to_array_glsl(type));
  11012. }
  11013. // Emit a structure member. Subclasses may override to modify output,
  11014. // or to dynamically add a padding member if needed.
  11015. void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
  11016. const string &qualifier, uint32_t)
  11017. {
  11018. auto &membertype = get<SPIRType>(member_type_id);
  11019. Bitset memberflags;
  11020. auto &memb = ir.meta[type.self].members;
  11021. if (index < memb.size())
  11022. memberflags = memb[index].decoration_flags;
  11023. string qualifiers;
  11024. bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
  11025. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  11026. if (is_block)
  11027. qualifiers = to_interpolation_qualifiers(memberflags);
  11028. statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
  11029. variable_decl(membertype, to_member_name(type, index)), ";");
  11030. }
  11031. void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
  11032. {
  11033. }
  11034. const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
  11035. {
  11036. // GL_EXT_buffer_reference variables can be marked as restrict.
  11037. if (flags.get(DecorationRestrictPointerEXT))
  11038. return "restrict ";
  11039. // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp).
  11040. if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt &&
  11041. type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage &&
  11042. type.basetype != SPIRType::Sampler)
  11043. return "";
  11044. if (options.es)
  11045. {
  11046. auto &execution = get_entry_point();
  11047. if (flags.get(DecorationRelaxedPrecision))
  11048. {
  11049. bool implied_fmediump = type.basetype == SPIRType::Float &&
  11050. options.fragment.default_float_precision == Options::Mediump &&
  11051. execution.model == ExecutionModelFragment;
  11052. bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
  11053. options.fragment.default_int_precision == Options::Mediump &&
  11054. execution.model == ExecutionModelFragment;
  11055. return implied_fmediump || implied_imediump ? "" : "mediump ";
  11056. }
  11057. else
  11058. {
  11059. bool implied_fhighp =
  11060. type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
  11061. execution.model == ExecutionModelFragment) ||
  11062. (execution.model != ExecutionModelFragment));
  11063. bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
  11064. ((options.fragment.default_int_precision == Options::Highp &&
  11065. execution.model == ExecutionModelFragment) ||
  11066. (execution.model != ExecutionModelFragment));
  11067. return implied_fhighp || implied_ihighp ? "" : "highp ";
  11068. }
  11069. }
  11070. else if (backend.allow_precision_qualifiers)
  11071. {
  11072. // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
  11073. // The default is highp however, so only emit mediump in the rare case that a shader has these.
  11074. if (flags.get(DecorationRelaxedPrecision))
  11075. return "mediump ";
  11076. else
  11077. return "";
  11078. }
  11079. else
  11080. return "";
  11081. }
  11082. const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
  11083. {
  11084. auto &type = expression_type(id);
  11085. bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
  11086. if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
  11087. {
  11088. // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
  11089. auto &result_type = get<SPIRType>(type.image.type);
  11090. if (result_type.width < 32)
  11091. return "mediump ";
  11092. }
  11093. return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
  11094. }
  11095. void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
  11096. {
// Works around weird behavior in glslangValidator where
// a patch out block is translated to just the block members getting the decoration.
// To make glslang not complain when we compile again, we have to transform this back to a case where
// the variable itself has the Patch decoration, rather than its members.
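// Illustrative example: given "patch out TessBlock { vec4 v; } blk;" where only the member carries
// Patch, the decoration is promoted to blk and stripped from the members below.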
  11101. auto &type = get<SPIRType>(var.basetype);
  11102. if (has_decoration(type.self, DecorationBlock))
  11103. {
  11104. uint32_t member_count = uint32_t(type.member_types.size());
  11105. for (uint32_t i = 0; i < member_count; i++)
  11106. {
  11107. if (has_member_decoration(type.self, i, DecorationPatch))
  11108. {
  11109. set_decoration(var.self, DecorationPatch);
  11110. break;
  11111. }
  11112. }
  11113. if (has_decoration(var.self, DecorationPatch))
  11114. for (uint32_t i = 0; i < member_count; i++)
  11115. unset_member_decoration(type.self, i, DecorationPatch);
  11116. }
  11117. }
  11118. string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
  11119. {
  11120. auto &flags = ir.meta[id].decoration.decoration_flags;
  11121. string res;
  11122. auto *var = maybe_get<SPIRVariable>(id);
  11123. if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
  11124. res += "shared ";
  11125. res += to_interpolation_qualifiers(flags);
  11126. if (var)
  11127. res += to_storage_qualifiers_glsl(*var);
  11128. auto &type = expression_type(id);
  11129. if (type.image.dim != DimSubpassData && type.image.sampled == 2)
  11130. {
  11131. if (flags.get(DecorationCoherent))
  11132. res += "coherent ";
  11133. if (flags.get(DecorationRestrict))
  11134. res += "restrict ";
  11135. if (flags.get(DecorationNonWritable))
  11136. res += "readonly ";
  11137. if (flags.get(DecorationNonReadable))
  11138. res += "writeonly ";
  11139. }
  11140. res += to_precision_qualifiers_glsl(id);
  11141. return res;
  11142. }
  11143. string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
  11144. {
// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
  11146. auto &type = expression_type(arg.id);
  11147. const char *direction = "";
  11148. if (type.pointer)
  11149. {
  11150. if (arg.write_count && arg.read_count)
  11151. direction = "inout ";
  11152. else if (arg.write_count)
  11153. direction = "out ";
  11154. }
  11155. return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
  11156. }
  11157. string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
  11158. {
  11159. return to_expression(var.initializer);
  11160. }
  11161. string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
  11162. {
  11163. #ifndef NDEBUG
  11164. auto &type = get<SPIRType>(type_id);
  11165. assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
  11166. type.storage == StorageClassGeneric);
  11167. #endif
  11168. uint32_t id = ir.increase_bound_by(1);
  11169. ir.make_constant_null(id, type_id, false);
  11170. return constant_expression(get<SPIRConstant>(id));
  11171. }
  11172. bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
  11173. {
  11174. if (type.pointer)
  11175. return false;
  11176. if (!type.array.empty() && options.flatten_multidimensional_arrays)
  11177. return false;
  11178. for (auto &literal : type.array_size_literal)
  11179. if (!literal)
  11180. return false;
  11181. for (auto &memb : type.member_types)
  11182. if (!type_can_zero_initialize(get<SPIRType>(memb)))
  11183. return false;
  11184. return true;
  11185. }
  11186. string CompilerGLSL::variable_decl(const SPIRVariable &variable)
  11187. {
  11188. // Ignore the pointer type since GLSL doesn't have pointers.
  11189. auto &type = get_variable_data_type(variable);
  11190. if (type.pointer_depth > 1)
  11191. SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
  11192. auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
  11193. if (variable.loop_variable && variable.static_expression)
  11194. {
  11195. uint32_t expr = variable.static_expression;
  11196. if (ir.ids[expr].get_type() != TypeUndef)
  11197. res += join(" = ", to_expression(variable.static_expression));
  11198. else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  11199. res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
  11200. }
  11201. else if (variable.initializer)
  11202. {
  11203. uint32_t expr = variable.initializer;
  11204. if (ir.ids[expr].get_type() != TypeUndef)
  11205. res += join(" = ", to_initializer_expression(variable));
  11206. else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  11207. res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
  11208. }
  11209. return res;
  11210. }
  11211. const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
  11212. {
  11213. auto &flags = ir.meta[variable.self].decoration.decoration_flags;
  11214. if (flags.get(DecorationRelaxedPrecision))
  11215. return "mediump ";
  11216. else
  11217. return "highp ";
  11218. }
  11219. string CompilerGLSL::pls_decl(const PlsRemap &var)
  11220. {
  11221. auto &variable = get<SPIRVariable>(var.id);
  11222. SPIRType type;
  11223. type.vecsize = pls_format_to_components(var.format);
  11224. type.basetype = pls_format_to_basetype(var.format);
  11225. return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
  11226. to_name(variable.self));
  11227. }
  11228. uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
  11229. {
  11230. return to_array_size_literal(type, uint32_t(type.array.size() - 1));
  11231. }
  11232. uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
  11233. {
  11234. assert(type.array.size() == type.array_size_literal.size());
  11235. if (type.array_size_literal[index])
  11236. {
  11237. return type.array[index];
  11238. }
  11239. else
  11240. {
  11241. // Use the default spec constant value.
  11242. // This is the best we can do.
  11243. return evaluate_constant_u32(type.array[index]);
  11244. }
  11245. }
  11246. string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
  11247. {
  11248. assert(type.array.size() == type.array_size_literal.size());
  11249. auto &size = type.array[index];
  11250. if (!type.array_size_literal[index])
  11251. return to_expression(size);
  11252. else if (size)
  11253. return convert_to_string(size);
  11254. else if (!backend.unsized_array_supported)
  11255. {
// For runtime-sized arrays, we can work around the
// lack of standard support for this by simply having
// a single-element array.
//
// Runtime-length arrays must always be the last element
// in an interface block.
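// e.g. (illustrative) an SSBO member declared as "float data[];" is emitted as "float data[1];"
// on targets without unsized-array support; it must remain the last member of its block.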
  11262. return "1";
  11263. }
  11264. else
  11265. return "";
  11266. }
  11267. string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
  11268. {
  11269. if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
  11270. {
  11271. // We are using a wrapped pointer type, and we should not emit any array declarations here.
  11272. return "";
  11273. }
  11274. if (type.array.empty())
  11275. return "";
  11276. if (options.flatten_multidimensional_arrays)
  11277. {
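// The flattened form multiplies all sizes into a single dimension, e.g. (illustrative) a [4][3]
// array is declared as [4 * 3]; access-chain indexing is adjusted accordingly elsewhere.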
  11278. string res;
  11279. res += "[";
  11280. for (auto i = uint32_t(type.array.size()); i; i--)
  11281. {
  11282. res += enclose_expression(to_array_size(type, i - 1));
  11283. if (i > 1)
  11284. res += " * ";
  11285. }
  11286. res += "]";
  11287. return res;
  11288. }
  11289. else
  11290. {
  11291. if (type.array.size() > 1)
  11292. {
  11293. if (!options.es && options.version < 430)
  11294. require_extension_internal("GL_ARB_arrays_of_arrays");
  11295. else if (options.es && options.version < 310)
  11296. SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
  11297. "Try using --flatten-multidimensional-arrays or set "
  11298. "options.flatten_multidimensional_arrays to true.");
  11299. }
  11300. string res;
  11301. for (auto i = uint32_t(type.array.size()); i; i--)
  11302. {
  11303. res += "[";
  11304. res += to_array_size(type, i - 1);
  11305. res += "]";
  11306. }
  11307. return res;
  11308. }
  11309. }
  11310. string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
  11311. {
  11312. auto &imagetype = get<SPIRType>(type.image.type);
  11313. string res;
  11314. switch (imagetype.basetype)
  11315. {
  11316. case SPIRType::Int:
  11317. case SPIRType::Short:
  11318. case SPIRType::SByte:
  11319. res = "i";
  11320. break;
  11321. case SPIRType::UInt:
  11322. case SPIRType::UShort:
  11323. case SPIRType::UByte:
  11324. res = "u";
  11325. break;
  11326. default:
  11327. break;
  11328. }
// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
// We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
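// e.g. (illustrative) an image with f16vec4 texels is still declared as a plain (mediump)
// texture2D/sampler2D, and the fetched value is narrowed to f16vec4 afterwards.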
  11331. if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
  11332. return res + "subpassInput" + (type.image.ms ? "MS" : "");
  11333. else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
  11334. subpass_input_is_framebuffer_fetch(id))
  11335. {
  11336. SPIRType sampled_type = get<SPIRType>(type.image.type);
  11337. sampled_type.vecsize = 4;
  11338. return type_to_glsl(sampled_type);
  11339. }
  11340. // If we're emulating subpassInput with samplers, force sampler2D
  11341. // so we don't have to specify format.
  11342. if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
  11343. {
  11344. // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
  11345. if (type.image.dim == DimBuffer && type.image.sampled == 1)
  11346. res += "sampler";
  11347. else
  11348. res += type.image.sampled == 2 ? "image" : "texture";
  11349. }
  11350. else
  11351. res += "sampler";
  11352. switch (type.image.dim)
  11353. {
  11354. case Dim1D:
  11355. res += "1D";
  11356. break;
  11357. case Dim2D:
  11358. res += "2D";
  11359. break;
  11360. case Dim3D:
  11361. res += "3D";
  11362. break;
  11363. case DimCube:
  11364. res += "Cube";
  11365. break;
  11366. case DimRect:
  11367. if (options.es)
  11368. SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
  11369. if (is_legacy_desktop())
  11370. require_extension_internal("GL_ARB_texture_rectangle");
  11371. res += "2DRect";
  11372. break;
  11373. case DimBuffer:
  11374. if (options.es && options.version < 320)
  11375. require_extension_internal("GL_OES_texture_buffer");
  11376. else if (!options.es && options.version < 300)
  11377. require_extension_internal("GL_EXT_texture_buffer_object");
  11378. res += "Buffer";
  11379. break;
  11380. case DimSubpassData:
  11381. res += "2D";
  11382. break;
  11383. default:
  11384. SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
  11385. }
  11386. if (type.image.ms)
  11387. res += "MS";
  11388. if (type.image.arrayed)
  11389. {
  11390. if (is_legacy_desktop())
  11391. require_extension_internal("GL_EXT_texture_array");
  11392. res += "Array";
  11393. }
  11394. // "Shadow" state in GLSL only exists for samplers and combined image samplers.
  11395. if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
  11396. image_is_comparison(type, id))
  11397. {
  11398. res += "Shadow";
  11399. }
  11400. return res;
  11401. }
  11402. string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
  11403. {
  11404. if (backend.use_array_constructor && type.array.size() > 1)
  11405. {
  11406. if (options.flatten_multidimensional_arrays)
  11407. SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
  11408. "e.g. float[][]().");
  11409. else if (!options.es && options.version < 430)
  11410. require_extension_internal("GL_ARB_arrays_of_arrays");
  11411. else if (options.es && options.version < 310)
  11412. SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
  11413. }
  11414. auto e = type_to_glsl(type);
  11415. if (backend.use_array_constructor)
  11416. {
  11417. for (uint32_t i = 0; i < type.array.size(); i++)
  11418. e += "[]";
  11419. }
  11420. return e;
  11421. }
  11422. // The optional id parameter indicates the object whose type we are trying
  11423. // to find the description for. It is optional. Most type descriptions do not
  11424. // depend on a specific object's use of that type.
  11425. string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
  11426. {
  11427. if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
  11428. {
  11429. // Need to create a magic type name which compacts the entire type information.
  11430. string name = type_to_glsl(get_pointee_type(type));
  11431. for (size_t i = 0; i < type.array.size(); i++)
  11432. {
  11433. if (type.array_size_literal[i])
  11434. name += join(type.array[i], "_");
  11435. else
  11436. name += join("id", type.array[i], "_");
  11437. }
  11438. name += "Pointer";
  11439. return name;
  11440. }
  11441. switch (type.basetype)
  11442. {
  11443. case SPIRType::Struct:
  11444. // Need OpName lookup here to get a "sensible" name for a struct.
  11445. if (backend.explicit_struct_type)
  11446. return join("struct ", to_name(type.self));
  11447. else
  11448. return to_name(type.self);
  11449. case SPIRType::Image:
  11450. case SPIRType::SampledImage:
  11451. return image_type_glsl(type, id);
  11452. case SPIRType::Sampler:
  11453. // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
  11454. // this distinction into the type system.
  11455. return comparison_ids.count(id) ? "samplerShadow" : "sampler";
  11456. case SPIRType::AccelerationStructure:
  11457. return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
  11458. case SPIRType::Void:
  11459. return "void";
  11460. default:
  11461. break;
  11462. }
  11463. if (type.basetype == SPIRType::UInt && is_legacy())
  11464. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
  11465. if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
  11466. {
  11467. switch (type.basetype)
  11468. {
  11469. case SPIRType::Boolean:
  11470. return "bool";
  11471. case SPIRType::SByte:
  11472. return backend.basic_int8_type;
  11473. case SPIRType::UByte:
  11474. return backend.basic_uint8_type;
  11475. case SPIRType::Short:
  11476. return backend.basic_int16_type;
  11477. case SPIRType::UShort:
  11478. return backend.basic_uint16_type;
  11479. case SPIRType::Int:
  11480. return backend.basic_int_type;
  11481. case SPIRType::UInt:
  11482. return backend.basic_uint_type;
  11483. case SPIRType::AtomicCounter:
  11484. return "atomic_uint";
  11485. case SPIRType::Half:
  11486. return "float16_t";
  11487. case SPIRType::Float:
  11488. return "float";
  11489. case SPIRType::Double:
  11490. return "double";
  11491. case SPIRType::Int64:
  11492. return "int64_t";
  11493. case SPIRType::UInt64:
  11494. return "uint64_t";
  11495. default:
  11496. return "???";
  11497. }
  11498. }
  11499. else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
  11500. {
  11501. switch (type.basetype)
  11502. {
  11503. case SPIRType::Boolean:
  11504. return join("bvec", type.vecsize);
  11505. case SPIRType::SByte:
  11506. return join("i8vec", type.vecsize);
  11507. case SPIRType::UByte:
  11508. return join("u8vec", type.vecsize);
  11509. case SPIRType::Short:
  11510. return join("i16vec", type.vecsize);
  11511. case SPIRType::UShort:
  11512. return join("u16vec", type.vecsize);
  11513. case SPIRType::Int:
  11514. return join("ivec", type.vecsize);
  11515. case SPIRType::UInt:
  11516. return join("uvec", type.vecsize);
  11517. case SPIRType::Half:
  11518. return join("f16vec", type.vecsize);
  11519. case SPIRType::Float:
  11520. return join("vec", type.vecsize);
  11521. case SPIRType::Double:
  11522. return join("dvec", type.vecsize);
  11523. case SPIRType::Int64:
  11524. return join("i64vec", type.vecsize);
  11525. case SPIRType::UInt64:
  11526. return join("u64vec", type.vecsize);
  11527. default:
  11528. return "???";
  11529. }
  11530. }
  11531. else if (type.vecsize == type.columns) // Simple Matrix builtin
  11532. {
  11533. switch (type.basetype)
  11534. {
  11535. case SPIRType::Boolean:
  11536. return join("bmat", type.vecsize);
  11537. case SPIRType::Int:
  11538. return join("imat", type.vecsize);
  11539. case SPIRType::UInt:
  11540. return join("umat", type.vecsize);
  11541. case SPIRType::Half:
  11542. return join("f16mat", type.vecsize);
  11543. case SPIRType::Float:
  11544. return join("mat", type.vecsize);
  11545. case SPIRType::Double:
  11546. return join("dmat", type.vecsize);
  11547. // Matrix types not supported for int64/uint64.
  11548. default:
  11549. return "???";
  11550. }
  11551. }
  11552. else
  11553. {
  11554. switch (type.basetype)
  11555. {
  11556. case SPIRType::Boolean:
  11557. return join("bmat", type.columns, "x", type.vecsize);
  11558. case SPIRType::Int:
  11559. return join("imat", type.columns, "x", type.vecsize);
  11560. case SPIRType::UInt:
  11561. return join("umat", type.columns, "x", type.vecsize);
  11562. case SPIRType::Half:
  11563. return join("f16mat", type.columns, "x", type.vecsize);
  11564. case SPIRType::Float:
  11565. return join("mat", type.columns, "x", type.vecsize);
  11566. case SPIRType::Double:
  11567. return join("dmat", type.columns, "x", type.vecsize);
  11568. // Matrix types not supported for int64/uint64.
  11569. default:
  11570. return "???";
  11571. }
  11572. }
  11573. }
  11574. void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
  11575. const unordered_set<string> &variables_secondary, string &name)
  11576. {
  11577. if (name.empty())
  11578. return;
  11579. ParsedIR::sanitize_underscores(name);
  11580. if (ParsedIR::is_globally_reserved_identifier(name, true))
  11581. {
  11582. name.clear();
  11583. return;
  11584. }
  11585. update_name_cache(variables_primary, variables_secondary, name);
  11586. }
  11587. void CompilerGLSL::add_local_variable_name(uint32_t id)
  11588. {
  11589. add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
  11590. }
  11591. void CompilerGLSL::add_resource_name(uint32_t id)
  11592. {
  11593. add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
  11594. }
  11595. void CompilerGLSL::add_header_line(const std::string &line)
  11596. {
  11597. header_lines.push_back(line);
  11598. }
  11599. bool CompilerGLSL::has_extension(const std::string &ext) const
  11600. {
  11601. auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
  11602. return itr != end(forced_extensions);
  11603. }
  11604. void CompilerGLSL::require_extension(const std::string &ext)
  11605. {
  11606. if (!has_extension(ext))
  11607. forced_extensions.push_back(ext);
  11608. }
  11609. void CompilerGLSL::require_extension_internal(const string &ext)
  11610. {
  11611. if (backend.supports_extensions && !has_extension(ext))
  11612. {
  11613. forced_extensions.push_back(ext);
  11614. force_recompile();
  11615. }
  11616. }
  11617. void CompilerGLSL::flatten_buffer_block(VariableID id)
  11618. {
  11619. auto &var = get<SPIRVariable>(id);
  11620. auto &type = get<SPIRType>(var.basetype);
  11621. auto name = to_name(type.self, false);
  11622. auto &flags = ir.meta[type.self].decoration.decoration_flags;
  11623. if (!type.array.empty())
  11624. SPIRV_CROSS_THROW(name + " is an array of UBOs.");
  11625. if (type.basetype != SPIRType::Struct)
  11626. SPIRV_CROSS_THROW(name + " is not a struct.");
  11627. if (!flags.get(DecorationBlock))
  11628. SPIRV_CROSS_THROW(name + " is not a block.");
  11629. if (type.member_types.empty())
  11630. SPIRV_CROSS_THROW(name + " is an empty struct.");
  11631. flattened_buffer_blocks.insert(id);
  11632. }
  11633. bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
  11634. {
  11635. return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
  11636. }
  11637. bool CompilerGLSL::check_atomic_image(uint32_t id)
  11638. {
  11639. auto &type = expression_type(id);
  11640. if (type.storage == StorageClassImage)
  11641. {
  11642. if (options.es && options.version < 320)
  11643. require_extension_internal("GL_OES_shader_image_atomic");
  11644. auto *var = maybe_get_backing_variable(id);
  11645. if (var)
  11646. {
  11647. auto &flags = ir.meta[var->self].decoration.decoration_flags;
  11648. if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
  11649. {
  11650. flags.clear(DecorationNonWritable);
  11651. flags.clear(DecorationNonReadable);
  11652. force_recompile();
  11653. }
  11654. }
  11655. return true;
  11656. }
  11657. else
  11658. return false;
  11659. }
  11660. void CompilerGLSL::add_function_overload(const SPIRFunction &func)
  11661. {
  11662. Hasher hasher;
  11663. for (auto &arg : func.arguments)
  11664. {
  11665. // Parameters can vary with pointer type or not,
  11666. // but that will not change the signature in GLSL/HLSL,
  11667. // so strip the pointer type before hashing.
  11668. uint32_t type_id = get_pointee_type_id(arg.type);
  11669. auto &type = get<SPIRType>(type_id);
  11670. if (!combined_image_samplers.empty())
  11671. {
// If we have combined image samplers, we cannot really trust the image and sampler arguments
// we pass down to callees, because they may be shuffled around.
// Ignore these arguments, to make sure that functions need to differ in some other way
// to be considered different overloads.
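// Illustrative example (hypothetical signatures): foo(texture2D, sampler) and foo(texture2D) may both
// end up taking equivalent combined sampler2D parameters after remapping, so only the remaining
// argument types can distinguish the two overloads.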
  11676. if (type.basetype == SPIRType::SampledImage ||
  11677. (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
  11678. {
  11679. continue;
  11680. }
  11681. }
  11682. hasher.u32(type_id);
  11683. }
  11684. uint64_t types_hash = hasher.get();
  11685. auto function_name = to_name(func.self);
  11686. auto itr = function_overloads.find(function_name);
  11687. if (itr != end(function_overloads))
  11688. {
  11689. // There exists a function with this name already.
  11690. auto &overloads = itr->second;
  11691. if (overloads.count(types_hash) != 0)
  11692. {
  11693. // Overload conflict, assign a new name.
  11694. add_resource_name(func.self);
  11695. function_overloads[to_name(func.self)].insert(types_hash);
  11696. }
  11697. else
  11698. {
  11699. // Can reuse the name.
  11700. overloads.insert(types_hash);
  11701. }
  11702. }
  11703. else
  11704. {
  11705. // First time we see this function name.
  11706. add_resource_name(func.self);
  11707. function_overloads[to_name(func.self)].insert(types_hash);
  11708. }
  11709. }
  11710. void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
  11711. {
  11712. if (func.self != ir.default_entry_point)
  11713. add_function_overload(func);
  11714. // Avoid shadow declarations.
  11715. local_variable_names = resource_names;
  11716. string decl;
  11717. auto &type = get<SPIRType>(func.return_type);
  11718. decl += flags_to_qualifiers_glsl(type, return_flags);
  11719. decl += type_to_glsl(type);
  11720. decl += type_to_array_glsl(type);
  11721. decl += " ";
  11722. if (func.self == ir.default_entry_point)
  11723. {
  11724. // If we need complex fallback in GLSL, we just wrap main() in a function
  11725. // and interlock the entire shader ...
  11726. if (interlocked_is_complex)
  11727. decl += "spvMainInterlockedBody";
  11728. else
  11729. decl += "main";
  11730. processing_entry_point = true;
  11731. }
  11732. else
  11733. decl += to_name(func.self);
  11734. decl += "(";
  11735. SmallVector<string> arglist;
  11736. for (auto &arg : func.arguments)
  11737. {
  11738. // Do not pass in separate images or samplers if we're remapping
  11739. // to combined image samplers.
  11740. if (skip_argument(arg.id))
  11741. continue;
// Might change the variable name if it already exists in this function.
// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
// to use the same name for multiple variables.
// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
  11746. add_local_variable_name(arg.id);
  11747. arglist.push_back(argument_decl(arg));
  11748. // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
  11749. auto *var = maybe_get<SPIRVariable>(arg.id);
  11750. if (var)
  11751. var->parameter = &arg;
  11752. }
  11753. for (auto &arg : func.shadow_arguments)
  11754. {
// Might change the variable name if it already exists in this function.
// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
// to use the same name for multiple variables.
// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
  11759. add_local_variable_name(arg.id);
  11760. arglist.push_back(argument_decl(arg));
  11761. // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
  11762. auto *var = maybe_get<SPIRVariable>(arg.id);
  11763. if (var)
  11764. var->parameter = &arg;
  11765. }
  11766. decl += merge(arglist);
  11767. decl += ")";
  11768. statement(decl);
  11769. }
  11770. void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
  11771. {
  11772. // Avoid potential cycles.
  11773. if (func.active)
  11774. return;
  11775. func.active = true;
  11776. // If we depend on a function, emit that function before we emit our own function.
  11777. for (auto block : func.blocks)
  11778. {
  11779. auto &b = get<SPIRBlock>(block);
  11780. for (auto &i : b.ops)
  11781. {
  11782. auto ops = stream(i);
  11783. auto op = static_cast<Op>(i.op);
  11784. if (op == OpFunctionCall)
  11785. {
  11786. // Recursively emit functions which are called.
  11787. uint32_t id = ops[2];
  11788. emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
  11789. }
  11790. }
  11791. }
  11792. if (func.entry_line.file_id != 0)
  11793. emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
  11794. emit_function_prototype(func, return_flags);
  11795. begin_scope();
  11796. if (func.self == ir.default_entry_point)
  11797. emit_entry_point_declarations();
  11798. current_function = &func;
  11799. auto &entry_block = get<SPIRBlock>(func.entry_block);
  11800. sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
  11801. for (auto &array : func.constant_arrays_needed_on_stack)
  11802. {
  11803. auto &c = get<SPIRConstant>(array);
  11804. auto &type = get<SPIRType>(c.constant_type);
  11805. statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
  11806. }
  11807. for (auto &v : func.local_variables)
  11808. {
  11809. auto &var = get<SPIRVariable>(v);
  11810. var.deferred_declaration = false;
  11811. if (var.storage == StorageClassWorkgroup)
  11812. {
  11813. // Special variable type which cannot have initializer,
  11814. // need to be declared as standalone variables.
  11815. // Comes from MSL which can push global variables as local variables in main function.
  11816. add_local_variable_name(var.self);
  11817. statement(variable_decl(var), ";");
  11818. var.deferred_declaration = false;
  11819. }
  11820. else if (var.storage == StorageClassPrivate)
  11821. {
  11822. // These variables will not have had their CFG usage analyzed, so move it to the entry block.
  11823. // Comes from MSL which can push global variables as local variables in main function.
  11824. // We could just declare them right now, but we would miss out on an important initialization case which is
  11825. // LUT declaration in MSL.
  11826. // If we don't declare the variable when it is assigned we're forced to go through a helper function
  11827. // which copies elements one by one.
  11828. add_local_variable_name(var.self);
  11829. if (var.initializer)
  11830. {
  11831. statement(variable_decl(var), ";");
  11832. var.deferred_declaration = false;
  11833. }
  11834. else
  11835. {
  11836. auto &dominated = entry_block.dominated_variables;
  11837. if (find(begin(dominated), end(dominated), var.self) == end(dominated))
  11838. entry_block.dominated_variables.push_back(var.self);
  11839. var.deferred_declaration = true;
  11840. }
  11841. }
  11842. else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
  11843. {
  11844. // No need to declare this variable, it has a static expression.
  11845. var.deferred_declaration = false;
  11846. }
  11847. else if (expression_is_lvalue(v))
  11848. {
  11849. add_local_variable_name(var.self);
  11850. // Loop variables should never be declared early, they are explicitly emitted in a loop.
  11851. if (var.initializer && !var.loop_variable)
  11852. statement(variable_decl_function_local(var), ";");
  11853. else
  11854. {
  11855. // Don't declare variable until first use to declutter the GLSL output quite a lot.
  11856. // If we don't touch the variable before first branch,
  11857. // declare it then since we need variable declaration to be in top scope.
  11858. var.deferred_declaration = true;
  11859. }
  11860. }
  11861. else
  11862. {
  11863. // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
  11864. // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
  11865. // This means that when we OpStore to these variables, we just write in the expression ID directly.
  11866. // This breaks any kind of branching, since the variable must be statically assigned.
  11867. // Branching on samplers and images would be pretty much impossible to fake in GLSL.
  11868. var.statically_assigned = true;
  11869. }
  11870. var.loop_variable_enable = false;
  11871. // Loop variables are never declared outside their for-loop, so block any implicit declaration.
  11872. if (var.loop_variable)
  11873. var.deferred_declaration = false;
  11874. }
  11875. // Enforce declaration order for regression testing purposes.
  11876. for (auto &block_id : func.blocks)
  11877. {
  11878. auto &block = get<SPIRBlock>(block_id);
  11879. sort(begin(block.dominated_variables), end(block.dominated_variables));
  11880. }
  11881. for (auto &line : current_function->fixup_hooks_in)
  11882. line();
  11883. emit_block_chain(entry_block);
  11884. end_scope();
  11885. processing_entry_point = false;
  11886. statement("");
  11887. // Make sure deferred declaration state for local variables is cleared when we are done with function.
  11888. // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
  11889. for (auto &v : func.local_variables)
  11890. {
  11891. auto &var = get<SPIRVariable>(v);
  11892. var.deferred_declaration = false;
  11893. }
  11894. }
  11895. void CompilerGLSL::emit_fixup()
  11896. {
  11897. if (is_vertex_like_shader())
  11898. {
  11899. if (options.vertex.fixup_clipspace)
  11900. {
  11901. const char *suffix = backend.float_literal_suffix ? "f" : "";
  11902. statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
  11903. }
  11904. if (options.vertex.flip_vert_y)
  11905. statement("gl_Position.y = -gl_Position.y;");
  11906. }
  11907. }
  11908. void CompilerGLSL::flush_phi(BlockID from, BlockID to)
  11909. {
  11910. auto &child = get<SPIRBlock>(to);
  11911. if (child.ignore_phi_from_block == from)
  11912. return;
  11913. unordered_set<uint32_t> temporary_phi_variables;
  11914. for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
  11915. {
  11916. auto &phi = *itr;
  11917. if (phi.parent == from)
  11918. {
  11919. auto &var = get<SPIRVariable>(phi.function_variable);
  11920. // A Phi variable might be a loop variable, so flush to static expression.
  11921. if (var.loop_variable && !var.loop_variable_enable)
  11922. var.static_expression = phi.local_variable;
  11923. else
  11924. {
  11925. flush_variable_declaration(phi.function_variable);
// Check if we are going to write to a Phi variable that another statement will read from
// as part of another Phi node in our target block.
// For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
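// Illustrative example: if this edge performs A = a_in and a later phi on the same edge performs
// B = A, writing A first would clobber the value B still needs, so A's current value is saved to the
// _<id>_copy temporary emitted below and the later read is redirected to that copy.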
  11930. bool need_saved_temporary =
  11931. find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
  11932. return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
  11933. }) != end(child.phi_variables);
  11934. if (need_saved_temporary)
  11935. {
  11936. // Need to make sure we declare the phi variable with a copy at the right scope.
  11937. // We cannot safely declare a temporary here since we might be inside a continue block.
  11938. if (!var.allocate_temporary_copy)
  11939. {
  11940. var.allocate_temporary_copy = true;
  11941. force_recompile();
  11942. }
  11943. statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
  11944. temporary_phi_variables.insert(phi.function_variable);
  11945. }
  11946. // This might be called in continue block, so make sure we
  11947. // use this to emit ESSL 1.0 compliant increments/decrements.
  11948. auto lhs = to_expression(phi.function_variable);
  11949. string rhs;
  11950. if (temporary_phi_variables.count(phi.local_variable))
  11951. rhs = join("_", phi.local_variable, "_copy");
  11952. else
  11953. rhs = to_pointer_expression(phi.local_variable);
  11954. if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
  11955. statement(lhs, " = ", rhs, ";");
  11956. }
  11957. register_write(phi.function_variable);
  11958. }
  11959. }
  11960. }
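// Illustrative sketch (not part of the original source): when two phis in the target block swap values,
// flush_phi() saves the first function variable in a "_<id>_copy" temporary so the later read is not clobbered.
// Assuming hypothetical IDs 20/21 named "a" and "b", the emitted GLSL would look roughly like:
//     _20_copy = a;
//     a = b;
//     b = _20_copy;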
  11961. void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
  11962. {
  11963. auto &to_block = get<SPIRBlock>(to);
  11964. if (from == to)
  11965. return;
  11966. assert(is_continue(to));
  11967. if (to_block.complex_continue)
  11968. {
  11969. // Just emit the whole block chain as is.
  11970. auto usage_counts = expression_usage_counts;
  11971. emit_block_chain(to_block);
  11972. // Expression usage counts are moot after returning from the continue block.
  11973. expression_usage_counts = usage_counts;
  11974. }
  11975. else
  11976. {
  11977. auto &from_block = get<SPIRBlock>(from);
  11978. bool outside_control_flow = false;
  11979. uint32_t loop_dominator = 0;
  11980. // FIXME: Refactor this to not use the old loop_dominator tracking.
  11981. if (from_block.merge_block)
  11982. {
  11983. // If we are a loop header, we don't set the loop dominator,
  11984. // so just use "self" here.
  11985. loop_dominator = from;
  11986. }
  11987. else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  11988. {
  11989. loop_dominator = from_block.loop_dominator;
  11990. }
  11991. if (loop_dominator != 0)
  11992. {
  11993. auto &cfg = get_cfg_for_current_function();
  11994. // For non-complex continue blocks, we implicitly branch to the continue block
  11995. // by having the continue block be part of the loop header in for (; ; continue-block).
  11996. outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
  11997. }
  11998. // Some simplification for for-loops. We always end up with a useless continue;
  11999. // statement since we branch to a loop block.
12000. // Walk the CFG; if we unconditionally execute the block calling continue (assuming we're in the loop block),
12001. // we can avoid writing out an explicit continue statement.
  12002. // Similar optimization to return statements if we know we're outside flow control.
  12003. if (!outside_control_flow)
  12004. statement("continue;");
  12005. }
  12006. }
  12007. void CompilerGLSL::branch(BlockID from, BlockID to)
  12008. {
  12009. flush_phi(from, to);
  12010. flush_control_dependent_expressions(from);
  12011. bool to_is_continue = is_continue(to);
  12012. // This is only a continue if we branch to our loop dominator.
  12013. if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
  12014. {
  12015. // This can happen if we had a complex continue block which was emitted.
  12016. // Once the continue block tries to branch to the loop header, just emit continue;
  12017. // and end the chain here.
  12018. statement("continue;");
  12019. }
  12020. else if (from != to && is_break(to))
  12021. {
  12022. // We cannot break to ourselves, so check explicitly for from != to.
  12023. // This case can trigger if a loop header is all three of these things:
  12024. // - Continue block
  12025. // - Loop header
  12026. // - Break merge target all at once ...
  12027. // Very dirty workaround.
  12028. // Switch constructs are able to break, but they cannot break out of a loop at the same time.
12029. // The only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
  12030. // write to the ladder here, and defer the break.
  12031. // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
  12032. if (current_emitting_switch && is_loop_break(to) &&
  12033. current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
  12034. get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
  12035. {
  12036. if (!current_emitting_switch->need_ladder_break)
  12037. {
  12038. force_recompile();
  12039. current_emitting_switch->need_ladder_break = true;
  12040. }
  12041. statement("_", current_emitting_switch->self, "_ladder_break = true;");
  12042. }
  12043. statement("break;");
  12044. }
  12045. else if (to_is_continue || from == to)
  12046. {
12047. // The from == to case can happen for a do-while loop which branches into itself.
  12048. // We don't mark these cases as continue blocks, but the only possible way to branch into
12049. // ourselves is by means of continue blocks.
  12050. // If we are merging to a continue block, there is no need to emit the block chain for continue here.
  12051. // We can branch to the continue block after we merge execution.
  12052. // Here we make use of structured control flow rules from spec:
  12053. // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
  12054. // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
  12055. // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
  12056. auto &block_meta = ir.block_meta[to];
  12057. bool branching_to_merge =
  12058. (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
  12059. ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
  12060. if (!to_is_continue || !branching_to_merge)
  12061. branch_to_continue(from, to);
  12062. }
  12063. else if (!is_conditional(to))
  12064. emit_block_chain(get<SPIRBlock>(to));
  12065. // It is important that we check for break before continue.
  12066. // A block might serve two purposes, a break block for the inner scope, and
  12067. // a continue block in the outer scope.
  12068. // Inner scope always takes precedence.
  12069. }
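// Illustrative sketch (not part of the original source): when a switch has to break out of an enclosing
// loop, the ladder variable declared at the top of the switch scope is written before the plain break,
// and the loop-level break happens after the switch. With a hypothetical block ID 33, roughly:
//     bool _33_ladder_break = false;
//     switch (...) { case 0: _33_ladder_break = true; break; ... }
//     if (_33_ladder_break) { break; }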
  12070. void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
  12071. {
  12072. auto &from_block = get<SPIRBlock>(from);
  12073. BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
  12074. // If we branch directly to our selection merge target, we don't need a code path.
  12075. bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
  12076. bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);
  12077. if (!true_block_needs_code && !false_block_needs_code)
  12078. return;
  12079. emit_block_hints(get<SPIRBlock>(from));
  12080. if (true_block_needs_code)
  12081. {
  12082. statement("if (", to_expression(cond), ")");
  12083. begin_scope();
  12084. branch(from, true_block);
  12085. end_scope();
  12086. if (false_block_needs_code)
  12087. {
  12088. statement("else");
  12089. begin_scope();
  12090. branch(from, false_block);
  12091. end_scope();
  12092. }
  12093. }
  12094. else if (false_block_needs_code)
  12095. {
  12096. // Only need false path, use negative conditional.
  12097. statement("if (!", to_enclosed_expression(cond), ")");
  12098. begin_scope();
  12099. branch(from, false_block);
  12100. end_scope();
  12101. }
  12102. }
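// Illustrative sketch (not part of the original source): when only the false path needs code (the true
// path goes straight to the selection merge), the condition is simply negated, e.g. for a hypothetical
// condition a < b:
//     if (!(a < b))
//     {
//         ...
//     }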
  12103. // FIXME: This currently cannot handle complex continue blocks
  12104. // as in do-while.
  12105. // This should be seen as a "trivial" continue block.
  12106. string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
  12107. {
  12108. auto *block = &get<SPIRBlock>(continue_block);
  12109. // While emitting the continue block, declare_temporary will check this
  12110. // if we have to emit temporaries.
  12111. current_continue_block = block;
  12112. SmallVector<string> statements;
  12113. // Capture all statements into our list.
  12114. auto *old = redirect_statement;
  12115. redirect_statement = &statements;
  12116. // Stamp out all blocks one after each other.
  12117. while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
  12118. {
  12119. // Write out all instructions we have in this block.
  12120. emit_block_instructions(*block);
  12121. // For plain branchless for/while continue blocks.
  12122. if (block->next_block)
  12123. {
  12124. flush_phi(continue_block, block->next_block);
  12125. block = &get<SPIRBlock>(block->next_block);
  12126. }
12127. // For do-while blocks, the last block will be a select block.
  12128. else if (block->true_block && follow_true_block)
  12129. {
  12130. flush_phi(continue_block, block->true_block);
  12131. block = &get<SPIRBlock>(block->true_block);
  12132. }
  12133. else if (block->false_block && follow_false_block)
  12134. {
  12135. flush_phi(continue_block, block->false_block);
  12136. block = &get<SPIRBlock>(block->false_block);
  12137. }
  12138. else
  12139. {
  12140. SPIRV_CROSS_THROW("Invalid continue block detected!");
  12141. }
  12142. }
  12143. // Restore old pointer.
  12144. redirect_statement = old;
  12145. // Somewhat ugly, strip off the last ';' since we use ',' instead.
  12146. // Ideally, we should select this behavior in statement().
  12147. for (auto &s : statements)
  12148. {
  12149. if (!s.empty() && s.back() == ';')
  12150. s.erase(s.size() - 1, 1);
  12151. }
  12152. current_continue_block = nullptr;
  12153. return merge(statements);
  12154. }
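// Illustrative sketch (not part of the original source): the captured continue-block statements have
// their trailing ';' stripped and are joined with commas, so a continue block that increments two
// hypothetical counters i and j becomes the for-loop increment expression "i++, j += 2".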
  12155. void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
  12156. {
  12157. // While loops do not take initializers, so declare all of them outside.
  12158. for (auto &loop_var : block.loop_variables)
  12159. {
  12160. auto &var = get<SPIRVariable>(loop_var);
  12161. statement(variable_decl(var), ";");
  12162. }
  12163. }
  12164. string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
  12165. {
  12166. if (block.loop_variables.empty())
  12167. return "";
  12168. bool same_types = for_loop_initializers_are_same_type(block);
  12169. // We can only declare for loop initializers if all variables are of same type.
  12170. // If we cannot do this, declare individual variables before the loop header.
  12171. // We might have a loop variable candidate which was not assigned to for some reason.
  12172. uint32_t missing_initializers = 0;
  12173. for (auto &variable : block.loop_variables)
  12174. {
  12175. uint32_t expr = get<SPIRVariable>(variable).static_expression;
  12176. // Sometimes loop variables are initialized with OpUndef, but we can just declare
  12177. // a plain variable without initializer in this case.
  12178. if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
  12179. missing_initializers++;
  12180. }
  12181. if (block.loop_variables.size() == 1 && missing_initializers == 0)
  12182. {
  12183. return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
  12184. }
  12185. else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
  12186. {
  12187. for (auto &loop_var : block.loop_variables)
  12188. statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
  12189. return "";
  12190. }
  12191. else
  12192. {
12193. // We have a mix of loop variables: some with a clear initializer and some without.
  12194. // Separate the two streams.
  12195. string expr;
  12196. for (auto &loop_var : block.loop_variables)
  12197. {
  12198. uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
  12199. if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
  12200. {
  12201. statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
  12202. }
  12203. else
  12204. {
  12205. auto &var = get<SPIRVariable>(loop_var);
  12206. auto &type = get_variable_data_type(var);
  12207. if (expr.empty())
  12208. {
  12209. // For loop initializers are of the form <type id = value, id = value, id = value, etc ...
  12210. expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
  12211. }
  12212. else
  12213. {
  12214. expr += ", ";
  12215. // In MSL, being based on C++, the asterisk marking a pointer
  12216. // binds to the identifier, not the type.
  12217. if (type.pointer)
  12218. expr += "* ";
  12219. }
  12220. expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
  12221. }
  12222. }
  12223. return expr;
  12224. }
  12225. }
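// Illustrative sketch (not part of the original source): with two hypothetical int loop variables that
// both have static initializers, the returned initializer string would look roughly like "int i = 0, j = 10",
// while a variable initialized with OpUndef is instead declared on its own line before the loop.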
  12226. bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
  12227. {
  12228. if (block.loop_variables.size() <= 1)
  12229. return true;
  12230. uint32_t expected = 0;
  12231. Bitset expected_flags;
  12232. for (auto &var : block.loop_variables)
  12233. {
  12234. // Don't care about uninitialized variables as they will not be part of the initializers.
  12235. uint32_t expr = get<SPIRVariable>(var).static_expression;
  12236. if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
  12237. continue;
  12238. if (expected == 0)
  12239. {
  12240. expected = get<SPIRVariable>(var).basetype;
  12241. expected_flags = get_decoration_bitset(var);
  12242. }
  12243. else if (expected != get<SPIRVariable>(var).basetype)
  12244. return false;
  12245. // Precision flags and things like that must also match.
  12246. if (expected_flags != get_decoration_bitset(var))
  12247. return false;
  12248. }
  12249. return true;
  12250. }
  12251. bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
  12252. {
  12253. SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
  12254. if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
  12255. {
  12256. uint32_t current_count = statement_count;
  12257. // If we're trying to create a true for loop,
  12258. // we need to make sure that all opcodes before branch statement do not actually emit any code.
  12259. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
  12260. emit_block_instructions(block);
  12261. bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
  12262. // This can work! We only did trivial things which could be forwarded in block body!
  12263. if (current_count == statement_count && condition_is_temporary)
  12264. {
  12265. switch (continue_type)
  12266. {
  12267. case SPIRBlock::ForLoop:
  12268. {
  12269. // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
  12270. flush_undeclared_variables(block);
  12271. // Important that we do this in this order because
  12272. // emitting the continue block can invalidate the condition expression.
  12273. auto initializer = emit_for_loop_initializers(block);
  12274. auto condition = to_expression(block.condition);
  12275. // Condition might have to be inverted.
  12276. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  12277. condition = join("!", enclose_expression(condition));
  12278. emit_block_hints(block);
  12279. if (method != SPIRBlock::MergeToSelectContinueForLoop)
  12280. {
  12281. auto continue_block = emit_continue_block(block.continue_block, false, false);
  12282. statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
  12283. }
  12284. else
  12285. statement("for (", initializer, "; ", condition, "; )");
  12286. break;
  12287. }
  12288. case SPIRBlock::WhileLoop:
  12289. {
  12290. // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
  12291. flush_undeclared_variables(block);
  12292. emit_while_loop_initializers(block);
  12293. emit_block_hints(block);
  12294. auto condition = to_expression(block.condition);
  12295. // Condition might have to be inverted.
  12296. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  12297. condition = join("!", enclose_expression(condition));
  12298. statement("while (", condition, ")");
  12299. break;
  12300. }
  12301. default:
  12302. block.disable_block_optimization = true;
  12303. force_recompile();
  12304. begin_scope(); // We'll see an end_scope() later.
  12305. return false;
  12306. }
  12307. begin_scope();
  12308. return true;
  12309. }
  12310. else
  12311. {
  12312. block.disable_block_optimization = true;
  12313. force_recompile();
  12314. begin_scope(); // We'll see an end_scope() later.
  12315. return false;
  12316. }
  12317. }
  12318. else if (method == SPIRBlock::MergeToDirectForLoop)
  12319. {
  12320. auto &child = get<SPIRBlock>(block.next_block);
  12321. // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
  12322. flush_undeclared_variables(child);
  12323. uint32_t current_count = statement_count;
  12324. // If we're trying to create a true for loop,
  12325. // we need to make sure that all opcodes before branch statement do not actually emit any code.
  12326. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
  12327. emit_block_instructions(child);
  12328. bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
  12329. if (current_count == statement_count && condition_is_temporary)
  12330. {
  12331. uint32_t target_block = child.true_block;
  12332. switch (continue_type)
  12333. {
  12334. case SPIRBlock::ForLoop:
  12335. {
  12336. // Important that we do this in this order because
  12337. // emitting the continue block can invalidate the condition expression.
  12338. auto initializer = emit_for_loop_initializers(block);
  12339. auto condition = to_expression(child.condition);
  12340. // Condition might have to be inverted.
  12341. if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
  12342. {
  12343. condition = join("!", enclose_expression(condition));
  12344. target_block = child.false_block;
  12345. }
  12346. auto continue_block = emit_continue_block(block.continue_block, false, false);
  12347. emit_block_hints(block);
  12348. statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
  12349. break;
  12350. }
  12351. case SPIRBlock::WhileLoop:
  12352. {
  12353. emit_while_loop_initializers(block);
  12354. emit_block_hints(block);
  12355. auto condition = to_expression(child.condition);
  12356. // Condition might have to be inverted.
  12357. if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
  12358. {
  12359. condition = join("!", enclose_expression(condition));
  12360. target_block = child.false_block;
  12361. }
  12362. statement("while (", condition, ")");
  12363. break;
  12364. }
  12365. default:
  12366. block.disable_block_optimization = true;
  12367. force_recompile();
  12368. begin_scope(); // We'll see an end_scope() later.
  12369. return false;
  12370. }
  12371. begin_scope();
  12372. branch(child.self, target_block);
  12373. return true;
  12374. }
  12375. else
  12376. {
  12377. block.disable_block_optimization = true;
  12378. force_recompile();
  12379. begin_scope(); // We'll see an end_scope() later.
  12380. return false;
  12381. }
  12382. }
  12383. else
  12384. return false;
  12385. }
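// Illustrative sketch (not part of the original source): depending on the continue-block type, the loop
// headers emitted above take one of these shapes (i and count are hypothetical):
//     for (int i = 0; i < count; i++)   // ForLoop with a trivial continue block
//     for (int i = 0; i < count; )      // MergeToSelectContinueForLoop, continue block emitted inline later
//     while (i < count)                 // WhileLoop, initializers declared before the loop
// Any other pattern falls back to disable_block_optimization and forces a recompile.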
  12386. void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
  12387. {
  12388. for (auto &v : block.dominated_variables)
  12389. flush_variable_declaration(v);
  12390. }
  12391. void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
  12392. {
  12393. // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
  12394. // Need to sort these to ensure that reference output is stable.
  12395. sort(begin(temporaries), end(temporaries),
  12396. [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
  12397. for (auto &tmp : temporaries)
  12398. {
  12399. add_local_variable_name(tmp.second);
  12400. auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
  12401. auto &type = get<SPIRType>(tmp.first);
  12402. // Not all targets support pointer literals, so don't bother with that case.
  12403. string initializer;
  12404. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  12405. initializer = join(" = ", to_zero_initialized_expression(tmp.first));
  12406. statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");
  12407. hoisted_temporaries.insert(tmp.second);
  12408. forced_temporaries.insert(tmp.second);
  12409. // The temporary might be read from before it's assigned, set up the expression now.
  12410. set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
  12411. }
  12412. }
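// Illustrative sketch (not part of the original source): a temporary hoisted out of a loop is declared
// before the loop header, optionally zero-initialized when force_zero_initialized_variables is set.
// Assuming a hypothetical float temporary with ID 40, roughly:
//     float _40 = 0.0;
//     for (;;) { ... _40 = ...; ... }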
  12413. void CompilerGLSL::emit_block_chain(SPIRBlock &block)
  12414. {
  12415. bool select_branch_to_true_block = false;
  12416. bool select_branch_to_false_block = false;
  12417. bool skip_direct_branch = false;
  12418. bool emitted_loop_header_variables = false;
  12419. bool force_complex_continue_block = false;
  12420. ValueSaver<uint32_t> loop_level_saver(current_loop_level);
  12421. if (block.merge == SPIRBlock::MergeLoop)
  12422. add_loop_level();
  12423. emit_hoisted_temporaries(block.declare_temporary);
  12424. SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
  12425. if (block.continue_block)
  12426. {
  12427. continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
  12428. // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
  12429. if (continue_type == SPIRBlock::ComplexLoop)
  12430. block.complex_continue = true;
  12431. }
  12432. // If we have loop variables, stop masking out access to the variable now.
  12433. for (auto var_id : block.loop_variables)
  12434. {
  12435. auto &var = get<SPIRVariable>(var_id);
  12436. var.loop_variable_enable = true;
  12437. // We're not going to declare the variable directly, so emit a copy here.
  12438. emit_variable_temporary_copies(var);
  12439. }
  12440. // Remember deferred declaration state. We will restore it before returning.
  12441. SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
  12442. for (size_t i = 0; i < block.dominated_variables.size(); i++)
  12443. {
  12444. uint32_t var_id = block.dominated_variables[i];
  12445. auto &var = get<SPIRVariable>(var_id);
  12446. rearm_dominated_variables[i] = var.deferred_declaration;
  12447. }
  12448. // This is the method often used by spirv-opt to implement loops.
  12449. // The loop header goes straight into the continue block.
  12450. // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
  12451. // it *MUST* be used in the continue block. This loop method will not work.
  12452. if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
  12453. {
  12454. flush_undeclared_variables(block);
  12455. if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
  12456. {
  12457. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  12458. select_branch_to_false_block = true;
  12459. else
  12460. select_branch_to_true_block = true;
  12461. emitted_loop_header_variables = true;
  12462. force_complex_continue_block = true;
  12463. }
  12464. }
  12465. // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
  12466. else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
  12467. {
  12468. flush_undeclared_variables(block);
  12469. if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
  12470. {
12471. // The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
  12472. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  12473. select_branch_to_false_block = true;
  12474. else
  12475. select_branch_to_true_block = true;
  12476. emitted_loop_header_variables = true;
  12477. }
  12478. }
  12479. // This is the newer loop behavior in glslang which branches from Loop header directly to
12480. // a new block, which in turn has a conditional branch (OpBranchConditional) without a selection merge.
  12481. else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
  12482. {
  12483. flush_undeclared_variables(block);
  12484. if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
  12485. {
  12486. skip_direct_branch = true;
  12487. emitted_loop_header_variables = true;
  12488. }
  12489. }
  12490. else if (continue_type == SPIRBlock::DoWhileLoop)
  12491. {
  12492. flush_undeclared_variables(block);
  12493. emit_while_loop_initializers(block);
  12494. emitted_loop_header_variables = true;
  12495. // We have some temporaries where the loop header is the dominator.
  12496. // We risk a case where we have code like:
  12497. // for (;;) { create-temporary; break; } consume-temporary;
  12498. // so force-declare temporaries here.
  12499. emit_hoisted_temporaries(block.potential_declare_temporary);
  12500. statement("do");
  12501. begin_scope();
  12502. emit_block_instructions(block);
  12503. }
  12504. else if (block.merge == SPIRBlock::MergeLoop)
  12505. {
  12506. flush_undeclared_variables(block);
  12507. emit_while_loop_initializers(block);
  12508. emitted_loop_header_variables = true;
  12509. // We have a generic loop without any distinguishable pattern like for, while or do while.
  12510. get<SPIRBlock>(block.continue_block).complex_continue = true;
  12511. continue_type = SPIRBlock::ComplexLoop;
  12512. // We have some temporaries where the loop header is the dominator.
  12513. // We risk a case where we have code like:
  12514. // for (;;) { create-temporary; break; } consume-temporary;
  12515. // so force-declare temporaries here.
  12516. emit_hoisted_temporaries(block.potential_declare_temporary);
  12517. statement("for (;;)");
  12518. begin_scope();
  12519. emit_block_instructions(block);
  12520. }
  12521. else
  12522. {
  12523. emit_block_instructions(block);
  12524. }
  12525. // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
  12526. // as writes to said loop variables might have been masked out, we need a recompile.
  12527. if (!emitted_loop_header_variables && !block.loop_variables.empty())
  12528. {
  12529. force_recompile();
  12530. for (auto var : block.loop_variables)
  12531. get<SPIRVariable>(var).loop_variable = false;
  12532. block.loop_variables.clear();
  12533. }
  12534. flush_undeclared_variables(block);
  12535. bool emit_next_block = true;
  12536. // Handle end of block.
  12537. switch (block.terminator)
  12538. {
  12539. case SPIRBlock::Direct:
  12540. // True when emitting complex continue block.
  12541. if (block.loop_dominator == block.next_block)
  12542. {
  12543. branch(block.self, block.next_block);
  12544. emit_next_block = false;
  12545. }
  12546. // True if MergeToDirectForLoop succeeded.
  12547. else if (skip_direct_branch)
  12548. emit_next_block = false;
  12549. else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
  12550. {
  12551. branch(block.self, block.next_block);
  12552. emit_next_block = false;
  12553. }
  12554. break;
  12555. case SPIRBlock::Select:
  12556. // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
  12557. if (select_branch_to_true_block)
  12558. {
  12559. if (force_complex_continue_block)
  12560. {
  12561. assert(block.true_block == block.continue_block);
  12562. // We're going to emit a continue block directly here, so make sure it's marked as complex.
  12563. auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
  12564. bool old_complex = complex_continue;
  12565. complex_continue = true;
  12566. branch(block.self, block.true_block);
  12567. complex_continue = old_complex;
  12568. }
  12569. else
  12570. branch(block.self, block.true_block);
  12571. }
  12572. else if (select_branch_to_false_block)
  12573. {
  12574. if (force_complex_continue_block)
  12575. {
  12576. assert(block.false_block == block.continue_block);
  12577. // We're going to emit a continue block directly here, so make sure it's marked as complex.
  12578. auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
  12579. bool old_complex = complex_continue;
  12580. complex_continue = true;
  12581. branch(block.self, block.false_block);
  12582. complex_continue = old_complex;
  12583. }
  12584. else
  12585. branch(block.self, block.false_block);
  12586. }
  12587. else
  12588. branch(block.self, block.condition, block.true_block, block.false_block);
  12589. break;
  12590. case SPIRBlock::MultiSelect:
  12591. {
  12592. auto &type = expression_type(block.condition);
  12593. bool unsigned_case =
  12594. type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte;
  12595. if (block.merge == SPIRBlock::MergeNone)
  12596. SPIRV_CROSS_THROW("Switch statement is not structured");
  12597. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  12598. {
  12599. // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
  12600. SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
  12601. }
  12602. const char *label_suffix = "";
  12603. if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
  12604. label_suffix = "u";
  12605. else if (type.basetype == SPIRType::UShort)
  12606. label_suffix = backend.uint16_t_literal_suffix;
  12607. else if (type.basetype == SPIRType::Short)
  12608. label_suffix = backend.int16_t_literal_suffix;
  12609. SPIRBlock *old_emitting_switch = current_emitting_switch;
  12610. current_emitting_switch = &block;
  12611. if (block.need_ladder_break)
  12612. statement("bool _", block.self, "_ladder_break = false;");
  12613. // Find all unique case constructs.
  12614. unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs;
  12615. SmallVector<uint32_t> block_declaration_order;
  12616. SmallVector<uint32_t> literals_to_merge;
  12617. // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
  12618. // and let the default: block handle it.
  12619. // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
  12620. // We only need to consider possible fallthrough if order[i] branches to order[i + 1].
  12621. for (auto &c : block.cases)
  12622. {
  12623. if (c.block != block.next_block && c.block != block.default_block)
  12624. {
  12625. if (!case_constructs.count(c.block))
  12626. block_declaration_order.push_back(c.block);
  12627. case_constructs[c.block].push_back(c.value);
  12628. }
  12629. else if (c.block == block.next_block && block.default_block != block.next_block)
  12630. {
  12631. // We might have to flush phi inside specific case labels.
  12632. // If we can piggyback on default:, do so instead.
  12633. literals_to_merge.push_back(c.value);
  12634. }
  12635. }
  12636. // Empty literal array -> default.
  12637. if (block.default_block != block.next_block)
  12638. {
  12639. auto &default_block = get<SPIRBlock>(block.default_block);
  12640. // We need to slide in the default block somewhere in this chain
  12641. // if there are fall-through scenarios since the default is declared separately in OpSwitch.
  12642. // Only consider trivial fall-through cases here.
  12643. size_t num_blocks = block_declaration_order.size();
  12644. bool injected_block = false;
  12645. for (size_t i = 0; i < num_blocks; i++)
  12646. {
  12647. auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
  12648. if (execution_is_direct_branch(case_block, default_block))
  12649. {
  12650. // Fallthrough to default block, we must inject the default block here.
  12651. block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
  12652. injected_block = true;
  12653. break;
  12654. }
  12655. else if (execution_is_direct_branch(default_block, case_block))
  12656. {
  12657. // Default case is falling through to another case label, we must inject the default block here.
  12658. block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
  12659. injected_block = true;
  12660. break;
  12661. }
  12662. }
  12663. // Order does not matter.
  12664. if (!injected_block)
  12665. block_declaration_order.push_back(block.default_block);
  12666. else if (is_legacy_es())
  12667. SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
  12668. case_constructs[block.default_block] = {};
  12669. }
  12670. size_t num_blocks = block_declaration_order.size();
  12671. const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string {
  12672. return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal));
  12673. };
  12674. const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint32_t> &labels,
  12675. const char *suffix) -> string {
  12676. string ret;
  12677. size_t count = labels.size();
  12678. for (size_t i = 0; i < count; i++)
  12679. {
  12680. if (i)
  12681. ret += " || ";
  12682. ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
  12683. count > 1 ? ")" : "");
  12684. }
  12685. return ret;
  12686. };
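// Illustrative sketch (not part of the original source): on ESSL 1.0 the case labels are folded into a
// chained comparison instead of a real switch, so labels {1, 2} on a hypothetical selector c become roughly:
//     if ((c == 1) || (c == 2))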
  12687. // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
  12688. // we need to flush phi nodes outside the switch block in a branch,
  12689. // and skip any Phi handling inside the case label to make fall-through work as expected.
  12690. // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
  12691. // inside the case label if at all possible.
  12692. for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
  12693. {
  12694. if (flush_phi_required(block.self, block_declaration_order[i]) &&
  12695. flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
  12696. {
  12697. uint32_t target_block = block_declaration_order[i];
  12698. // Make sure we flush Phi, it might have been marked to be ignored earlier.
  12699. get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
  12700. auto &literals = case_constructs[target_block];
  12701. if (literals.empty())
  12702. {
  12703. // Oh boy, gotta make a complete negative test instead! o.o
  12704. // Find all possible literals that would *not* make us enter the default block.
  12705. // If none of those literals match, we flush Phi ...
  12706. SmallVector<string> conditions;
  12707. for (size_t j = 0; j < num_blocks; j++)
  12708. {
  12709. auto &negative_literals = case_constructs[block_declaration_order[j]];
  12710. for (auto &case_label : negative_literals)
  12711. conditions.push_back(join(to_enclosed_expression(block.condition),
  12712. " != ", to_case_label(case_label, unsigned_case)));
  12713. }
  12714. statement("if (", merge(conditions, " && "), ")");
  12715. begin_scope();
  12716. flush_phi(block.self, target_block);
  12717. end_scope();
  12718. }
  12719. else
  12720. {
  12721. SmallVector<string> conditions;
  12722. conditions.reserve(literals.size());
  12723. for (auto &case_label : literals)
  12724. conditions.push_back(join(to_enclosed_expression(block.condition),
  12725. " == ", to_case_label(case_label, unsigned_case)));
  12726. statement("if (", merge(conditions, " || "), ")");
  12727. begin_scope();
  12728. flush_phi(block.self, target_block);
  12729. end_scope();
  12730. }
  12731. // Mark the block so that we don't flush Phi from header to case label.
  12732. get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
  12733. }
  12734. }
  12735. // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
  12736. // non-structured exits with the help of a switch block.
  12737. // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
  12738. bool degenerate_switch = block.default_block != block.merge_block && block.cases.empty();
  12739. if (degenerate_switch || is_legacy_es())
  12740. {
  12741. // ESSL 1.0 is not guaranteed to support do/while.
  12742. if (is_legacy_es())
  12743. {
  12744. uint32_t counter = statement_count;
  12745. statement("for (int spvDummy", counter, " = 0; spvDummy", counter,
  12746. " < 1; spvDummy", counter, "++)");
  12747. }
  12748. else
  12749. statement("do");
  12750. }
  12751. else
  12752. {
  12753. emit_block_hints(block);
  12754. statement("switch (", to_expression(block.condition), ")");
  12755. }
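// Illustrative sketch (not part of the original source): the degenerate "default-only" switch therefore
// turns into roughly do { ... } while(false), or on ESSL 1.0 into a single-iteration loop such as
//     for (int spvDummy0 = 0; spvDummy0 < 1; spvDummy0++) { ... }
// instead of a real switch statement.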
  12756. begin_scope();
  12757. for (size_t i = 0; i < num_blocks; i++)
  12758. {
  12759. uint32_t target_block = block_declaration_order[i];
  12760. auto &literals = case_constructs[target_block];
  12761. if (literals.empty())
  12762. {
  12763. // Default case.
  12764. if (!degenerate_switch)
  12765. {
  12766. if (is_legacy_es())
  12767. statement("else");
  12768. else
  12769. statement("default:");
  12770. }
  12771. }
  12772. else
  12773. {
  12774. if (is_legacy_es())
  12775. {
  12776. statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
  12777. ")");
  12778. }
  12779. else
  12780. {
  12781. for (auto &case_literal : literals)
  12782. {
  12783. // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
  12784. statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
  12785. }
  12786. }
  12787. }
  12788. auto &case_block = get<SPIRBlock>(target_block);
  12789. if (backend.support_case_fallthrough && i + 1 < num_blocks &&
  12790. execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
  12791. {
  12792. // We will fall through here, so just terminate the block chain early.
  12793. // We still need to deal with Phi potentially.
  12794. // No need for a stack-like thing here since we only do fall-through when there is a
12795. // single trivial branch to the fall-through target.
  12796. current_emitting_switch_fallthrough = true;
  12797. }
  12798. else
  12799. current_emitting_switch_fallthrough = false;
  12800. if (!degenerate_switch)
  12801. begin_scope();
  12802. branch(block.self, target_block);
  12803. if (!degenerate_switch)
  12804. end_scope();
  12805. current_emitting_switch_fallthrough = false;
  12806. }
  12807. // Might still have to flush phi variables if we branch from loop header directly to merge target.
  12808. if (flush_phi_required(block.self, block.next_block))
  12809. {
  12810. if (block.default_block == block.next_block || !literals_to_merge.empty())
  12811. {
  12812. for (auto &case_literal : literals_to_merge)
  12813. statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
  12814. if (block.default_block == block.next_block)
  12815. {
  12816. if (is_legacy_es())
  12817. statement("else");
  12818. else
  12819. statement("default:");
  12820. }
  12821. begin_scope();
  12822. flush_phi(block.self, block.next_block);
  12823. statement("break;");
  12824. end_scope();
  12825. }
  12826. }
  12827. if (degenerate_switch && !is_legacy_es())
  12828. end_scope_decl("while(false)");
  12829. else
  12830. end_scope();
  12831. if (block.need_ladder_break)
  12832. {
  12833. statement("if (_", block.self, "_ladder_break)");
  12834. begin_scope();
  12835. statement("break;");
  12836. end_scope();
  12837. }
  12838. current_emitting_switch = old_emitting_switch;
  12839. break;
  12840. }
  12841. case SPIRBlock::Return:
  12842. {
  12843. for (auto &line : current_function->fixup_hooks_out)
  12844. line();
  12845. if (processing_entry_point)
  12846. emit_fixup();
  12847. auto &cfg = get_cfg_for_current_function();
  12848. if (block.return_value)
  12849. {
  12850. auto &type = expression_type(block.return_value);
  12851. if (!type.array.empty() && !backend.can_return_array)
  12852. {
  12853. // If we cannot return arrays, we will have a special out argument we can write to instead.
12854. // The backend is responsible for setting this up, and for redirecting the return values as appropriate.
  12855. if (ir.ids[block.return_value].get_type() != TypeUndef)
  12856. {
  12857. emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction,
  12858. get_expression_effective_storage_class(block.return_value));
  12859. }
  12860. if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
  12861. block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  12862. {
  12863. statement("return;");
  12864. }
  12865. }
  12866. else
  12867. {
  12868. // OpReturnValue can return Undef, so don't emit anything for this case.
  12869. if (ir.ids[block.return_value].get_type() != TypeUndef)
  12870. statement("return ", to_expression(block.return_value), ";");
  12871. }
  12872. }
  12873. else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
  12874. block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  12875. {
  12876. // If this block is the very final block and not called from control flow,
  12877. // we do not need an explicit return which looks out of place. Just end the function here.
12878. // In the very weird case of for(;;) { return; }, executing return is unconditional,
  12879. // but we actually need a return here ...
  12880. statement("return;");
  12881. }
  12882. break;
  12883. }
  12884. case SPIRBlock::Kill:
  12885. statement(backend.discard_literal, ";");
  12886. break;
  12887. case SPIRBlock::Unreachable:
  12888. emit_next_block = false;
  12889. break;
  12890. case SPIRBlock::IgnoreIntersection:
  12891. statement("ignoreIntersectionEXT;");
  12892. break;
  12893. case SPIRBlock::TerminateRay:
  12894. statement("terminateRayEXT;");
  12895. break;
  12896. default:
  12897. SPIRV_CROSS_THROW("Unimplemented block terminator.");
  12898. }
  12899. if (block.next_block && emit_next_block)
  12900. {
  12901. // If we hit this case, we're dealing with an unconditional branch, which means we will output
12902. // that block after this. If we had a selection merge, we already flushed phi variables.
  12903. if (block.merge != SPIRBlock::MergeSelection)
  12904. {
  12905. flush_phi(block.self, block.next_block);
  12906. // For a direct branch, need to remember to invalidate expressions in the next linear block instead.
  12907. get<SPIRBlock>(block.next_block).invalidate_expressions = block.invalidate_expressions;
  12908. }
  12909. // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
  12910. if (!current_emitting_switch_fallthrough)
  12911. {
  12912. // For merge selects we might have ignored the fact that a merge target
  12913. // could have been a break; or continue;
  12914. // We will need to deal with it here.
  12915. if (is_loop_break(block.next_block))
  12916. {
  12917. // Cannot check for just break, because switch statements will also use break.
  12918. assert(block.merge == SPIRBlock::MergeSelection);
  12919. statement("break;");
  12920. }
  12921. else if (is_continue(block.next_block))
  12922. {
  12923. assert(block.merge == SPIRBlock::MergeSelection);
  12924. branch_to_continue(block.self, block.next_block);
  12925. }
  12926. else if (BlockID(block.self) != block.next_block)
  12927. emit_block_chain(get<SPIRBlock>(block.next_block));
  12928. }
  12929. }
  12930. if (block.merge == SPIRBlock::MergeLoop)
  12931. {
  12932. if (continue_type == SPIRBlock::DoWhileLoop)
  12933. {
  12934. // Make sure that we run the continue block to get the expressions set, but this
  12935. // should become an empty string.
  12936. // We have no fallbacks if we cannot forward everything to temporaries ...
  12937. const auto &continue_block = get<SPIRBlock>(block.continue_block);
  12938. bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
  12939. get<SPIRBlock>(continue_block.loop_dominator));
  12940. uint32_t current_count = statement_count;
  12941. auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
  12942. if (statement_count != current_count)
  12943. {
  12944. // The DoWhile block has side effects, force ComplexLoop pattern next pass.
  12945. get<SPIRBlock>(block.continue_block).complex_continue = true;
  12946. force_recompile();
  12947. }
  12948. // Might have to invert the do-while test here.
  12949. auto condition = to_expression(continue_block.condition);
  12950. if (!positive_test)
  12951. condition = join("!", enclose_expression(condition));
  12952. end_scope_decl(join("while (", condition, ")"));
  12953. }
  12954. else
  12955. end_scope();
  12956. loop_level_saver.release();
  12957. // We cannot break out of two loops at once, so don't check for break; here.
  12958. // Using block.self as the "from" block isn't quite right, but it has the same scope
  12959. // and dominance structure, so it's fine.
  12960. if (is_continue(block.merge_block))
  12961. branch_to_continue(block.self, block.merge_block);
  12962. else
  12963. emit_block_chain(get<SPIRBlock>(block.merge_block));
  12964. }
  12965. // Forget about control dependent expressions now.
  12966. block.invalidate_expressions.clear();
  12967. // After we return, we must be out of scope, so if we somehow have to re-emit this function,
  12968. // re-declare variables if necessary.
  12969. assert(rearm_dominated_variables.size() == block.dominated_variables.size());
  12970. for (size_t i = 0; i < block.dominated_variables.size(); i++)
  12971. {
  12972. uint32_t var = block.dominated_variables[i];
  12973. get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
  12974. }
  12975. // Just like for deferred declaration, we need to forget about loop variable enable
  12976. // if our block chain is reinstantiated later.
  12977. for (auto &var_id : block.loop_variables)
  12978. get<SPIRVariable>(var_id).loop_variable_enable = false;
  12979. }
  12980. void CompilerGLSL::begin_scope()
  12981. {
  12982. statement("{");
  12983. indent++;
  12984. }
  12985. void CompilerGLSL::end_scope()
  12986. {
  12987. if (!indent)
  12988. SPIRV_CROSS_THROW("Popping empty indent stack.");
  12989. indent--;
  12990. statement("}");
  12991. }
  12992. void CompilerGLSL::end_scope(const string &trailer)
  12993. {
  12994. if (!indent)
  12995. SPIRV_CROSS_THROW("Popping empty indent stack.");
  12996. indent--;
  12997. statement("}", trailer);
  12998. }
  12999. void CompilerGLSL::end_scope_decl()
  13000. {
  13001. if (!indent)
  13002. SPIRV_CROSS_THROW("Popping empty indent stack.");
  13003. indent--;
  13004. statement("};");
  13005. }
  13006. void CompilerGLSL::end_scope_decl(const string &decl)
  13007. {
  13008. if (!indent)
  13009. SPIRV_CROSS_THROW("Popping empty indent stack.");
  13010. indent--;
  13011. statement("} ", decl, ";");
  13012. }
  13013. void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
  13014. {
  13015. // If our variable is remapped, and we rely on type-remapping information as
  13016. // well, then we cannot pass the variable as a function parameter.
  13017. // Fixing this is non-trivial without stamping out variants of the same function,
  13018. // so for now warn about this and suggest workarounds instead.
  13019. for (uint32_t i = 0; i < length; i++)
  13020. {
  13021. auto *var = maybe_get<SPIRVariable>(args[i]);
  13022. if (!var || !var->remapped_variable)
  13023. continue;
  13024. auto &type = get<SPIRType>(var->basetype);
  13025. if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
  13026. {
  13027. SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
  13028. "This will not work correctly because type-remapping information is lost. "
  13029. "To workaround, please consider not passing the subpass input as a function parameter, "
  13030. "or use in/out variables instead which do not need type remapping information.");
  13031. }
  13032. }
  13033. }
  13034. const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
  13035. {
  13036. // FIXME: This is kind of hacky. There should be a cleaner way.
  13037. auto offset = uint32_t(&instr - current_emitting_block->ops.data());
  13038. if ((offset + 1) < current_emitting_block->ops.size())
  13039. return &current_emitting_block->ops[offset + 1];
  13040. else
  13041. return nullptr;
  13042. }
  13043. uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
  13044. {
  13045. return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
  13046. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
  13047. MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
  13048. }
  13049. void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
  13050. {
  13051. statement(lhs, " = ", to_expression(rhs_id), ";");
  13052. }
  13053. void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
  13054. {
  13055. if (!backend.force_gl_in_out_block)
  13056. return;
  13057. // This path is only relevant for GL backends.
  13058. auto *var = maybe_get<SPIRVariable>(source_id);
  13059. if (!var)
  13060. return;
  13061. if (var->storage != StorageClassInput)
  13062. return;
  13063. auto &type = get_variable_data_type(*var);
  13064. if (type.array.empty())
  13065. return;
  13066. auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
  13067. bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition);
  13068. bool is_tess = is_tessellation_shader();
  13069. bool is_patch = has_decoration(var->self, DecorationPatch);
13070. // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from them.
  13071. // We must unroll the array load.
  13072. // For builtins, we couldn't catch this case normally,
  13073. // because this is resolved in the OpAccessChain in most cases.
  13074. // If we load the entire array, we have no choice but to unroll here.
  13075. if (!is_patch && (is_builtin || is_tess))
  13076. {
  13077. auto new_expr = join("_", target_id, "_unrolled");
  13078. statement(variable_decl(type, new_expr, target_id), ";");
  13079. string array_expr;
  13080. if (type.array_size_literal.back())
  13081. {
  13082. array_expr = convert_to_string(type.array.back());
  13083. if (type.array.back() == 0)
  13084. SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
  13085. }
  13086. else
  13087. array_expr = to_expression(type.array.back());
  13088. // The array size might be a specialization constant, so use a for-loop instead.
  13089. statement("for (int i = 0; i < int(", array_expr, "); i++)");
  13090. begin_scope();
  13091. if (is_builtin)
  13092. statement(new_expr, "[i] = gl_in[i].", expr, ";");
  13093. else
  13094. statement(new_expr, "[i] = ", expr, "[i];");
  13095. end_scope();
  13096. expr = move(new_expr);
  13097. }
  13098. }
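// Illustrative sketch (not part of the original source): loading a whole tessellation input array of
// gl_Position values is unrolled into something like (assuming a hypothetical target ID 25 and size 4):
//     vec4 _25_unrolled[4];
//     for (int i = 0; i < int(4); i++)
//     {
//         _25_unrolled[i] = gl_in[i].gl_Position;
//     }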
  13099. void CompilerGLSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
  13100. {
  13101. auto *var = maybe_get_backing_variable(source_id);
  13102. if (var)
  13103. source_id = var->self;
  13104. // Only interested in standalone builtin variables.
  13105. if (!has_decoration(source_id, DecorationBuiltIn))
  13106. return;
  13107. auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
  13108. auto expected_type = expr_type.basetype;
  13109. // TODO: Fill in for more builtins.
  13110. switch (builtin)
  13111. {
  13112. case BuiltInLayer:
  13113. case BuiltInPrimitiveId:
  13114. case BuiltInViewportIndex:
  13115. case BuiltInInstanceId:
  13116. case BuiltInInstanceIndex:
  13117. case BuiltInVertexId:
  13118. case BuiltInVertexIndex:
  13119. case BuiltInSampleId:
  13120. case BuiltInBaseVertex:
  13121. case BuiltInBaseInstance:
  13122. case BuiltInDrawIndex:
  13123. case BuiltInFragStencilRefEXT:
  13124. case BuiltInInstanceCustomIndexNV:
  13125. expected_type = SPIRType::Int;
  13126. break;
  13127. case BuiltInGlobalInvocationId:
  13128. case BuiltInLocalInvocationId:
  13129. case BuiltInWorkgroupId:
  13130. case BuiltInLocalInvocationIndex:
  13131. case BuiltInWorkgroupSize:
  13132. case BuiltInNumWorkgroups:
  13133. case BuiltInIncomingRayFlagsNV:
  13134. case BuiltInLaunchIdNV:
  13135. case BuiltInLaunchSizeNV:
  13136. expected_type = SPIRType::UInt;
  13137. break;
  13138. default:
  13139. break;
  13140. }
  13141. if (expected_type != expr_type.basetype)
  13142. expr = bitcast_expression(expr_type, expected_type, expr);
  13143. }
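// Illustrative sketch (not part of the original source): if the SPIR-V module declares a load of
// gl_VertexIndex as a uint while the GLSL builtin is an int, the loaded expression is rewritten to a
// bitcast such as "uint(gl_VertexIndex)" so the types line up on the GLSL side.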
  13144. void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
  13145. {
  13146. // Only interested in standalone builtin variables.
  13147. if (!has_decoration(target_id, DecorationBuiltIn))
  13148. return;
  13149. auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
  13150. auto expected_type = expr_type.basetype;
  13151. // TODO: Fill in for more builtins.
  13152. switch (builtin)
  13153. {
  13154. case BuiltInLayer:
  13155. case BuiltInPrimitiveId:
  13156. case BuiltInViewportIndex:
  13157. case BuiltInFragStencilRefEXT:
  13158. expected_type = SPIRType::Int;
  13159. break;
  13160. default:
  13161. break;
  13162. }
  13163. if (expected_type != expr_type.basetype)
  13164. {
  13165. auto type = expr_type;
  13166. type.basetype = expected_type;
  13167. expr = bitcast_expression(type, expr_type.basetype, expr);
  13168. }
  13169. }
void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr)
{
	if (*backend.nonuniform_qualifier == '\0')
		return;

	// Handle SPV_EXT_descriptor_indexing.
	if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage ||
	    type.basetype == SPIRType::Image || type.basetype == SPIRType::AccelerationStructure)
	{
		// The image/sampler ID must be declared as non-uniform.
		// However, it is not legal GLSL to have
		// nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier
		// to the array indexing, like
		// samplers[nonuniformEXT(index)].
		// While the access chain will generally be nonuniformEXT, it's not necessarily so,
		// so we might have to fix up the OpLoad-ed expression late.

		auto start_array_index = expr.find_first_of('[');

		if (start_array_index == string::npos)
			return;

		// Check for the edge case that a non-arrayed resource was marked to be nonuniform,
		// and the bracket we found is actually part of non-resource related data.
		if (expr.find_first_of(',') < start_array_index)
			return;

		// We've opened a bracket, track expressions until we can close the bracket.
		// This must be our image index.
		size_t end_array_index = string::npos;
		unsigned bracket_count = 1;
		for (size_t index = start_array_index + 1; index < expr.size(); index++)
		{
			if (expr[index] == ']')
			{
				if (--bracket_count == 0)
				{
					end_array_index = index;
					break;
				}
			}
			else if (expr[index] == '[')
				bracket_count++;
		}

		assert(bracket_count == 0);

		// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
		// nothing we can do here to express that.
		if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
			return;

		start_array_index++;

		expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
		            expr.substr(start_array_index, end_array_index - start_array_index), ")",
		            expr.substr(end_array_index, string::npos));
	}
}

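// Intentionally a no-op for plain GLSL; kept as an overridable hook so that backends which do support
// selection/loop hints can annotate the block here.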
void CompilerGLSL::emit_block_hints(const SPIRBlock &)
{
}

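// Symbol names are cached per compilation attempt. Between passes (e.g. after force_recompile()),
// reset_name_caches() below flushes those caches; any alias recorded here is restored afterwards so
// explicitly preserved names survive the reset.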
void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
{
	preserved_aliases[id] = get_name(id);
}

void CompilerGLSL::reset_name_caches()
{
	for (auto &preserved : preserved_aliases)
		set_name(preserved.first, preserved.second);

	preserved_aliases.clear();
	resource_names.clear();
	block_input_names.clear();
	block_output_names.clear();
	block_ubo_names.clear();
	block_ssbo_names.clear();
	block_names.clear();
	function_overloads.clear();
}

void CompilerGLSL::fixup_type_alias()
{
	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
		if (!type.type_alias)
			return;

		if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
		{
			// Top-level block types should never alias anything else.
			type.type_alias = 0;
		}
		else if (type_is_block_like(type) && type.self == ID(self))
		{
			// A block-like type is any type which contains Offset decoration, but not top-level blocks,
			// i.e. blocks which are placed inside buffers.
			// Become the master.
			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
				if (other_id == self)
					return;

				if (other_type.type_alias == type.type_alias)
					other_type.type_alias = self;
			});

			this->get<SPIRType>(type.type_alias).type_alias = self;
			type.type_alias = 0;
		}
	});
}

void CompilerGLSL::reorder_type_alias()
{
	// Reorder declaration of types so that the master of the type alias is always emitted first.
	// We need this in case a type B depends on type A (A must come first in the vector), but A is an alias of a
	// type ABuffer, which means declaration of A doesn't happen (yet), and the order would be B, ABuffer and not
	// ABuffer, B. Fix this up here.
	auto loop_lock = ir.create_loop_hard_lock();

	auto &type_ids = ir.ids_for_type[TypeType];
	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
	{
		auto &type = get<SPIRType>(*alias_itr);
		if (type.type_alias != TypeID(0) &&
		    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		{
			// We will skip declaring this type, so make sure the type_alias type comes before.
			auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
			assert(master_itr != end(type_ids));

			if (alias_itr < master_itr)
			{
				// Must also swap the type order for the constant-type joined array.
				auto &joined_types = ir.ids_for_constant_or_type;
				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
				assert(alt_alias_itr != end(joined_types));
				assert(alt_master_itr != end(joined_types));

				swap(*alias_itr, *master_itr);
				swap(*alt_alias_itr, *alt_master_itr);
			}
		}
	}
}

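// Emits a '#line <line> "<file>"' directive pointing back at the source file recorded in the SPIR-V
// module, e.g. '#line 42 "shader.frag"' (file name illustrative). Requires GL_GOOGLE_cpp_style_line_directive.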
void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
{
	// If we are redirecting statements, ignore the line directive.
	// Common case here is continue blocks.
	if (redirect_statement)
		return;

	if (options.emit_line_directives)
	{
		require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
	}
}

void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
{
	// SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
	// we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
	// Back-propagate the qualifier based on the expression dependency chain.

	if (!has_decoration(id, DecorationNonUniformEXT))
	{
		set_decoration(id, DecorationNonUniformEXT);
		force_recompile();
	}

	auto *e = maybe_get<SPIRExpression>(id);
	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
	auto *chain = maybe_get<SPIRAccessChain>(id);
	if (e)
	{
		for (auto &expr : e->expression_dependencies)
			propagate_nonuniform_qualifier(expr);
		for (auto &expr : e->implied_read_expressions)
			propagate_nonuniform_qualifier(expr);
	}
	else if (combined)
	{
		propagate_nonuniform_qualifier(combined->image);
		propagate_nonuniform_qualifier(combined->sampler);
	}
	else if (chain)
	{
		for (auto &expr : chain->implied_read_expressions)
			propagate_nonuniform_qualifier(expr);
	}
}

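// Copies between two logically identical (but possibly physically different) composite types by
// recursively unrolling array elements and struct members, routing each leaf store through
// emit_store_statement() so packing/unpacking fixups are applied. Used to implement OpCopyLogical.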
void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
                                          SmallVector<uint32_t> chain)
{
	// Fully unroll all member/array indices one by one.

	auto &lhs_type = get<SPIRType>(lhs_type_id);
	auto &rhs_type = get<SPIRType>(rhs_type_id);

	if (!lhs_type.array.empty())
	{
		// Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
		// and this is a rather obscure opcode anyways, keep it simple unless we are forced to.
		uint32_t array_size = to_array_size_literal(lhs_type);
		chain.push_back(0);

		for (uint32_t i = 0; i < array_size; i++)
		{
			chain.back() = i;
			emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
		}
	}
	else if (lhs_type.basetype == SPIRType::Struct)
	{
		chain.push_back(0);
		uint32_t member_count = uint32_t(lhs_type.member_types.size());
		for (uint32_t i = 0; i < member_count; i++)
		{
			chain.back() = i;
			emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
		}
	}
	else
	{
		// Need to handle unpack/packing fixups since this can differ wildly between the logical types,
		// particularly in MSL.
		// To deal with this, we emit access chains and go through emit_store_statement
		// to deal with all the special cases we can encounter.

		AccessChainMeta lhs_meta, rhs_meta;
		auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
		auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);

		uint32_t id = ir.increase_bound_by(2);
		lhs_id = id;
		rhs_id = id + 1;

		{
			auto &lhs_expr = set<SPIRExpression>(lhs_id, move(lhs), lhs_type_id, true);
			lhs_expr.need_transpose = lhs_meta.need_transpose;

			if (lhs_meta.storage_is_packed)
				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
			if (lhs_meta.storage_physical_type != 0)
				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);

			forwarded_temporaries.insert(lhs_id);
			suppressed_usage_tracking.insert(lhs_id);
		}

		{
			auto &rhs_expr = set<SPIRExpression>(rhs_id, move(rhs), rhs_type_id, true);
			rhs_expr.need_transpose = rhs_meta.need_transpose;

			if (rhs_meta.storage_is_packed)
				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
			if (rhs_meta.storage_physical_type != 0)
				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);

			forwarded_temporaries.insert(rhs_id);
			suppressed_usage_tracking.insert(rhs_id);
		}

		emit_store_statement(lhs_id, rhs_id);
	}
}

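// True when this subpass input's attachment index has been remapped to a color attachment, i.e. it is
// read through the GL_EXT_shader_framebuffer_fetch path rather than a real input attachment.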
bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
{
	if (!has_decoration(id, DecorationInputAttachmentIndex))
		return false;

	uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
		if (remap.first == input_attachment_index)
			return true;

	return false;
}

const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
		    get_decoration(var.self, DecorationInputAttachmentIndex) == index)
		{
			ret = &var;
		}
	});
	return ret;
}

const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
			ret = &var;
	});
	return ret;
}

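// For every subpass input remapped to framebuffer fetch, inject a fixup at the top of the entry point
// which copies the current color output into the subpass input variable. Roughly (names illustrative):
//   legacy GLSL:  subpass_in = gl_LastFragData[1];
//   otherwise:    subpass_in.xyzw = color_out;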
void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
{
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
	{
		auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
		auto *output_var = find_color_output_by_location(remap.second);
		if (!subpass_var)
			continue;
		if (!output_var)
			SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
			                  "to read from it.");
		if (is_array(get<SPIRType>(output_var->basetype)))
			SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");

		auto &func = get<SPIRFunction>(get_entry_point().self);
		func.fixup_hooks_in.push_back([=]() {
			if (is_legacy())
			{
				statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
				          get_decoration(output_var->self, DecorationLocation), "];");
			}
			else
			{
				uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
				statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
				          to_expression(output_var->self), ";");
			}
		});
	}
}

bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
{
	return image_is_comparison(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
}

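// ShaderSubgroupSupportHelper: records which subgroup-related features the shader actually uses and
// ranks the candidate extensions (KHR_shader_subgroup_*, NV_*, ARB_*, AMD_gcn_shader) that can implement
// them, so the backend can emit a small chain of preprocessor-guarded #extension fallbacks.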
const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
{
	// Note: the GLSL extension string for NV_gpu_shader5 has no underscore before the digit.
	static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
		                                                "GL_KHR_shader_subgroup_basic",
		                                                "GL_KHR_shader_subgroup_vote",
		                                                "GL_NV_gpu_shader5",
		                                                "GL_NV_shader_thread_group",
		                                                "GL_NV_shader_thread_shuffle",
		                                                "GL_ARB_shader_ballot",
		                                                "GL_ARB_shader_group_vote",
		                                                "GL_AMD_gcn_shader" };
	return retval[c];
}

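// Some fallback candidates only work in combination with further extensions, e.g. the ARB/AMD ballot
// paths need 64-bit integer support for their subgroup mask builtins.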
SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return { "GL_ARB_shader_int64" };
	case AMD_gcn_shader:
		return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
	default:
		return {};
	}
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return "defined(GL_ARB_shader_int64)";
	case AMD_gcn_shader:
		return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
	default:
		return "";
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_feature_dependencies(Feature feature)
{
	switch (feature)
	{
	case SubgroupAllEqualT:
		return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
	case SubgroupElect:
		return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return { SubgroupMask };
	case SubgroupBallotBitCount:
		return { SubgroupBallot };
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
    get_feature_dependency_mask(Feature feature)
{
	return build_mask(get_feature_dependencies(feature));
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
{
	static const bool retval[FeatureCount] = { false, false, false, false, false, false,
		                                       true, // SubgroupBallotFindLSB_MSB
		                                       false, false, false, false,
		                                       true, // SubgroupMemBarrier - replaced with workgroup memory barriers
		                                       false, false, true, false };
	return retval[feature];
}

CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
    get_KHR_extension_for_feature(Feature feature)
{
	static const Candidate extensions[FeatureCount] = {
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
		KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
	};

	return extensions[feature];
}

void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
{
	feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
{
	return (feature_mask & (1u << feature)) != 0;
}

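// resolve() scores every candidate extension by how many of the requested features (including their
// dependencies) it could help implement. The Result constructor pre-weights the KHR_shader_subgroup
// extensions so they always win ties against the vendor fallbacks.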
CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
{
	Result res;

	for (uint32_t i = 0u; i < FeatureCount; ++i)
	{
		if (feature_mask & (1u << i))
		{
			auto feature = static_cast<Feature>(i);
			std::unordered_set<uint32_t> unique_candidates;

			auto candidates = get_candidates_for_feature(feature);
			unique_candidates.insert(candidates.begin(), candidates.end());

			auto deps = get_feature_dependencies(feature);
			for (Feature d : deps)
			{
				candidates = get_candidates_for_feature(d);
				if (!candidates.empty())
					unique_candidates.insert(candidates.begin(), candidates.end());
			}

			for (uint32_t c : unique_candidates)
				++res.weights[static_cast<Candidate>(c)];
		}
	}

	return res;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_candidates_for_feature(Feature ft, const Result &r)
{
	auto c = get_candidates_for_feature(ft);
	auto cmp = [&r](Candidate a, Candidate b) {
		if (r.weights[a] == r.weights[b])
			return a < b; // Prefer candidates with lower enum value.
		return r.weights[a] > r.weights[b];
	};
	std::sort(c.begin(), c.end(), cmp);
	return c;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
    get_candidates_for_feature(Feature feature)
{
	switch (feature)
	{
	case SubgroupMask:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupSize:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
	case SubgroupInvocationID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case NumSubgroups:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case SubgroupBroadcast_First:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
	case SubgroupBallotFindLSB_MSB:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
	case SubgroupAll_Any_AllEqualBool:
		return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
	case SubgroupAllEqualT:
		return {}; // depends on other features only
	case SubgroupElect:
		return {}; // depends on other features only
	case SubgroupBallot:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupBarrier:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
	case SubgroupMemBarrier:
		return { KHR_shader_subgroup_basic };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return {};
	case SubgroupBallotBitExtract:
		return { NV_shader_thread_group };
	case SubgroupBallotBitCount:
		return {};
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
    const SmallVector<Feature> &features)
{
	FeatureMask mask = 0;
	for (Feature f : features)
		mask |= FeatureMask(1) << f;
	return mask;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
{
	for (auto &weight : weights)
		weight = 0;

	// Make sure KHR_shader_subgroup extensions are always preferred.
	const uint32_t big_num = FeatureCount;
	weights[KHR_shader_subgroup_ballot] = big_num;
	weights[KHR_shader_subgroup_basic] = big_num;
	weights[KHR_shader_subgroup_vote] = big_num;
}

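// Registers a UBO type which must be loaded through the spvWorkaroundRowMajor() wrapper (see
// rewrite_load_for_wrapped_row_major() below), and forces a recompile so the wrapper overload can be
// declared on the next pass.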
void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
{
	// Must be ordered to maintain deterministic output, so vector is appropriate.
	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
	    end(workaround_ubo_load_overload_types))
	{
		force_recompile();
		workaround_ubo_load_overload_types.push_back(id);
	}
}

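// Example of the rewrite performed below (identifier names illustrative):
//   before: mat4 m = UBO.row_major_matrix;
//   after:  mat4 m = spvWorkaroundRowMajor(UBO.row_major_matrix);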
void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
{
	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
	// ensure the row_major decoration is actually respected.
	auto *var = maybe_get_backing_variable(ptr);
	if (!var)
		return;

	auto &backing_type = get<SPIRType>(var->basetype);
	bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
	              has_decoration(backing_type.self, DecorationBlock);
	if (!is_ubo)
		return;

	auto *type = &get<SPIRType>(loaded_type);
	bool rewrite = false;

	if (is_matrix(*type))
	{
		// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
		// we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
		// If there is any row-major action going on, we apply the workaround.
		// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
		// If an access chain occurred, the workaround is not required, so loads of vectors or scalars don't need it.
		type = &backing_type;
	}

	if (type->basetype == SPIRType::Struct)
	{
		// If we're loading a struct where any member is a row-major matrix, apply the workaround.
		for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
		{
			if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
			{
				rewrite = true;
				break;
			}
		}
	}

	if (rewrite)
	{
		request_workaround_wrapper_overload(loaded_type);
		expr = join("spvWorkaroundRowMajor(", expr, ")");
	}
}