spirv_glsl.cpp (493 KB, 14,308 lines)

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
5020150211502215023150241502515026150271502815029150301503115032150331503415035150361503715038150391504015041150421504315044150451504615047150481504915050150511505215053150541505515056150571505815059150601506115062150631506415065150661506715068150691507015071150721507315074150751507615077150781507915080150811508215083150841508515086150871508815089150901509115092150931509415095150961509715098150991510015101151021510315104151051510615107151081510915110151111511215113151141511515116151171511815119151201512115122151231512415125151261512715128151291513015131151321513315134151351513615137151381513915140151411514215143151441514515146151471514815149151501515115152151531515415155151561515715158151591516015161151621516315164151651516615167151681516915170151711517215173151741517515176151771517815179151801518115182151831518415185151861518715188151891519015191151921519315194151951519615197151981519915200152011520215203152041520515206152071520815209152101521115212152131521415215152161521715218152191522015221152221522315224152251522615227152281522915230152311523215233152341523515236152371523815239152401524115242152431524415245152461524715248152491525015251152521525315254152551525615257152581525915260152611526215263152641526515266152671526815269152701527115272152731527415275152761527715278152791528015281152821528315284152851528615287152881528915290152911529215293152941529515296152971529815299153001530115302153031530415305153061530715308153091531015311153121531315314153151531615317153181531915320153211532215323153241532515326153271532815329153301533115332153331533415335153361533715338153391534015341153421534315344153451534615347153481534915350153511535215353153541535515356153571535815359153601536115362153631536415365153661536715368153691537015371153721537315374153751537615377153781537915380153811538215383153841538515386153871538815389153901539115392153931539415395153961539715398153991540015401154021540315404154051540615407154081540915410154111541215413154141541515416154171541815419154201542115422154231542415425154261542715428154291543015431154321543315434154351543615437154381543915440154411544215443154441544515446154471544815449154501545115452154531545415455154561545715458154591546015461154621546315464154651546615467154681546915470154711547215473154741547515476154771547815479154801548115482154831548415485154861548715488154891549015491154921549315494154951549615497154981549915500155011550215503155041550515506155071550815509155101551115512155131551415515155161551715518155191552015521155221552315524155251552615527155281552915530155311553215533155341553515536155371553815539155401554115542155431554415545155461554715548155491555015551155521555315554155551555615557155581555915560155611556215563155641556515566155671556815569155701557115572155731557415575155761557715578155791558015581155821558315584155851558615587155881558915590155911559215593155941559515596155971559815599156001560115602156031560415605156061560715608156091561015611156121561315614156151561615617156181561915620156211562215623156241562515626156271562815629156301563115632156331563415635156361563715638156391564015641156421564315644156451564615647156481564915650156511565215653156541565515656156571565815659156601566115662156631566415665156661566715668
/*
 * Copyright 2015-2021 Arm Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 * SPDX-License-Identifier: Apache-2.0 OR MIT.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>

#ifndef _WIN32
#include <langinfo.h>
#endif
#include <locale.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;
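// Helpers to classify inherently unsigned operations. Legacy GLSL targets have no
// native unsigned types, so the backend needs to know when an expression is unsigned.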
static bool is_unsigned_opcode(Op op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case OpShiftRightLogical:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUConvert:
	case OpUDiv:
	case OpUMod:
	case OpUMulExtended:
	case OpConvertUToF:
	case OpConvertFToU:
		return true;
	default:
		return false;
	}
}

static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case GLSLstd450UClamp:
	case GLSLstd450UMin:
	case GLSLstd450UMax:
	case GLSLstd450FindUMsb:
		return true;
	default:
		return false;
	}
}
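// Buffer packing helpers: classify the packing standard (std140, std430, scalar, HLSL cbuffer)
// so the size/alignment/stride calculations further below can branch on its properties.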
static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
	case BufferPackingStd140:
	case BufferPackingStd140EnhancedLayout:
		return true;
	default:
		return false;
	}
}

static bool packing_is_hlsl(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
		return true;
	default:
		return false;
	}
}

static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140:
	case BufferPackingStd430:
	case BufferPackingScalar:
	case BufferPackingHLSLCbuffer:
		return false;
	default:
		return true;
	}
}

static bool packing_is_scalar(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingScalar:
	case BufferPackingScalarEnhancedLayout:
		return true;
	default:
		return false;
	}
}

static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140EnhancedLayout:
		return BufferPackingStd140;
	case BufferPackingStd430EnhancedLayout:
		return BufferPackingStd430;
	case BufferPackingHLSLCbufferPackOffset:
		return BufferPackingHLSLCbuffer;
	case BufferPackingScalarEnhancedLayout:
		return BufferPackingScalar;
	default:
		return packing;
	}
}
void CompilerGLSL::init()
{
	if (ir.source.known)
	{
		options.es = ir.source.es;
		options.version = ir.source.version;
	}

	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
	// tricky.
#ifdef _WIN32
	// On Windows, localeconv uses thread-local storage, so it should be fine.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#elif defined(__ANDROID__) && __ANDROID_API__ < 26
	// nl_langinfo is not supported on this platform, fall back to the worse alternative.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#else
	// localeconv, the portable function, is not MT safe ...
	const char *decimal_point = nl_langinfo(RADIXCHAR);
	if (decimal_point && *decimal_point != '\0')
		current_locale_radix_character = *decimal_point;
#endif
}
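// Pixel local storage (PLS) helpers: map each PLS format to its layout() qualifier,
// base type and component count.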
static const char *to_pls_layout(PlsFormat format)
{
	switch (format)
	{
	case PlsR11FG11FB10F:
		return "layout(r11f_g11f_b10f) ";
	case PlsR32F:
		return "layout(r32f) ";
	case PlsRG16F:
		return "layout(rg16f) ";
	case PlsRGB10A2:
		return "layout(rgb10_a2) ";
	case PlsRGBA8:
		return "layout(rgba8) ";
	case PlsRG16:
		return "layout(rg16) ";
	case PlsRGBA8I:
		return "layout(rgba8i) ";
	case PlsRG16I:
		return "layout(rg16i) ";
	case PlsRGB10A2UI:
		return "layout(rgb10_a2ui) ";
	case PlsRGBA8UI:
		return "layout(rgba8ui) ";
	case PlsRG16UI:
		return "layout(rg16ui) ";
	case PlsR32UI:
		return "layout(r32ui) ";
	default:
		return "";
	}
}

static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR11FG11FB10F:
	case PlsR32F:
	case PlsRG16F:
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRG16:
		return SPIRType::Float;
	case PlsRGBA8I:
	case PlsRG16I:
		return SPIRType::Int;
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
	case PlsRG16UI:
	case PlsR32UI:
		return SPIRType::UInt;
	}
}

static uint32_t pls_format_to_components(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR32F:
	case PlsR32UI:
		return 1;
	case PlsRG16F:
	case PlsRG16:
	case PlsRG16UI:
	case PlsRG16I:
		return 2;
	case PlsR11FG11FB10F:
		return 3;
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRGBA8I:
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
		return 4;
	}
}
const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
{
	static const char *const swizzle[4][4] = {
		{ ".x", ".y", ".z", ".w" },
		{ ".xy", ".yz", ".zw", nullptr },
		{ ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
		// This array ends up being compiled as all nullptrs, tripping the assertions below.
		{ "", nullptr, nullptr, "$" },
#else
		{ "", nullptr, nullptr, nullptr },
#endif
	};

	assert(vecsize >= 1 && vecsize <= 4);
	assert(index >= 0 && index < 4);
	assert(swizzle[vecsize - 1][index]);
	return swizzle[vecsize - 1][index];
}
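// Reset per-compile state so a forced recompile pass starts from a clean slate.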
void CompilerGLSL::reset()
{
	// We do some speculative optimizations which should pretty much always work out,
	// but just in case the SPIR-V is rather weird, recompile until it's happy.
	// This typically only means one extra pass.
	clear_force_recompile();

	// Clear invalid expression tracking.
	invalid_expressions.clear();
	current_function = nullptr;

	// Clear temporary usage tracking.
	expression_usage_counts.clear();
	forwarded_temporaries.clear();
	suppressed_usage_tracking.clear();

	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	flushed_phi_variables.clear();

	reset_name_caches();

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.active = false;
		func.flush_undeclared = true;
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

	ir.reset_all_of_type<SPIRExpression>();
	ir.reset_all_of_type<SPIRAccessChain>();

	statement_count = 0;
	indent = 0;
	current_loop_level = 0;
}
void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}
void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location)
{
	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
	inout_color_attachments.insert(color_location);
}
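// Scan types, capabilities and the execution model up front so that every extension
// the generated GLSL will need can be required before emission starts.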
void CompilerGLSL::find_static_extensions()
{
	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
		if (type.basetype == SPIRType::Double)
		{
			if (options.es)
				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader_fp64");
		}
		else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
		{
			if (options.es)
				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
			if (!options.es)
				require_extension_internal("GL_ARB_gpu_shader_int64");
		}
		else if (type.basetype == SPIRType::Half)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
		else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_8bit_storage");
		}
		else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
	});

	auto &execution = get_entry_point();
	switch (execution.model)
	{
	case ExecutionModelGLCompute:
		if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_compute_shader");
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
		break;

	case ExecutionModelGeometry:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_geometry_shader");
		if (!options.es && options.version < 150)
			require_extension_internal("GL_ARB_geometry_shader4");

		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
		{
			// Instanced GS is part of 400 core or this extension.
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader5");
		}
		break;

	case ExecutionModelTessellationEvaluation:
	case ExecutionModelTessellationControl:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_tessellation_shader");
		if (!options.es && options.version < 400)
			require_extension_internal("GL_ARB_tessellation_shader");
		break;

	case ExecutionModelRayGenerationKHR:
	case ExecutionModelIntersectionKHR:
	case ExecutionModelAnyHitKHR:
	case ExecutionModelClosestHitKHR:
	case ExecutionModelMissKHR:
	case ExecutionModelCallableKHR:
		// NV enums are aliases.
		if (options.es || options.version < 460)
			SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");

		// Need to figure out if we should target KHR or NV extension based on capabilities.
		for (auto &cap : ir.declared_capabilities)
		{
			if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR)
			{
				ray_tracing_is_khr = true;
				break;
			}
		}

		if (ray_tracing_is_khr)
		{
			// In KHR ray tracing we pass payloads by pointer instead of location,
			// so make sure we assign locations properly.
			ray_tracing_khr_fixup_locations();
			require_extension_internal("GL_EXT_ray_tracing");
		}
		else
			require_extension_internal("GL_NV_ray_tracing");
		break;

	default:
		break;
	}

	if (!pls_inputs.empty() || !pls_outputs.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
		require_extension_internal("GL_EXT_shader_pixel_local_storage");
	}

	if (!inout_color_attachments.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
		if (options.vulkan_semantics)
			SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");
		require_extension_internal("GL_EXT_shader_framebuffer_fetch");
	}

	if (options.separate_shader_objects && !options.es && options.version < 410)
		require_extension_internal("GL_ARB_separate_shader_objects");

	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
	{
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
		if (options.es && options.version < 320)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
		else if (!options.es && options.version < 450)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
		require_extension_internal("GL_EXT_buffer_reference");
	}
	else if (ir.addressing_model != AddressingModelLogical)
	{
		SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
	}

	// Check for nonuniform qualifier and passthrough.
	// Instead of looping over all decorations to find this, just look at capabilities.
	for (auto &cap : ir.declared_capabilities)
	{
		switch (cap)
		{
		case CapabilityShaderNonUniformEXT:
			if (!options.vulkan_semantics)
				require_extension_internal("GL_NV_gpu_shader5");
			else
				require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;

		case CapabilityRuntimeDescriptorArrayEXT:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
			require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;

		case CapabilityGeometryShaderPassthroughNV:
			if (execution.model == ExecutionModelGeometry)
			{
				require_extension_internal("GL_NV_geometry_shader_passthrough");
				execution.geometry_passthrough = true;
			}
			break;

		case CapabilityVariablePointers:
		case CapabilityVariablePointersStorageBuffer:
			SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");

		default:
			break;
		}
	}
}
void CompilerGLSL::ray_tracing_khr_fixup_locations()
{
	uint32_t location = 0;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		// Incoming payload storage can also be used for tracing.
		if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
		    var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
			return;
		if (is_hidden_variable(var))
			return;
		set_decoration(var.self, DecorationLocation, location++);
	});
}
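// Main compile entry point: configure backend behaviour, run the analysis passes,
// then emit GLSL, recompiling (up to three passes) whenever a pass discovers
// something that must be declared earlier in the output.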
string CompilerGLSL::compile()
{
	ir.fixup_reserved_names();

	if (options.vulkan_semantics)
		backend.allow_precision_qualifiers = true;
	else
	{
		// Only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers.
		backend.nonuniform_qualifier = "";
		backend.needs_row_major_load_workaround = true;
	}
	backend.force_gl_in_out_block = true;
	backend.supports_extensions = true;
	backend.use_array_constructor = true;

	if (is_legacy_es())
		backend.support_case_fallthrough = false;

	// Scan the SPIR-V to find trivial uses of extensions.
	fixup_type_alias();
	reorder_type_alias();
	build_function_control_flow_graphs_and_analyze();
	find_static_extensions();
	fixup_image_load_store_access();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_interlocked_resource_usage();
	if (!inout_color_attachments.empty())
		emit_inout_fragment_outputs_copy_to_subpass_inputs();

	// Shaders might cast unrelated data to pointers of non-block types.
	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		analyze_non_block_pointer_types();

	uint32_t pass_count = 0;
	do
	{
		if (pass_count >= 3)
			SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");

		reset();

		buffer.reset();

		emit_header();
		emit_resources();
		emit_extension_workarounds(get_execution_model());

		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

		pass_count++;
	} while (is_forcing_recompilation());

	// Implement the interlocked wrapper function at the end.
	// The body was implemented in lieu of main().
	if (interlocked_is_complex)
	{
		statement("void main()");
		begin_scope();
		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
		if (options.es)
			statement("beginInvocationInterlockNV();");
		else
			statement("beginInvocationInterlockARB();");
		statement("spvMainInterlockedBody();");
		if (options.es)
			statement("endInvocationInterlockNV();");
		else
			statement("endInvocationInterlockARB();");
		end_scope();
	}

	// Entry point in GLSL is always main().
	get_entry_point().name = "main";

	return buffer.str();
}
std::string CompilerGLSL::get_partial_source()
{
	return buffer.str();
}
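// Emit the local_size_{x,y,z} layout arguments. When the workgroup size comes from
// specialization constants, use constant_id (Vulkan GLSL) or the generated macro name (plain GLSL).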
void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
{
	auto &execution = get_entry_point();

	if (wg_x.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
		else
			arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
	}
	else
		arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));

	if (wg_y.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
		else
			arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
	}
	else
		arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));

	if (wg_z.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
		else
			arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
	}
	else
		arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
}
void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
{
	if (options.vulkan_semantics)
	{
		auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
		require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
	}
	else
	{
		if (!shader_subgroup_supporter.is_feature_requested(feature))
			force_recompile();
		shader_subgroup_supporter.request_feature(feature);
	}
}
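// Emit the #version directive, all required #extension lines (including vendor fallbacks
// guarded by #if/#elif), default precision statements and per-stage layout() in/out qualifiers.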
void CompilerGLSL::emit_header()
{
	auto &execution = get_entry_point();
	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

	if (!options.es && options.version < 420)
	{
		// Needed for binding = # on UBOs, etc.
		if (options.enable_420pack_extension)
		{
			statement("#ifdef GL_ARB_shading_language_420pack");
			statement("#extension GL_ARB_shading_language_420pack : require");
			statement("#endif");
		}
		// Needed for: layout(early_fragment_tests) in;
		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			require_extension_internal("GL_ARB_shader_image_load_store");
	}

	// Needed for: layout(post_depth_coverage) in;
	if (execution.flags.get(ExecutionModePostDepthCoverage))
		require_extension_internal("GL_ARB_post_depth_coverage");

	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
	if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
	    execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
	    execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
	    execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
	{
		if (options.es)
		{
			if (options.version < 310)
				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
			require_extension_internal("GL_NV_fragment_shader_interlock");
		}
		else
		{
			if (options.version < 420)
				require_extension_internal("GL_ARB_shader_image_load_store");
			require_extension_internal("GL_ARB_fragment_shader_interlock");
		}
	}

	for (auto &ext : forced_extensions)
	{
		if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
		{
			// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
			// GL_AMD_gpu_shader_half_float is a superset, so try that first.
			statement("#if defined(GL_AMD_gpu_shader_half_float)");
			statement("#extension GL_AMD_gpu_shader_half_float : require");
			if (!options.vulkan_semantics)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			else
			{
				statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
			}
			statement("#else");
			statement("#error No extension available for FP16.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
			else
			{
				statement("#if defined(GL_AMD_gpu_shader_int16)");
				statement("#extension GL_AMD_gpu_shader_int16 : require");
				statement("#else");
				statement("#error No extension available for Int16.");
				statement("#endif");
			}
		}
		else if (ext == "GL_ARB_post_depth_coverage")
		{
			if (options.es)
				statement("#extension GL_EXT_post_depth_coverage : require");
			else
			{
				statement("#if defined(GL_ARB_post_depth_coverage)");
				statement("#extension GL_ARB_post_depth_coverage : require");
				statement("#else");
				statement("#extension GL_EXT_post_depth_coverage : require");
				statement("#endif");
			}
		}
		else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
		{
			// Soft-enable this extension on plain GLSL.
			statement("#ifdef ", ext);
			statement("#extension ", ext, " : enable");
			statement("#endif");
		}
		else
			statement("#extension ", ext, " : require");
	}

	if (!options.vulkan_semantics)
	{
		using Supp = ShaderSubgroupSupportHelper;
		auto result = shader_subgroup_supporter.resolve();

		for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
		{
			auto feature = static_cast<Supp::Feature>(feature_index);
			if (!shader_subgroup_supporter.is_feature_requested(feature))
				continue;

			auto exts = Supp::get_candidates_for_feature(feature, result);
			if (exts.empty())
				continue;

			statement("");

			for (auto &ext : exts)
			{
				const char *name = Supp::get_extension_name(ext);
				const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
				auto extra_names = Supp::get_extra_required_extension_names(ext);
				statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
				          (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
				for (const auto &e : extra_names)
					statement("#extension ", e, " : enable");
				statement("#extension ", name, " : require");
			}

			if (!Supp::can_feature_be_implemented_without_extensions(feature))
			{
				statement("#else");
				statement("#error No extensions available to emulate requested subgroup feature.");
			}

			statement("#endif");
		}
	}
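	// Emit any caller-supplied header lines, then gather the per-stage layout() in/out qualifiers below.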
	for (auto &header : header_lines)
		statement(header);

	SmallVector<string> inputs;
	SmallVector<string> outputs;

	switch (execution.model)
	{
	case ExecutionModelGeometry:
		if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
			inputs.push_back(join("invocations = ", execution.invocations));
		if (execution.flags.get(ExecutionModeInputPoints))
			inputs.push_back("points");
		if (execution.flags.get(ExecutionModeInputLines))
			inputs.push_back("lines");
		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
			inputs.push_back("lines_adjacency");
		if (execution.flags.get(ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
			inputs.push_back("triangles_adjacency");

		if (!execution.geometry_passthrough)
		{
			// For passthrough, these are implied and cannot be declared in shader.
			outputs.push_back(join("max_vertices = ", execution.output_vertices));
			if (execution.flags.get(ExecutionModeOutputTriangleStrip))
				outputs.push_back("triangle_strip");
			if (execution.flags.get(ExecutionModeOutputPoints))
				outputs.push_back("points");
			if (execution.flags.get(ExecutionModeOutputLineStrip))
				outputs.push_back("line_strip");
		}
		break;

	case ExecutionModelTessellationControl:
		if (execution.flags.get(ExecutionModeOutputVertices))
			outputs.push_back(join("vertices = ", execution.output_vertices));
		break;

	case ExecutionModelTessellationEvaluation:
		if (execution.flags.get(ExecutionModeQuads))
			inputs.push_back("quads");
		if (execution.flags.get(ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags.get(ExecutionModeIsolines))
			inputs.push_back("isolines");
		if (execution.flags.get(ExecutionModePointMode))
			inputs.push_back("point_mode");

		if (!execution.flags.get(ExecutionModeIsolines))
		{
			if (execution.flags.get(ExecutionModeVertexOrderCw))
				inputs.push_back("cw");
			if (execution.flags.get(ExecutionModeVertexOrderCcw))
				inputs.push_back("ccw");
		}

		if (execution.flags.get(ExecutionModeSpacingFractionalEven))
			inputs.push_back("fractional_even_spacing");
		if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
			inputs.push_back("fractional_odd_spacing");
		if (execution.flags.get(ExecutionModeSpacingEqual))
			inputs.push_back("equal_spacing");
		break;

	case ExecutionModelGLCompute:
	{
		if (execution.workgroup_size.constant != 0)
		{
			SpecializationConstant wg_x, wg_y, wg_z;
			get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

			// If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
			// declarations before we can emit the work group size.
			if (options.vulkan_semantics ||
			    ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
				build_workgroup_size(inputs, wg_x, wg_y, wg_z);
		}
		else
		{
			inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
			inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
			inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
		}
		break;
	}

	case ExecutionModelFragment:
		if (options.es)
		{
			switch (options.fragment.default_float_precision)
			{
			case Options::Lowp:
				statement("precision lowp float;");
				break;
			case Options::Mediump:
				statement("precision mediump float;");
				break;
			case Options::Highp:
				statement("precision highp float;");
				break;
			default:
				break;
			}

			switch (options.fragment.default_int_precision)
			{
			case Options::Lowp:
				statement("precision lowp int;");
				break;
			case Options::Mediump:
				statement("precision mediump int;");
				break;
			case Options::Highp:
				statement("precision highp int;");
				break;
			default:
				break;
			}
		}

		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			inputs.push_back("early_fragment_tests");
		if (execution.flags.get(ExecutionModePostDepthCoverage))
			inputs.push_back("post_depth_coverage");

		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
			inputs.push_back("pixel_interlock_ordered");
		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
			inputs.push_back("pixel_interlock_unordered");
		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
			inputs.push_back("sample_interlock_ordered");
		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
			inputs.push_back("sample_interlock_unordered");

		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
			statement("layout(depth_greater) out float gl_FragDepth;");
		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
			statement("layout(depth_less) out float gl_FragDepth;");
		break;

	default:
		break;
	}

	if (!inputs.empty())
		statement("layout(", merge(inputs), ") in;");
	if (!outputs.empty())
		statement("layout(", merge(outputs), ") out;");

	statement("");
}
bool CompilerGLSL::type_is_empty(const SPIRType &type)
{
	return type.basetype == SPIRType::Struct && type.member_types.empty();
}
void CompilerGLSL::emit_struct(SPIRType &type)
{
	// Struct types can be stamped out multiple times
	// with just different offsets, matrix layouts, etc ...
	// Type-punning with these types is legal, which complicates things
	// when we are storing struct and array types in an SSBO for example.
	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
	if (type.type_alias != TypeID(0) &&
	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		return;

	add_resource_name(type.self);
	auto name = type_to_glsl(type);

	statement(!backend.explicit_struct_type ? "struct " : "", name);
	begin_scope();

	type.member_name_cache.clear();

	uint32_t i = 0;
	bool emitted = false;
	for (auto &member : type.member_types)
	{
		add_member_name(type, i);
		emit_struct_member(type, member, i);
		i++;
		emitted = true;
	}

	// Don't declare empty structs in GLSL, this is not allowed.
	if (type_is_empty(type) && !backend.supports_empty_struct)
	{
		statement("int empty_struct_member;");
		emitted = true;
	}

	if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
		emit_struct_padding_target(type);

	end_scope_decl();

	if (emitted)
		statement("");
}
string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
{
	string res;
	//if (flags & (1ull << DecorationSmooth))
	//    res += "smooth ";
	if (flags.get(DecorationFlat))
		res += "flat ";
	if (flags.get(DecorationNoPerspective))
		res += "noperspective ";
	if (flags.get(DecorationCentroid))
		res += "centroid ";
	if (flags.get(DecorationPatch))
		res += "patch ";
	if (flags.get(DecorationSample))
		res += "sample ";
	if (flags.get(DecorationInvariant))
		res += "invariant ";
	if (flags.get(DecorationExplicitInterpAMD))
		res += "__explicitInterpAMD ";

	return res;
}
string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
	if (is_legacy())
		return "";

	bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
	                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	if (!is_block)
		return "";

	auto &memb = ir.meta[type.self].members;
	if (index >= memb.size())
		return "";
	auto &dec = memb[index];

	SmallVector<string> attr;

	if (has_member_decoration(type.self, index, DecorationPassthroughNV))
		attr.push_back("passthrough");

	// We can only apply layouts on members in block interfaces.
	// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
	// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
	// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
	//
	// We would like to go from (SPIR-V style):
	//
	// struct Foo { layout(row_major) mat4 matrix; };
	// buffer UBO { Foo foo; };
	//
	// to
	//
	// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
	auto flags = combined_decoration_for_member(type, index);

	if (flags.get(DecorationRowMajor))
		attr.push_back("row_major");
	// We don't emit any global layouts, so column_major is default.
	//if (flags & (1ull << DecorationColMajor))
	//    attr.push_back("column_major");

	if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
		attr.push_back(join("location = ", dec.location));

	// Can only declare component if we can declare location.
	if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
	{
		if (!options.es)
		{
			if (options.version < 440 && options.version >= 140)
				require_extension_internal("GL_ARB_enhanced_layouts");
			else if (options.version < 140)
				SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
			attr.push_back(join("component = ", dec.component));
		}
		else
			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
	}

	// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
	// This is only done selectively in GLSL as needed.
	if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
	    dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("offset = ", dec.offset));
	else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("xfb_offset = ", dec.offset));

	if (attr.empty())
		return "";

	string res = "layout(";
	res += merge(attr);
	res += ") ";
	return res;
}
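// Map SPIR-V image formats to GLSL format layout qualifier strings.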
const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
{
	if (options.es && is_desktop_only_format(format))
		SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");

	switch (format)
	{
	case ImageFormatRgba32f:
		return "rgba32f";
	case ImageFormatRgba16f:
		return "rgba16f";
	case ImageFormatR32f:
		return "r32f";
	case ImageFormatRgba8:
		return "rgba8";
	case ImageFormatRgba8Snorm:
		return "rgba8_snorm";
	case ImageFormatRg32f:
		return "rg32f";
	case ImageFormatRg16f:
		return "rg16f";
	case ImageFormatRgba32i:
		return "rgba32i";
	case ImageFormatRgba16i:
		return "rgba16i";
	case ImageFormatR32i:
		return "r32i";
	case ImageFormatRgba8i:
		return "rgba8i";
	case ImageFormatRg32i:
		return "rg32i";
	case ImageFormatRg16i:
		return "rg16i";
	case ImageFormatRgba32ui:
		return "rgba32ui";
	case ImageFormatRgba16ui:
		return "rgba16ui";
	case ImageFormatR32ui:
		return "r32ui";
	case ImageFormatRgba8ui:
		return "rgba8ui";
	case ImageFormatRg32ui:
		return "rg32ui";
	case ImageFormatRg16ui:
		return "rg16ui";
	case ImageFormatR11fG11fB10f:
		return "r11f_g11f_b10f";
	case ImageFormatR16f:
		return "r16f";
	case ImageFormatRgb10A2:
		return "rgb10_a2";
	case ImageFormatR8:
		return "r8";
	case ImageFormatRg8:
		return "rg8";
	case ImageFormatR16:
		return "r16";
	case ImageFormatRg16:
		return "rg16";
	case ImageFormatRgba16:
		return "rgba16";
	case ImageFormatR16Snorm:
		return "r16_snorm";
	case ImageFormatRg16Snorm:
		return "rg16_snorm";
	case ImageFormatRgba16Snorm:
		return "rgba16_snorm";
	case ImageFormatR8Snorm:
		return "r8_snorm";
	case ImageFormatRg8Snorm:
		return "rg8_snorm";
	case ImageFormatR8ui:
		return "r8ui";
	case ImageFormatRg8ui:
		return "rg8ui";
	case ImageFormatR16ui:
		return "r16ui";
	case ImageFormatRgb10a2ui:
		return "rgb10_a2ui";
	case ImageFormatR8i:
		return "r8i";
	case ImageFormatRg8i:
		return "rg8i";
	case ImageFormatR16i:
		return "r16i";
	default:
	case ImageFormatUnknown:
		return nullptr;
	}
}
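// Buffer layout calculators: given a type, its member decorations and a packing standard,
// compute the base component size, alignment, array stride and total packed size that standard implies.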
uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
{
	switch (type.basetype)
	{
	case SPIRType::Double:
	case SPIRType::Int64:
	case SPIRType::UInt64:
		return 8;
	case SPIRType::Float:
	case SPIRType::Int:
	case SPIRType::UInt:
		return 4;
	case SPIRType::Half:
	case SPIRType::Short:
	case SPIRType::UShort:
		return 2;
	case SPIRType::SByte:
	case SPIRType::UByte:
		return 1;

	default:
		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
	}
}
uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
                                                BufferPackingStandard packing)
{
	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
	// and is 64-bit.
	if (type.storage == StorageClassPhysicalStorageBufferEXT)
	{
		if (!type.pointer)
			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		{
			if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
				return 16;
			else
				return 8;
		}
		else
			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
	}

	if (!type.array.empty())
	{
		uint32_t minimum_alignment = 1;
		if (packing_is_vec4_padded(packing))
			minimum_alignment = 16;

		auto *tmp = &get<SPIRType>(type.parent_type);
		while (!tmp->array.empty())
			tmp = &get<SPIRType>(tmp->parent_type);

		// Get the alignment of the base type, then maybe round up.
		return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
	}

	if (type.basetype == SPIRType::Struct)
	{
		// Rule 9. Struct alignment is the maximum alignment of its members.
		uint32_t alignment = 1;
		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
			alignment =
			    max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
		}

		// In std140, struct alignment is rounded up to 16.
		if (packing_is_vec4_padded(packing))
			alignment = max(alignment, 16u);

		return alignment;
	}
	else
	{
		const uint32_t base_alignment = type_to_packed_base_size(type, packing);

		// Alignment requirement for scalar block layout is always the alignment for the most basic component.
		if (packing_is_scalar(packing))
			return base_alignment;

		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
		// a vec4; this is handled outside since that part knows our current offset.
		if (type.columns == 1 && packing_is_hlsl(packing))
			return base_alignment;

		// From 7.6.2.2 in GL 4.5 core spec.
		// Rule 1
		if (type.vecsize == 1 && type.columns == 1)
			return base_alignment;

		// Rule 2
		if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
			return type.vecsize * base_alignment;

		// Rule 3
		if (type.vecsize == 3 && type.columns == 1)
			return 4 * base_alignment;

		// Rule 4 implied. Alignment does not change in std430.

		// Rule 5. Column-major matrices are stored as arrays of
		// vectors.
		if (flags.get(DecorationColMajor) && type.columns > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.vecsize == 3)
				return 4 * base_alignment;
			else
				return type.vecsize * base_alignment;
		}

		// Rule 6 implied.

		// Rule 7.
		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.columns == 3)
				return 4 * base_alignment;
			else
				return type.columns * base_alignment;
		}

		// Rule 8 implied.
	}

	SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
}
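// For example, a vec3 array under std140 gets a 16-byte array stride: the 12-byte element size
// is rounded up to the vec4-padded alignment of 16.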
uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
                                                   BufferPackingStandard packing)
{
	// Array stride is equal to aligned size of the underlying type.
	uint32_t parent = type.parent_type;
	assert(parent);

	auto &tmp = get<SPIRType>(parent);

	uint32_t size = type_to_packed_size(tmp, flags, packing);
	uint32_t alignment = type_to_packed_alignment(type, flags, packing);
	return (size + alignment - 1) & ~(alignment - 1);
}
  1210. uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
  1211. {
  1212. if (!type.array.empty())
  1213. {
  1214. uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
  1215. // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
  1216. // so that it is possible to pack other vectors into the last element.
  1217. if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
  1218. packed_size -= (4 - type.vecsize) * (type.width / 8);
  1219. return packed_size;
  1220. }
  1221. // If using PhysicalStorageBufferEXT storage class, this is a pointer,
  1222. // and is 64-bit.
  1223. if (type.storage == StorageClassPhysicalStorageBufferEXT)
  1224. {
  1225. if (!type.pointer)
  1226. SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
  1227. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
  1228. return 8;
  1229. else
  1230. SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
  1231. }
  1232. uint32_t size = 0;
  1233. if (type.basetype == SPIRType::Struct)
  1234. {
  1235. uint32_t pad_alignment = 1;
  1236. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1237. {
  1238. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1239. auto &member_type = get<SPIRType>(type.member_types[i]);
  1240. uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
  1241. uint32_t alignment = max(packed_alignment, pad_alignment);
  1242. // The next member following a struct member is aligned to the base alignment of the struct that came before.
  1243. // GL 4.5 spec, 7.6.2.2.
  1244. if (member_type.basetype == SPIRType::Struct)
  1245. pad_alignment = packed_alignment;
  1246. else
  1247. pad_alignment = 1;
  1248. size = (size + alignment - 1) & ~(alignment - 1);
  1249. size += type_to_packed_size(member_type, member_flags, packing);
  1250. }
  1251. }
  1252. else
  1253. {
  1254. const uint32_t base_alignment = type_to_packed_base_size(type, packing);
  1255. if (packing_is_scalar(packing))
  1256. {
  1257. size = type.vecsize * type.columns * base_alignment;
  1258. }
  1259. else
  1260. {
  1261. if (type.columns == 1)
  1262. size = type.vecsize * base_alignment;
  1263. if (flags.get(DecorationColMajor) && type.columns > 1)
  1264. {
  1265. if (packing_is_vec4_padded(packing))
  1266. size = type.columns * 4 * base_alignment;
  1267. else if (type.vecsize == 3)
  1268. size = type.columns * 4 * base_alignment;
  1269. else
  1270. size = type.columns * type.vecsize * base_alignment;
  1271. }
  1272. if (flags.get(DecorationRowMajor) && type.vecsize > 1)
  1273. {
  1274. if (packing_is_vec4_padded(packing))
  1275. size = type.vecsize * 4 * base_alignment;
  1276. else if (type.columns == 3)
  1277. size = type.vecsize * 4 * base_alignment;
  1278. else
  1279. size = type.vecsize * type.columns * base_alignment;
  1280. }
  1281. // For matrices in HLSL, the last element has a size which depends on its vector size,
  1282. // so that it is possible to pack other vectors into the last element.
  1283. if (packing_is_hlsl(packing) && type.columns > 1)
  1284. size -= (4 - type.vecsize) * (type.width / 8);
  1285. }
  1286. }
  1287. return size;
  1288. }
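// Illustrative example of the HLSL size adjustment above (hypothetical layout, not emitted here):
// a cbuffer member declared as 'float2 arr[3]' uses an array stride of 16, giving 3 * 16 = 48
// bytes, but the trailing element only occupies 8 bytes, so the packed size reported here is
// 48 - (4 - 2) * 4 = 40. This mirrors how HLSL allows packing other data after the last element.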
  1289. bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
  1290. uint32_t *failed_validation_index, uint32_t start_offset,
  1291. uint32_t end_offset)
  1292. {
  1293. // This is very tricky and error prone, but try to be exhaustive and correct here.
  1294. // SPIR-V doesn't directly say if we're using std430 or std140.
  1295. // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
  1296. // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
  1297. // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
  1298. //
  1299. // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
  1300. // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
  1301. //
  1302. // The only two differences between std140 and std430 are related to padding alignment/array stride
  1303. // in arrays and structs. In std140 they take minimum vec4 alignment.
  1304. // std430 only removes the vec4 requirement.
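// As a sketch of what the inference below keys on (hypothetical SPIR-V offsets/strides):
//   layout(...) buffer SSBO { float arr[4]; float tail; };
// With ArrayStride = 4 and 'tail' at offset 16, the layout can only be std430 (or scalar);
// with ArrayStride = 16 and 'tail' at offset 64, it is consistent with std140.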
  1305. uint32_t offset = 0;
  1306. uint32_t pad_alignment = 1;
  1307. bool is_top_level_block =
  1308. has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
  1309. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1310. {
  1311. auto &memb_type = get<SPIRType>(type.member_types[i]);
  1312. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1313. // Verify alignment rules.
  1314. uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
  1315. // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
  1316. // layout(constant_id = 0) const int s = 10;
  1317. // const int S = s + 5; // SpecConstantOp
  1318. // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
  1319. // we would need full implementation of compile-time constant folding. :(
  1320. // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
  1321. // for our analysis (e.g. unsized arrays).
  1322. // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
  1323. // Querying size of this member will fail, so just don't call it unless we have to.
  1324. //
1325. // This is likely the best effort we can make without going into unacceptably complicated workarounds.
  1326. bool member_can_be_unsized =
  1327. is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
  1328. uint32_t packed_size = 0;
  1329. if (!member_can_be_unsized || packing_is_hlsl(packing))
  1330. packed_size = type_to_packed_size(memb_type, member_flags, packing);
  1331. // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
  1332. if (packing_is_hlsl(packing))
  1333. {
  1334. // If a member straddles across a vec4 boundary, alignment is actually vec4.
  1335. uint32_t begin_word = offset / 16;
  1336. uint32_t end_word = (offset + packed_size - 1) / 16;
  1337. if (begin_word != end_word)
  1338. packed_alignment = max(packed_alignment, 16u);
  1339. }
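// Hypothetical example of the straddle rule above: a float2 member with a current offset of 12
// spans bytes 12..19, i.e. begin_word = 0 and end_word = 1, so its effective alignment is bumped
// to 16 and the member is pushed to the next 16-byte register.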
  1340. uint32_t actual_offset = type_struct_member_offset(type, i);
  1341. // Field is not in the specified range anymore and we can ignore any further fields.
  1342. if (actual_offset >= end_offset)
  1343. break;
  1344. uint32_t alignment = max(packed_alignment, pad_alignment);
  1345. offset = (offset + alignment - 1) & ~(alignment - 1);
  1346. // The next member following a struct member is aligned to the base alignment of the struct that came before.
  1347. // GL 4.5 spec, 7.6.2.2.
  1348. if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
  1349. pad_alignment = packed_alignment;
  1350. else
  1351. pad_alignment = 1;
  1352. // Only care about packing if we are in the given range
  1353. if (actual_offset >= start_offset)
  1354. {
  1355. // We only care about offsets in std140, std430, etc ...
  1356. // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
  1357. if (!packing_has_flexible_offset(packing))
  1358. {
  1359. if (actual_offset != offset) // This cannot be the packing we're looking for.
  1360. {
  1361. if (failed_validation_index)
  1362. *failed_validation_index = i;
  1363. return false;
  1364. }
  1365. }
  1366. else if ((actual_offset & (alignment - 1)) != 0)
  1367. {
  1368. // We still need to verify that alignment rules are observed, even if we have explicit offset.
  1369. if (failed_validation_index)
  1370. *failed_validation_index = i;
  1371. return false;
  1372. }
  1373. // Verify array stride rules.
  1374. if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
  1375. type_struct_member_array_stride(type, i))
  1376. {
  1377. if (failed_validation_index)
  1378. *failed_validation_index = i;
  1379. return false;
  1380. }
  1381. // Verify that sub-structs also follow packing rules.
1382. // We cannot use enhanced layouts on substructs, so they had better be up to spec.
  1383. auto substruct_packing = packing_to_substruct_packing(packing);
  1384. if (!memb_type.pointer && !memb_type.member_types.empty() &&
  1385. !buffer_is_packing_standard(memb_type, substruct_packing))
  1386. {
  1387. if (failed_validation_index)
  1388. *failed_validation_index = i;
  1389. return false;
  1390. }
  1391. }
  1392. // Bump size.
  1393. offset = actual_offset + packed_size;
  1394. }
  1395. return true;
  1396. }
  1397. bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
  1398. {
1399. // Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
  1400. // Be very explicit here about how to solve the issue.
  1401. if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
  1402. (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
  1403. {
  1404. uint32_t minimum_desktop_version = block ? 440 : 410;
  1405. // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
  1406. if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
  1407. return false;
  1408. else if (options.es && options.version < 310)
  1409. return false;
  1410. }
  1411. if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
  1412. (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
  1413. {
  1414. if (options.es && options.version < 300)
  1415. return false;
  1416. else if (!options.es && options.version < 330)
  1417. return false;
  1418. }
  1419. if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
  1420. {
  1421. if (options.es && options.version < 310)
  1422. return false;
  1423. else if (!options.es && options.version < 430)
  1424. return false;
  1425. }
  1426. return true;
  1427. }
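// Rough examples of what the checks above allow (targets are hypothetical):
//   - Desktop GLSL 330, vertex input:  layout(location = 0) in vec4 Position;  // OK
//   - Desktop GLSL 330, vertex output: needs GLSL 410+ or ARB_separate_shader_objects,
//     so the location qualifier is dropped below that.
//   - Locations on uniforms/push constants need ESSL 310 or desktop GLSL 430.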
  1428. string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
  1429. {
1430. // FIXME: Come up with a better solution for when to disable layouts.
1431. // Whether layouts can be emitted depends on available extensions as well as which
1432. // types of layouts are used. For now, the simple solution is to just disable
1433. // layouts for legacy versions.
  1434. if (is_legacy())
  1435. return "";
  1436. if (subpass_input_is_framebuffer_fetch(var.self))
  1437. return "";
  1438. SmallVector<string> attr;
  1439. auto &type = get<SPIRType>(var.basetype);
  1440. auto &flags = get_decoration_bitset(var.self);
  1441. auto &typeflags = get_decoration_bitset(type.self);
  1442. if (flags.get(DecorationPassthroughNV))
  1443. attr.push_back("passthrough");
  1444. if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
  1445. attr.push_back("push_constant");
  1446. else if (var.storage == StorageClassShaderRecordBufferKHR)
  1447. attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
  1448. if (flags.get(DecorationRowMajor))
  1449. attr.push_back("row_major");
  1450. if (flags.get(DecorationColMajor))
  1451. attr.push_back("column_major");
  1452. if (options.vulkan_semantics)
  1453. {
  1454. if (flags.get(DecorationInputAttachmentIndex))
  1455. attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
  1456. }
  1457. bool is_block = has_decoration(type.self, DecorationBlock);
  1458. if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
  1459. {
  1460. Bitset combined_decoration;
  1461. for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
  1462. combined_decoration.merge_or(combined_decoration_for_member(type, i));
  1463. // If our members have location decorations, we don't need to
  1464. // emit location decorations at the top as well (looks weird).
  1465. if (!combined_decoration.get(DecorationLocation))
  1466. attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
  1467. }
  1468. // Transform feedback
  1469. bool uses_enhanced_layouts = false;
  1470. if (is_block && var.storage == StorageClassOutput)
  1471. {
  1472. // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
  1473. // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
  1474. // is the xfb_offset.
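// For reference, the kind of declaration this aims to produce is roughly (hypothetical output):
//   layout(xfb_buffer = 0, xfb_stride = 16) out VertOut
//   {
//       layout(xfb_offset = 0) vec4 value;
//   } vout;
// where the per-member xfb_offset is emitted separately when the members are declared.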
  1475. uint32_t member_count = uint32_t(type.member_types.size());
  1476. bool have_xfb_buffer_stride = false;
  1477. bool have_any_xfb_offset = false;
  1478. bool have_geom_stream = false;
  1479. uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
  1480. if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
  1481. {
  1482. have_xfb_buffer_stride = true;
  1483. xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
  1484. xfb_stride = get_decoration(var.self, DecorationXfbStride);
  1485. }
  1486. if (flags.get(DecorationStream))
  1487. {
  1488. have_geom_stream = true;
  1489. geom_stream = get_decoration(var.self, DecorationStream);
  1490. }
  1491. // Verify that none of the members violate our assumption.
  1492. for (uint32_t i = 0; i < member_count; i++)
  1493. {
  1494. if (has_member_decoration(type.self, i, DecorationStream))
  1495. {
  1496. uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
  1497. if (have_geom_stream && member_geom_stream != geom_stream)
  1498. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  1499. have_geom_stream = true;
  1500. geom_stream = member_geom_stream;
  1501. }
  1502. // Only members with an Offset decoration participate in XFB.
  1503. if (!has_member_decoration(type.self, i, DecorationOffset))
  1504. continue;
  1505. have_any_xfb_offset = true;
  1506. if (has_member_decoration(type.self, i, DecorationXfbBuffer))
  1507. {
  1508. uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
  1509. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  1510. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  1511. have_xfb_buffer_stride = true;
  1512. xfb_buffer = buffer_index;
  1513. }
  1514. if (has_member_decoration(type.self, i, DecorationXfbStride))
  1515. {
  1516. uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
  1517. if (have_xfb_buffer_stride && stride != xfb_stride)
  1518. SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  1519. have_xfb_buffer_stride = true;
  1520. xfb_stride = stride;
  1521. }
  1522. }
  1523. if (have_xfb_buffer_stride && have_any_xfb_offset)
  1524. {
  1525. attr.push_back(join("xfb_buffer = ", xfb_buffer));
  1526. attr.push_back(join("xfb_stride = ", xfb_stride));
  1527. uses_enhanced_layouts = true;
  1528. }
  1529. if (have_geom_stream)
  1530. {
  1531. if (get_execution_model() != ExecutionModelGeometry)
  1532. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  1533. if (options.es)
  1534. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  1535. if (options.version < 400)
  1536. require_extension_internal("GL_ARB_transform_feedback3");
  1537. attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
  1538. }
  1539. }
  1540. else if (var.storage == StorageClassOutput)
  1541. {
  1542. if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
  1543. {
  1544. // XFB for standalone variables, we can emit all decorations.
  1545. attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
  1546. attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
  1547. attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
  1548. uses_enhanced_layouts = true;
  1549. }
  1550. if (flags.get(DecorationStream))
  1551. {
  1552. if (get_execution_model() != ExecutionModelGeometry)
  1553. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  1554. if (options.es)
  1555. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  1556. if (options.version < 400)
  1557. require_extension_internal("GL_ARB_transform_feedback3");
  1558. attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
  1559. }
  1560. }
  1561. // Can only declare Component if we can declare location.
  1562. if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
  1563. {
  1564. uses_enhanced_layouts = true;
  1565. attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
  1566. }
  1567. if (uses_enhanced_layouts)
  1568. {
  1569. if (!options.es)
  1570. {
  1571. if (options.version < 440 && options.version >= 140)
  1572. require_extension_internal("GL_ARB_enhanced_layouts");
  1573. else if (options.version < 140)
  1574. SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
  1575. if (!options.es && options.version < 440)
  1576. require_extension_internal("GL_ARB_enhanced_layouts");
  1577. }
  1578. else if (options.es)
  1579. SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
  1580. }
  1581. if (flags.get(DecorationIndex))
  1582. attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
  1583. // Do not emit set = decoration in regular GLSL output, but
  1584. // we need to preserve it in Vulkan GLSL mode.
  1585. if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
  1586. {
  1587. if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
  1588. attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
  1589. }
  1590. bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
  1591. bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
  1592. (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
  1593. bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
  1594. bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
  1595. // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
  1596. bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
1597. // Pretend there are no UBOs when the options say so.
  1598. if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
  1599. can_use_buffer_blocks = false;
  1600. bool can_use_binding;
  1601. if (options.es)
  1602. can_use_binding = options.version >= 310;
  1603. else
  1604. can_use_binding = options.enable_420pack_extension || (options.version >= 420);
  1605. // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
  1606. if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
  1607. can_use_binding = false;
  1608. if (var.storage == StorageClassShaderRecordBufferKHR)
  1609. can_use_binding = false;
  1610. if (can_use_binding && flags.get(DecorationBinding))
  1611. attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));
  1612. if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
  1613. attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
  1614. // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
  1615. // If SPIR-V does not comply with either layout, we cannot really work around it.
  1616. if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
  1617. {
  1618. attr.push_back(buffer_to_packing_standard(type, false));
  1619. }
  1620. else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
  1621. {
  1622. attr.push_back(buffer_to_packing_standard(type, true));
  1623. }
1624. // For images, the type itself adds a layout qualifier.
  1625. // Only emit the format for storage images.
  1626. if (type.basetype == SPIRType::Image && type.image.sampled == 2)
  1627. {
  1628. const char *fmt = format_to_glsl(type.image.format);
  1629. if (fmt)
  1630. attr.push_back(fmt);
  1631. }
  1632. if (attr.empty())
  1633. return "";
  1634. string res = "layout(";
  1635. res += merge(attr);
  1636. res += ") ";
  1637. return res;
  1638. }
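// As an illustration, for a Vulkan-GLSL uniform buffer with descriptor set 0, binding 1 and a
// std140-compatible layout, the function above would produce something along the lines of
// "layout(set = 0, binding = 1, std140) " as the prefix of the declaration; the exact contents
// depend on which decorations are present.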
  1639. string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
  1640. {
  1641. if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
  1642. return "std430";
  1643. else if (buffer_is_packing_standard(type, BufferPackingStd140))
  1644. return "std140";
  1645. else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
  1646. {
  1647. require_extension_internal("GL_EXT_scalar_block_layout");
  1648. return "scalar";
  1649. }
  1650. else if (support_std430_without_scalar_layout &&
  1651. buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
  1652. {
  1653. if (options.es && !options.vulkan_semantics)
1654. SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES targets do "
1655. "not support GL_ARB_enhanced_layouts.");
  1656. if (!options.es && !options.vulkan_semantics && options.version < 440)
  1657. require_extension_internal("GL_ARB_enhanced_layouts");
  1658. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1659. return "std430";
  1660. }
  1661. else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
  1662. {
1663. // Fallback time. We might be able to use GL_ARB_enhanced_layouts to deal with this difference;
1664. // however, we can only use layout(offset) on the block itself, not on any substructs, so the substructs had better use the appropriate layout themselves.
  1665. // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
  1666. if (options.es && !options.vulkan_semantics)
1667. SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES targets do "
1668. "not support GL_ARB_enhanced_layouts.");
  1669. if (!options.es && !options.vulkan_semantics && options.version < 440)
  1670. require_extension_internal("GL_ARB_enhanced_layouts");
  1671. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1672. return "std140";
  1673. }
  1674. else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
  1675. {
  1676. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1677. require_extension_internal("GL_EXT_scalar_block_layout");
  1678. return "scalar";
  1679. }
  1680. else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
  1681. buffer_is_packing_standard(type, BufferPackingStd430))
  1682. {
  1683. // UBOs can support std430 with GL_EXT_scalar_block_layout.
  1684. require_extension_internal("GL_EXT_scalar_block_layout");
  1685. return "std430";
  1686. }
  1687. else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
  1688. buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
  1689. {
  1690. // UBOs can support std430 with GL_EXT_scalar_block_layout.
  1691. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1692. require_extension_internal("GL_EXT_scalar_block_layout");
  1693. return "std430";
  1694. }
  1695. else
  1696. {
1697. SPIRV_CROSS_THROW("Buffer block cannot be expressed as std430, std140, or scalar, even with enhanced "
1698. "layouts. You can try flattening this block to support a more flexible layout.");
  1699. }
  1700. }
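// Hypothetical example of a block that only the scalar path above accepts:
//   layout(scalar) buffer SSBO { float a; vec3 b; };   // 'b' at offset 4
// std140 and std430 both require vec3 to be 16-byte aligned, so offset 4 fails those checks,
// while GL_EXT_scalar_block_layout only requires 4-byte alignment here.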
  1701. void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
  1702. {
  1703. if (flattened_buffer_blocks.count(var.self))
  1704. emit_buffer_block_flattened(var);
  1705. else if (options.vulkan_semantics)
  1706. emit_push_constant_block_vulkan(var);
  1707. else if (options.emit_push_constant_as_uniform_buffer)
  1708. emit_buffer_block_native(var);
  1709. else
  1710. emit_push_constant_block_glsl(var);
  1711. }
  1712. void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
  1713. {
  1714. emit_buffer_block(var);
  1715. }
  1716. void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
  1717. {
  1718. // OpenGL has no concept of push constant blocks, implement it as a uniform struct.
  1719. auto &type = get<SPIRType>(var.basetype);
  1720. auto &flags = ir.meta[var.self].decoration.decoration_flags;
  1721. flags.clear(DecorationBinding);
  1722. flags.clear(DecorationDescriptorSet);
  1723. #if 0
  1724. if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
  1725. SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
  1726. "Remap to location with reflection API first or disable these decorations.");
  1727. #endif
  1728. // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
  1729. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
  1730. auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
  1731. bool block_flag = block_flags.get(DecorationBlock);
  1732. block_flags.clear(DecorationBlock);
  1733. emit_struct(type);
  1734. if (block_flag)
  1735. block_flags.set(DecorationBlock);
  1736. emit_uniform(var);
  1737. statement("");
  1738. }
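// Sketch of the non-Vulkan fallback above (names are illustrative): a push constant block such as
//   layout(push_constant) uniform Push { mat4 mvp; } pc;
// ends up emitted as a plain struct plus a uniform, roughly:
//   struct Push { mat4 mvp; };
//   uniform Push pc;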
  1739. void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
  1740. {
  1741. auto &type = get<SPIRType>(var.basetype);
  1742. bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
  1743. if (flattened_buffer_blocks.count(var.self))
  1744. emit_buffer_block_flattened(var);
  1745. else if (is_legacy() || (!options.es && options.version == 130) ||
  1746. (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
  1747. emit_buffer_block_legacy(var);
  1748. else
  1749. emit_buffer_block_native(var);
  1750. }
  1751. void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
  1752. {
  1753. auto &type = get<SPIRType>(var.basetype);
  1754. bool ssbo = var.storage == StorageClassStorageBuffer ||
  1755. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  1756. if (ssbo)
  1757. SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
1758. // We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
  1759. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
  1760. auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
  1761. bool block_flag = block_flags.get(DecorationBlock);
  1762. block_flags.clear(DecorationBlock);
  1763. emit_struct(type);
  1764. if (block_flag)
  1765. block_flags.set(DecorationBlock);
  1766. emit_uniform(var);
  1767. statement("");
  1768. }
  1769. void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration)
  1770. {
  1771. string buffer_name;
  1772. if (forward_declaration)
  1773. {
  1774. // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
1775. // Allow an aliased name here, since we might be declaring the block twice: once with buffer reference (forward declared) and once with the proper declaration.
  1776. // The names must match up.
  1777. buffer_name = to_name(type.self, false);
  1778. // Shaders never use the block by interface name, so we don't
  1779. // have to track this other than updating name caches.
  1780. // If we have a collision for any reason, just fallback immediately.
  1781. if (ir.meta[type.self].decoration.alias.empty() ||
  1782. block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
  1783. resource_names.find(buffer_name) != end(resource_names))
  1784. {
  1785. buffer_name = join("_", type.self);
  1786. }
  1787. // Make sure we get something unique for both global name scope and block name scope.
  1788. // See GLSL 4.5 spec: section 4.3.9 for details.
  1789. add_variable(block_ssbo_names, resource_names, buffer_name);
  1790. // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
  1791. // This cannot conflict with anything else, so we're safe now.
1792. // We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
  1793. if (buffer_name.empty())
  1794. buffer_name = join("_", type.self);
  1795. block_names.insert(buffer_name);
  1796. block_ssbo_names.insert(buffer_name);
  1797. // Ensure we emit the correct name when emitting non-forward pointer type.
  1798. ir.meta[type.self].decoration.alias = buffer_name;
  1799. }
  1800. else if (type.basetype != SPIRType::Struct)
  1801. buffer_name = type_to_glsl(type);
  1802. else
  1803. buffer_name = to_name(type.self, false);
  1804. if (!forward_declaration)
  1805. {
  1806. if (type.basetype == SPIRType::Struct)
  1807. {
  1808. auto flags = ir.get_buffer_block_type_flags(type);
  1809. string decorations;
  1810. if (flags.get(DecorationRestrict))
  1811. decorations += " restrict";
  1812. if (flags.get(DecorationCoherent))
  1813. decorations += " coherent";
  1814. if (flags.get(DecorationNonReadable))
  1815. decorations += " writeonly";
  1816. if (flags.get(DecorationNonWritable))
  1817. decorations += " readonly";
  1818. statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true),
  1819. ")", decorations, " buffer ", buffer_name);
  1820. }
  1821. else
  1822. statement("layout(buffer_reference) buffer ", buffer_name);
  1823. begin_scope();
  1824. if (type.basetype == SPIRType::Struct)
  1825. {
  1826. type.member_name_cache.clear();
  1827. uint32_t i = 0;
  1828. for (auto &member : type.member_types)
  1829. {
  1830. add_member_name(type, i);
  1831. emit_struct_member(type, member, i);
  1832. i++;
  1833. }
  1834. }
  1835. else
  1836. {
  1837. auto &pointee_type = get_pointee_type(type);
  1838. statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
  1839. }
  1840. end_scope_decl();
  1841. statement("");
  1842. }
  1843. else
  1844. {
  1845. statement("layout(buffer_reference) buffer ", buffer_name, ";");
  1846. }
  1847. }
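// Roughly what the non-forward-declared path above emits for a struct pointee (illustrative only):
//   layout(buffer_reference, std430) restrict buffer Node
//   {
//       int value;
//       Node next;
//   };
// preceded earlier by the forward declaration "layout(buffer_reference) buffer Node;".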
  1848. void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
  1849. {
  1850. auto &type = get<SPIRType>(var.basetype);
  1851. Bitset flags = ir.get_buffer_block_flags(var);
  1852. bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
  1853. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  1854. bool is_restrict = ssbo && flags.get(DecorationRestrict);
  1855. bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
  1856. bool is_readonly = ssbo && flags.get(DecorationNonWritable);
  1857. bool is_coherent = ssbo && flags.get(DecorationCoherent);
  1858. // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
  1859. auto buffer_name = to_name(type.self, false);
  1860. auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
  1861. // Shaders never use the block by interface name, so we don't
  1862. // have to track this other than updating name caches.
  1863. // If we have a collision for any reason, just fallback immediately.
  1864. if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
  1865. resource_names.find(buffer_name) != end(resource_names))
  1866. {
  1867. buffer_name = get_block_fallback_name(var.self);
  1868. }
  1869. // Make sure we get something unique for both global name scope and block name scope.
  1870. // See GLSL 4.5 spec: section 4.3.9 for details.
  1871. add_variable(block_namespace, resource_names, buffer_name);
  1872. // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
  1873. // This cannot conflict with anything else, so we're safe now.
1874. // We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
  1875. if (buffer_name.empty())
  1876. buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
  1877. block_names.insert(buffer_name);
  1878. block_namespace.insert(buffer_name);
  1879. // Save for post-reflection later.
  1880. declared_block_names[var.self] = buffer_name;
  1881. statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
  1882. is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
  1883. buffer_name);
  1884. begin_scope();
  1885. type.member_name_cache.clear();
  1886. uint32_t i = 0;
  1887. for (auto &member : type.member_types)
  1888. {
  1889. add_member_name(type, i);
  1890. emit_struct_member(type, member, i);
  1891. i++;
  1892. }
  1893. // var.self can be used as a backup name for the block name,
  1894. // so we need to make sure we don't disturb the name here on a recompile.
  1895. // It will need to be reset if we have to recompile.
  1896. preserve_alias_on_reset(var.self);
  1897. add_resource_name(var.self);
  1898. end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
  1899. statement("");
  1900. }
  1901. void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
  1902. {
  1903. auto &type = get<SPIRType>(var.basetype);
  1904. // Block names should never alias.
  1905. auto buffer_name = to_name(type.self, false);
  1906. size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
  1907. SPIRType::BaseType basic_type;
  1908. if (get_common_basic_type(type, basic_type))
  1909. {
  1910. SPIRType tmp;
  1911. tmp.basetype = basic_type;
  1912. tmp.vecsize = 4;
  1913. if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
  1914. SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
  1915. auto flags = ir.get_buffer_block_flags(var);
  1916. statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
  1917. buffer_size, "];");
  1918. }
  1919. else
  1920. SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
  1921. }
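// Flattening example (hypothetical block): a 48-byte UBO whose members share a common basic type
// of float is emitted as "uniform vec4 UBO[3];", and member accesses are then built from vec4
// elements of that array rather than from struct members.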
  1922. const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
  1923. {
  1924. auto &execution = get_entry_point();
  1925. if (subpass_input_is_framebuffer_fetch(var.self))
  1926. return "";
  1927. if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
  1928. {
  1929. if (is_legacy() && execution.model == ExecutionModelVertex)
  1930. return var.storage == StorageClassInput ? "attribute " : "varying ";
  1931. else if (is_legacy() && execution.model == ExecutionModelFragment)
  1932. return "varying "; // Fragment outputs are renamed so they never hit this case.
  1933. else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
  1934. {
  1935. if (inout_color_attachments.count(get_decoration(var.self, DecorationLocation)) != 0)
  1936. return "inout ";
  1937. else
  1938. return "out ";
  1939. }
  1940. else
  1941. return var.storage == StorageClassInput ? "in " : "out ";
  1942. }
  1943. else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
  1944. var.storage == StorageClassPushConstant)
  1945. {
  1946. return "uniform ";
  1947. }
  1948. else if (var.storage == StorageClassRayPayloadKHR)
  1949. {
  1950. return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
  1951. }
  1952. else if (var.storage == StorageClassIncomingRayPayloadKHR)
  1953. {
  1954. return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
  1955. }
  1956. else if (var.storage == StorageClassHitAttributeKHR)
  1957. {
  1958. return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
  1959. }
  1960. else if (var.storage == StorageClassCallableDataKHR)
  1961. {
  1962. return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
  1963. }
  1964. else if (var.storage == StorageClassIncomingCallableDataKHR)
  1965. {
  1966. return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
  1967. }
  1968. return "";
  1969. }
  1970. void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
  1971. const SmallVector<uint32_t> &indices)
  1972. {
  1973. uint32_t member_type_id = type.self;
  1974. const SPIRType *member_type = &type;
  1975. const SPIRType *parent_type = nullptr;
  1976. auto flattened_name = basename;
  1977. for (auto &index : indices)
  1978. {
  1979. flattened_name += "_";
  1980. flattened_name += to_member_name(*member_type, index);
  1981. parent_type = member_type;
  1982. member_type_id = member_type->member_types[index];
  1983. member_type = &get<SPIRType>(member_type_id);
  1984. }
  1985. assert(member_type->basetype != SPIRType::Struct);
  1986. // We're overriding struct member names, so ensure we do so on the primary type.
  1987. if (parent_type->type_alias)
  1988. parent_type = &get<SPIRType>(parent_type->type_alias);
  1989. // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
  1990. // which is not allowed.
  1991. ParsedIR::sanitize_underscores(flattened_name);
  1992. uint32_t last_index = indices.back();
  1993. // Pass in the varying qualifier here so it will appear in the correct declaration order.
  1994. // Replace member name while emitting it so it encodes both struct name and member name.
  1995. auto backup_name = get_member_name(parent_type->self, last_index);
  1996. auto member_name = to_member_name(*parent_type, last_index);
  1997. set_member_name(parent_type->self, last_index, flattened_name);
  1998. emit_struct_member(*parent_type, member_type_id, last_index, qual);
  1999. // Restore member name.
  2000. set_member_name(parent_type->self, last_index, member_name);
  2001. }
  2002. void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
  2003. const SmallVector<uint32_t> &indices)
  2004. {
  2005. auto sub_indices = indices;
  2006. sub_indices.push_back(0);
  2007. const SPIRType *member_type = &type;
  2008. for (auto &index : indices)
  2009. member_type = &get<SPIRType>(member_type->member_types[index]);
  2010. assert(member_type->basetype == SPIRType::Struct);
  2011. if (!member_type->array.empty())
  2012. SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
  2013. for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
  2014. {
  2015. sub_indices.back() = i;
  2016. if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
  2017. emit_flattened_io_block_struct(basename, type, qual, sub_indices);
  2018. else
  2019. emit_flattened_io_block_member(basename, type, qual, sub_indices);
  2020. }
  2021. }
  2022. void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
  2023. {
  2024. auto &var_type = get<SPIRType>(var.basetype);
  2025. if (!var_type.array.empty())
  2026. SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
  2027. // Emit flattened types based on the type alias. Normally, we are never supposed to emit
  2028. // struct declarations for aliased types.
  2029. auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
  2030. auto old_flags = ir.meta[type.self].decoration.decoration_flags;
  2031. // Emit the members as if they are part of a block to get all qualifiers.
  2032. ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
  2033. type.member_name_cache.clear();
  2034. SmallVector<uint32_t> member_indices;
  2035. member_indices.push_back(0);
  2036. auto basename = to_name(var.self);
  2037. uint32_t i = 0;
  2038. for (auto &member : type.member_types)
  2039. {
  2040. add_member_name(type, i);
  2041. auto &membertype = get<SPIRType>(member);
  2042. member_indices.back() = i;
  2043. if (membertype.basetype == SPIRType::Struct)
  2044. emit_flattened_io_block_struct(basename, type, qual, member_indices);
  2045. else
  2046. emit_flattened_io_block_member(basename, type, qual, member_indices);
  2047. i++;
  2048. }
  2049. ir.meta[type.self].decoration.decoration_flags = old_flags;
  2050. // Treat this variable as fully flattened from now on.
  2051. flattened_structs[var.self] = true;
  2052. }
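// Illustration of the flattening above (hypothetical shader): an output block such as
//   out VSOut { vec4 color; vec2 uv; } vout;
// is emitted as separate varyings named after the instance and member, roughly
//   out vec4 vout_color;
//   out vec2 vout_uv;
// with underscores sanitized so no identifier contains consecutive underscores.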
  2053. void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
  2054. {
  2055. auto &type = get<SPIRType>(var.basetype);
  2056. if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
  2057. !options.es && options.version < 410)
  2058. {
  2059. require_extension_internal("GL_ARB_vertex_attrib_64bit");
  2060. }
  2061. // Either make it plain in/out or in/out blocks depending on what shader is doing ...
  2062. bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
  2063. const char *qual = to_storage_qualifiers_glsl(var);
  2064. if (block)
  2065. {
  2066. // ESSL earlier than 310 and GLSL earlier than 150 did not support
  2067. // I/O variables which are struct types.
  2068. // To support this, flatten the struct into separate varyings instead.
  2069. if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
  2070. (!options.es && options.version < 150))
  2071. {
  2072. // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
  2073. // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
  2074. emit_flattened_io_block(var, qual);
  2075. }
  2076. else
  2077. {
  2078. if (options.es && options.version < 320)
  2079. {
  2080. // Geometry and tessellation extensions imply this extension.
  2081. if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
  2082. require_extension_internal("GL_EXT_shader_io_blocks");
  2083. }
  2084. // Workaround to make sure we can emit "patch in/out" correctly.
  2085. fixup_io_block_patch_qualifiers(var);
  2086. // Block names should never alias.
  2087. auto block_name = to_name(type.self, false);
  2088. // The namespace for I/O blocks is separate from other variables in GLSL.
  2089. auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
  2090. // Shaders never use the block by interface name, so we don't
  2091. // have to track this other than updating name caches.
  2092. if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
  2093. block_name = get_fallback_name(type.self);
  2094. else
  2095. block_namespace.insert(block_name);
2096. // If for some reason block_name is an illegal name, make a final fallback to a workaround name.
  2097. // This cannot conflict with anything else, so we're safe now.
  2098. if (block_name.empty())
  2099. block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
  2100. // Instance names cannot alias block names.
  2101. resource_names.insert(block_name);
  2102. bool is_patch = has_decoration(var.self, DecorationPatch);
  2103. statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name);
  2104. begin_scope();
  2105. type.member_name_cache.clear();
  2106. uint32_t i = 0;
  2107. for (auto &member : type.member_types)
  2108. {
  2109. add_member_name(type, i);
  2110. emit_struct_member(type, member, i);
  2111. i++;
  2112. }
  2113. add_resource_name(var.self);
  2114. end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
  2115. statement("");
  2116. }
  2117. }
  2118. else
  2119. {
  2120. // ESSL earlier than 310 and GLSL earlier than 150 did not support
  2121. // I/O variables which are struct types.
  2122. // To support this, flatten the struct into separate varyings instead.
  2123. if (type.basetype == SPIRType::Struct &&
  2124. (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
  2125. (!options.es && options.version < 150)))
  2126. {
  2127. emit_flattened_io_block(var, qual);
  2128. }
  2129. else
  2130. {
  2131. add_resource_name(var.self);
  2132. // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
  2133. // Opt for unsized as it's the more "correct" variant to use.
  2134. bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() &&
  2135. !has_decoration(var.self, DecorationPatch) &&
  2136. (get_entry_point().model == ExecutionModelTessellationControl ||
  2137. get_entry_point().model == ExecutionModelTessellationEvaluation);
  2138. uint32_t old_array_size = 0;
  2139. bool old_array_size_literal = true;
  2140. if (control_point_input_array)
  2141. {
  2142. swap(type.array.back(), old_array_size);
  2143. swap(type.array_size_literal.back(), old_array_size_literal);
  2144. }
  2145. statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
  2146. variable_decl(type, to_name(var.self), var.self), ";");
  2147. if (control_point_input_array)
  2148. {
  2149. swap(type.array.back(), old_array_size);
  2150. swap(type.array_size_literal.back(), old_array_size_literal);
  2151. }
  2152. }
  2153. }
  2154. }
  2155. void CompilerGLSL::emit_uniform(const SPIRVariable &var)
  2156. {
  2157. auto &type = get<SPIRType>(var.basetype);
  2158. if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
  2159. {
  2160. if (!options.es && options.version < 420)
  2161. require_extension_internal("GL_ARB_shader_image_load_store");
  2162. else if (options.es && options.version < 310)
  2163. SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
  2164. }
  2165. add_resource_name(var.self);
  2166. statement(layout_for_variable(var), variable_decl(var), ";");
  2167. }
  2168. string CompilerGLSL::constant_value_macro_name(uint32_t id)
  2169. {
  2170. return join("SPIRV_CROSS_CONSTANT_ID_", id);
  2171. }
  2172. void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
  2173. {
  2174. auto &type = get<SPIRType>(constant.basetype);
  2175. auto name = to_name(constant.self);
  2176. statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
  2177. }
  2178. void CompilerGLSL::emit_constant(const SPIRConstant &constant)
  2179. {
  2180. auto &type = get<SPIRType>(constant.constant_type);
  2181. auto name = to_name(constant.self);
  2182. SpecializationConstant wg_x, wg_y, wg_z;
  2183. ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  2184. // This specialization constant is implicitly declared by emitting layout() in;
  2185. if (constant.self == workgroup_size_id)
  2186. return;
  2187. // These specialization constants are implicitly declared by emitting layout() in;
  2188. // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
  2189. // later can use macro overrides for work group size.
  2190. bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
  2191. ConstantID(constant.self) == wg_z.id;
  2192. if (options.vulkan_semantics && is_workgroup_size_constant)
  2193. {
  2194. // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
  2195. return;
  2196. }
  2197. else if (!options.vulkan_semantics && is_workgroup_size_constant &&
  2198. !has_decoration(constant.self, DecorationSpecId))
  2199. {
  2200. // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
  2201. return;
  2202. }
  2203. // Only scalars have constant IDs.
  2204. if (has_decoration(constant.self, DecorationSpecId))
  2205. {
  2206. if (options.vulkan_semantics)
  2207. {
  2208. statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
  2209. variable_decl(type, name), " = ", constant_expression(constant), ";");
  2210. }
  2211. else
  2212. {
  2213. const string &macro_name = constant.specialization_constant_macro_name;
  2214. statement("#ifndef ", macro_name);
  2215. statement("#define ", macro_name, " ", constant_expression(constant));
  2216. statement("#endif");
  2217. // For workgroup size constants, only emit the macros.
  2218. if (!is_workgroup_size_constant)
  2219. statement("const ", variable_decl(type, name), " = ", macro_name, ";");
  2220. }
  2221. }
  2222. else
  2223. {
  2224. statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
  2225. }
  2226. }
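// Example of the two paths above for a spec constant with SpecId = 10 and default value 2
// (identifiers are illustrative):
//   Vulkan GLSL:  layout(constant_id = 10) const int Foo = 2;
//   Plain GLSL:   #ifndef SPIRV_CROSS_CONSTANT_ID_10
//                 #define SPIRV_CROSS_CONSTANT_ID_10 2
//                 #endif
//                 const int Foo = SPIRV_CROSS_CONSTANT_ID_10;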
  2227. void CompilerGLSL::emit_entry_point_declarations()
  2228. {
  2229. }
  2230. void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
  2231. {
  2232. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
  2233. if (is_hidden_variable(var))
  2234. return;
  2235. auto *meta = ir.find_meta(var.self);
  2236. if (!meta)
  2237. return;
  2238. auto &m = meta->decoration;
  2239. if (keywords.find(m.alias) != end(keywords))
  2240. m.alias = join("_", m.alias);
  2241. });
  2242. ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
  2243. auto *meta = ir.find_meta(func.self);
  2244. if (!meta)
  2245. return;
  2246. auto &m = meta->decoration;
  2247. if (keywords.find(m.alias) != end(keywords))
  2248. m.alias = join("_", m.alias);
  2249. });
  2250. ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
  2251. auto *meta = ir.find_meta(type.self);
  2252. if (!meta)
  2253. return;
  2254. auto &m = meta->decoration;
  2255. if (keywords.find(m.alias) != end(keywords))
  2256. m.alias = join("_", m.alias);
  2257. for (auto &memb : meta->members)
  2258. if (keywords.find(memb.alias) != end(keywords))
  2259. memb.alias = join("_", memb.alias);
  2260. });
  2261. }
  2262. void CompilerGLSL::replace_illegal_names()
  2263. {
  2264. // clang-format off
  2265. static const unordered_set<string> keywords = {
  2266. "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
  2267. "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
  2268. "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
  2269. "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
  2270. "ceil", "cos", "cosh", "cross", "degrees",
  2271. "dFdx", "dFdxCoarse", "dFdxFine",
  2272. "dFdy", "dFdyCoarse", "dFdyFine",
  2273. "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
  2274. "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
  2275. "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
  2276. "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
  2277. "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
  2278. "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
  2279. "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
  2280. "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
  2281. "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
  2282. "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
  2283. "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
  2284. "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
  2285. "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
  2286. "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
  2287. "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
  2288. "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
  2289. "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
  2290. "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
  2291. "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
  2292. "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
  2293. "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
  2294. "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
  2295. "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
  2296. "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
  2297. "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
  2298. "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
  2299. "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
  2300. "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
  2301. "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
  2302. "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
  2303. "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
  2304. "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
  2305. "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
  2306. "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
  2307. "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
  2308. "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
  2309. "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
  2310. "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
  2311. "while", "writeonly",
  2312. };
  2313. // clang-format on
  2314. replace_illegal_names(keywords);
  2315. }
  2316. void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
  2317. {
  2318. auto &m = ir.meta[var.self].decoration;
  2319. uint32_t location = 0;
  2320. if (m.decoration_flags.get(DecorationLocation))
  2321. location = m.location;
  2322. // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
  2323. // do the access chain part of this for us.
  2324. auto &type = get<SPIRType>(var.basetype);
  2325. if (type.array.empty())
  2326. {
  2327. // Redirect the write to a specific render target in legacy GLSL.
  2328. m.alias = join("gl_FragData[", location, "]");
  2329. if (is_legacy_es() && location != 0)
  2330. require_extension_internal("GL_EXT_draw_buffers");
  2331. }
  2332. else if (type.array.size() == 1)
  2333. {
  2334. // If location is non-zero, we probably have to add an offset.
  2335. // This gets really tricky since we'd have to inject an offset in the access chain.
  2336. // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
  2337. m.alias = "gl_FragData";
  2338. if (location != 0)
  2339. SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
  2340. "This is unimplemented in SPIRV-Cross.");
  2341. if (is_legacy_es())
  2342. require_extension_internal("GL_EXT_draw_buffers");
  2343. }
  2344. else
  2345. SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
  2346. var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
  2347. }
  2348. void CompilerGLSL::replace_fragment_outputs()
  2349. {
  2350. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2351. auto &type = this->get<SPIRType>(var.basetype);
  2352. if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
  2353. replace_fragment_output(var);
  2354. });
  2355. }
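// Example of the swizzle remap performed below (assuming a backend that can swizzle scalars):
// widening a 3-component expression `v` to a vec4 output clamps the last index, yielding
// roughly `v.xyzz`; matching sizes return the expression unchanged.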
  2356. string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
  2357. {
  2358. if (out_type.vecsize == input_components)
  2359. return expr;
  2360. else if (input_components == 1 && !backend.can_swizzle_scalar)
  2361. return join(type_to_glsl(out_type), "(", expr, ")");
  2362. else
  2363. {
  2364. // FIXME: This will not work with packed expressions.
  2365. auto e = enclose_expression(expr) + ".";
  2366. // Just clamp the swizzle index if we have more outputs than inputs.
  2367. for (uint32_t c = 0; c < out_type.vecsize; c++)
  2368. e += index_to_swizzle(min(c, input_components - 1));
  2369. if (backend.swizzle_is_function && out_type.vecsize > 1)
  2370. e += "()";
  2371. remove_duplicate_swizzle(e);
  2372. return e;
  2373. }
  2374. }
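// Illustrative sketch of the pixel local storage blocks emitted below; the exact member
// declarations come from pls_decl() and the remapped PLS variables, e.g. roughly:
//
//   __pixel_local_inEXT _PLSIn
//   {
//       layout(rgba8) highp vec4 color;
//   };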
  2375. void CompilerGLSL::emit_pls()
  2376. {
  2377. auto &execution = get_entry_point();
  2378. if (execution.model != ExecutionModelFragment)
  2379. SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
  2380. if (!options.es)
  2381. SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
  2382. if (options.version < 300)
  2383. SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
  2384. if (!pls_inputs.empty())
  2385. {
  2386. statement("__pixel_local_inEXT _PLSIn");
  2387. begin_scope();
  2388. for (auto &input : pls_inputs)
  2389. statement(pls_decl(input), ";");
  2390. end_scope_decl();
  2391. statement("");
  2392. }
  2393. if (!pls_outputs.empty())
  2394. {
  2395. statement("__pixel_local_outEXT _PLSOut");
  2396. begin_scope();
  2397. for (auto &output : pls_outputs)
  2398. statement(pls_decl(output), ";");
  2399. end_scope_decl();
  2400. statement("");
  2401. }
  2402. }
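// The pass below conservatively marks storage images from old front-ends as both NonWritable
// and NonReadable when neither flag is present; later analysis drops whichever qualifier the
// shader actually violates. In emitted GLSL this can show up as, roughly (illustrative only):
//
//   layout(rgba32f) readonly writeonly uniform image2D uImage;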
  2403. void CompilerGLSL::fixup_image_load_store_access()
  2404. {
  2405. if (!options.enable_storage_image_qualifier_deduction)
  2406. return;
  2407. ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
  2408. auto &vartype = expression_type(var);
  2409. if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
  2410. {
  2411. // Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
  2412. // Solve this by making the image access as restricted as possible and loosen up if we need to.
  2413. // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
  2414. auto &flags = ir.meta[var].decoration.decoration_flags;
  2415. if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
  2416. {
  2417. flags.set(DecorationNonWritable);
  2418. flags.set(DecorationNonReadable);
  2419. }
  2420. }
  2421. });
  2422. }
  2423. static bool is_block_builtin(BuiltIn builtin)
  2424. {
  2425. return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
  2426. builtin == BuiltInCullDistance;
  2427. }
  2428. bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
  2429. {
  2430. // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
  2431. if (storage != StorageClassOutput)
  2432. return false;
  2433. bool should_force = false;
  2434. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2435. if (should_force)
  2436. return;
  2437. auto &type = this->get<SPIRType>(var.basetype);
  2438. bool block = has_decoration(type.self, DecorationBlock);
  2439. if (var.storage == storage && block && is_builtin_variable(var))
  2440. {
  2441. uint32_t member_count = uint32_t(type.member_types.size());
  2442. for (uint32_t i = 0; i < member_count; i++)
  2443. {
  2444. if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
  2445. is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
  2446. has_member_decoration(type.self, i, DecorationOffset))
  2447. {
  2448. should_force = true;
  2449. }
  2450. }
  2451. }
  2452. else if (var.storage == storage && !block && is_builtin_variable(var))
  2453. {
  2454. if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
  2455. has_decoration(var.self, DecorationOffset))
  2456. {
  2457. should_force = true;
  2458. }
  2459. }
  2460. });
  2461. // If we're declaring clip/cull planes with control points we need to force block declaration.
  2462. if (get_execution_model() == ExecutionModelTessellationControl &&
  2463. (clip_distance_count || cull_distance_count))
  2464. {
  2465. should_force = true;
  2466. }
  2467. return should_force;
  2468. }
  2469. void CompilerGLSL::fixup_implicit_builtin_block_names()
  2470. {
  2471. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2472. auto &type = this->get<SPIRType>(var.basetype);
  2473. bool block = has_decoration(type.self, DecorationBlock);
  2474. if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
  2475. is_builtin_variable(var))
  2476. {
  2477. // Make sure the array has a supported name in the code.
  2478. if (var.storage == StorageClassOutput)
  2479. set_name(var.self, "gl_out");
  2480. else if (var.storage == StorageClassInput)
  2481. set_name(var.self, "gl_in");
  2482. }
  2483. });
  2484. }
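// Illustrative sketch of what emit_declared_builtin_block() produces for an output block that
// uses transform feedback (member names are fixed builtins, offsets/strides come from SPIR-V):
//
//   layout(xfb_buffer = 0, xfb_stride = 16) out gl_PerVertex
//   {
//       layout(xfb_offset = 0) vec4 gl_Position;
//   };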
  2485. void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
  2486. {
  2487. Bitset emitted_builtins;
  2488. Bitset global_builtins;
  2489. const SPIRVariable *block_var = nullptr;
  2490. bool emitted_block = false;
  2491. bool builtin_array = false;
  2492. // Need to use declared size in the type.
  2493. // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
  2494. uint32_t cull_distance_size = 0;
  2495. uint32_t clip_distance_size = 0;
  2496. bool have_xfb_buffer_stride = false;
  2497. bool have_geom_stream = false;
  2498. bool have_any_xfb_offset = false;
  2499. uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
  2500. std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
  2501. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2502. auto &type = this->get<SPIRType>(var.basetype);
  2503. bool block = has_decoration(type.self, DecorationBlock);
  2504. Bitset builtins;
  2505. if (var.storage == storage && block && is_builtin_variable(var))
  2506. {
  2507. uint32_t index = 0;
  2508. for (auto &m : ir.meta[type.self].members)
  2509. {
  2510. if (m.builtin)
  2511. {
  2512. builtins.set(m.builtin_type);
  2513. if (m.builtin_type == BuiltInCullDistance)
  2514. cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
  2515. else if (m.builtin_type == BuiltInClipDistance)
  2516. clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
  2517. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
  2518. {
  2519. have_any_xfb_offset = true;
  2520. builtin_xfb_offsets[m.builtin_type] = m.offset;
  2521. }
  2522. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
  2523. {
  2524. uint32_t stream = m.stream;
  2525. if (have_geom_stream && geom_stream != stream)
  2526. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2527. have_geom_stream = true;
  2528. geom_stream = stream;
  2529. }
  2530. }
  2531. index++;
  2532. }
  2533. if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
  2534. has_decoration(var.self, DecorationXfbStride))
  2535. {
  2536. uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
  2537. uint32_t stride = get_decoration(var.self, DecorationXfbStride);
  2538. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  2539. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  2540. if (have_xfb_buffer_stride && stride != xfb_stride)
SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  2542. have_xfb_buffer_stride = true;
  2543. xfb_buffer = buffer_index;
  2544. xfb_stride = stride;
  2545. }
  2546. if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
  2547. {
  2548. uint32_t stream = get_decoration(var.self, DecorationStream);
  2549. if (have_geom_stream && geom_stream != stream)
  2550. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2551. have_geom_stream = true;
  2552. geom_stream = stream;
  2553. }
  2554. }
  2555. else if (var.storage == storage && !block && is_builtin_variable(var))
  2556. {
  2557. // While we're at it, collect all declared global builtins (HLSL mostly ...).
  2558. auto &m = ir.meta[var.self].decoration;
  2559. if (m.builtin)
  2560. {
  2561. global_builtins.set(m.builtin_type);
  2562. if (m.builtin_type == BuiltInCullDistance)
  2563. cull_distance_size = to_array_size_literal(type);
  2564. else if (m.builtin_type == BuiltInClipDistance)
  2565. clip_distance_size = to_array_size_literal(type);
  2566. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
  2567. m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
  2568. {
  2569. have_any_xfb_offset = true;
  2570. builtin_xfb_offsets[m.builtin_type] = m.offset;
  2571. uint32_t buffer_index = m.xfb_buffer;
  2572. uint32_t stride = m.xfb_stride;
  2573. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  2574. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  2575. if (have_xfb_buffer_stride && stride != xfb_stride)
SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  2577. have_xfb_buffer_stride = true;
  2578. xfb_buffer = buffer_index;
  2579. xfb_stride = stride;
  2580. }
  2581. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
  2582. {
  2583. uint32_t stream = get_decoration(var.self, DecorationStream);
  2584. if (have_geom_stream && geom_stream != stream)
  2585. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2586. have_geom_stream = true;
  2587. geom_stream = stream;
  2588. }
  2589. }
  2590. }
  2591. if (builtins.empty())
  2592. return;
  2593. if (emitted_block)
  2594. SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
  2595. emitted_builtins = builtins;
  2596. emitted_block = true;
  2597. builtin_array = !type.array.empty();
  2598. block_var = &var;
  2599. });
  2600. global_builtins =
  2601. Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
  2602. (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
  2603. // Try to collect all other declared builtins.
  2604. if (!emitted_block)
  2605. emitted_builtins = global_builtins;
  2606. // Can't declare an empty interface block.
  2607. if (emitted_builtins.empty())
  2608. return;
  2609. if (storage == StorageClassOutput)
  2610. {
  2611. SmallVector<string> attr;
  2612. if (have_xfb_buffer_stride && have_any_xfb_offset)
  2613. {
  2614. if (!options.es)
  2615. {
if (options.version < 440 && options.version >= 140)
require_extension_internal("GL_ARB_enhanced_layouts");
else if (options.version < 140)
SPIRV_CROSS_THROW("xfb_stride and xfb_buffer are not supported in targets below GLSL 1.40.");
  2622. }
  2623. else if (options.es)
  2624. SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
  2625. attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
  2626. }
  2627. if (have_geom_stream)
  2628. {
  2629. if (get_execution_model() != ExecutionModelGeometry)
  2630. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  2631. if (options.es)
  2632. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  2633. if (options.version < 400)
  2634. require_extension_internal("GL_ARB_transform_feedback3");
  2635. attr.push_back(join("stream = ", geom_stream));
  2636. }
  2637. if (!attr.empty())
  2638. statement("layout(", merge(attr), ") out gl_PerVertex");
  2639. else
  2640. statement("out gl_PerVertex");
  2641. }
  2642. else
  2643. {
  2644. // If we have passthrough, there is no way PerVertex cannot be passthrough.
  2645. if (get_entry_point().geometry_passthrough)
  2646. statement("layout(passthrough) in gl_PerVertex");
  2647. else
  2648. statement("in gl_PerVertex");
  2649. }
  2650. begin_scope();
  2651. if (emitted_builtins.get(BuiltInPosition))
  2652. {
  2653. auto itr = builtin_xfb_offsets.find(BuiltInPosition);
  2654. if (itr != end(builtin_xfb_offsets))
  2655. statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
  2656. else
  2657. statement("vec4 gl_Position;");
  2658. }
  2659. if (emitted_builtins.get(BuiltInPointSize))
  2660. {
  2661. auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
  2662. if (itr != end(builtin_xfb_offsets))
  2663. statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
  2664. else
  2665. statement("float gl_PointSize;");
  2666. }
  2667. if (emitted_builtins.get(BuiltInClipDistance))
  2668. {
  2669. auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
  2670. if (itr != end(builtin_xfb_offsets))
  2671. statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
  2672. else
  2673. statement("float gl_ClipDistance[", clip_distance_size, "];");
  2674. }
  2675. if (emitted_builtins.get(BuiltInCullDistance))
  2676. {
  2677. auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
  2678. if (itr != end(builtin_xfb_offsets))
  2679. statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
  2680. else
  2681. statement("float gl_CullDistance[", cull_distance_size, "];");
  2682. }
  2683. if (builtin_array)
  2684. {
  2685. if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
  2686. end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
  2687. else
  2688. end_scope_decl(join(to_name(block_var->self), "[]"));
  2689. }
  2690. else
  2691. end_scope_decl();
  2692. statement("");
  2693. }
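// OpUndef values are declared as plain globals below. With force_zero_initialized_variables the
// declaration gets a zero initializer, e.g. roughly (identifier is illustrative):
//
//   vec4 _21 = vec4(0.0);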
  2694. void CompilerGLSL::declare_undefined_values()
  2695. {
  2696. bool emitted = false;
  2697. ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) {
  2698. auto &type = this->get<SPIRType>(undef.basetype);
  2699. // OpUndef can be void for some reason ...
  2700. if (type.basetype == SPIRType::Void)
  2701. return;
  2702. string initializer;
  2703. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  2704. initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
  2705. statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
  2706. emitted = true;
  2707. });
  2708. if (emitted)
  2709. statement("");
  2710. }
  2711. bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
  2712. {
  2713. bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
  2714. if (statically_assigned)
  2715. {
  2716. auto *constant = maybe_get<SPIRConstant>(var.static_expression);
  2717. if (constant && constant->is_used_as_lut)
  2718. return true;
  2719. }
  2720. return false;
  2721. }
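// emit_resources() drives the global declaration order of the output shader:
// builtin blocks / clip-cull redeclarations, specialization constants and plain structs,
// buffer reference blocks, UBOs/SSBOs, push constants, uniforms/images/samplers,
// stage in/out interfaces, then module-scope globals and OpUndef declarations.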
  2722. void CompilerGLSL::emit_resources()
  2723. {
  2724. auto &execution = get_entry_point();
  2725. replace_illegal_names();
// Legacy GL uses gl_FragData[]; redeclare all fragment outputs
// with builtins.
  2728. if (execution.model == ExecutionModelFragment && is_legacy())
  2729. replace_fragment_outputs();
  2730. // Emit PLS blocks if we have such variables.
  2731. if (!pls_inputs.empty() || !pls_outputs.empty())
  2732. emit_pls();
  2733. switch (execution.model)
  2734. {
  2735. case ExecutionModelGeometry:
  2736. case ExecutionModelTessellationControl:
  2737. case ExecutionModelTessellationEvaluation:
  2738. fixup_implicit_builtin_block_names();
  2739. break;
  2740. default:
  2741. break;
  2742. }
  2743. // Emit custom gl_PerVertex for SSO compatibility.
  2744. if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
  2745. {
  2746. switch (execution.model)
  2747. {
  2748. case ExecutionModelGeometry:
  2749. case ExecutionModelTessellationControl:
  2750. case ExecutionModelTessellationEvaluation:
  2751. emit_declared_builtin_block(StorageClassInput, execution.model);
  2752. emit_declared_builtin_block(StorageClassOutput, execution.model);
  2753. break;
  2754. case ExecutionModelVertex:
  2755. emit_declared_builtin_block(StorageClassOutput, execution.model);
  2756. break;
  2757. default:
  2758. break;
  2759. }
  2760. }
  2761. else if (should_force_emit_builtin_block(StorageClassOutput))
  2762. {
  2763. emit_declared_builtin_block(StorageClassOutput, execution.model);
  2764. }
  2765. else if (execution.geometry_passthrough)
  2766. {
  2767. // Need to declare gl_in with Passthrough.
  2768. // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
  2769. emit_declared_builtin_block(StorageClassInput, execution.model);
  2770. }
  2771. else
  2772. {
  2773. // Need to redeclare clip/cull distance with explicit size to use them.
  2774. // SPIR-V mandates these builtins have a size declared.
  2775. const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
  2776. if (clip_distance_count != 0)
  2777. statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
  2778. if (cull_distance_count != 0)
  2779. statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
  2780. if (clip_distance_count != 0 || cull_distance_count != 0)
  2781. statement("");
  2782. }
  2783. if (position_invariant)
  2784. {
  2785. statement("invariant gl_Position;");
  2786. statement("");
  2787. }
  2788. bool emitted = false;
// If emitting Vulkan GLSL,
// emit specialization constants as actual floats;
// spec op expressions will redirect to the constant name.
//
  2793. {
  2794. auto loop_lock = ir.create_loop_hard_lock();
  2795. for (auto &id_ : ir.ids_for_constant_or_type)
  2796. {
  2797. auto &id = ir.ids[id_];
  2798. if (id.get_type() == TypeConstant)
  2799. {
  2800. auto &c = id.get<SPIRConstant>();
  2801. bool needs_declaration = c.specialization || c.is_used_as_lut;
  2802. if (needs_declaration)
  2803. {
  2804. if (!options.vulkan_semantics && c.specialization)
  2805. {
  2806. c.specialization_constant_macro_name =
  2807. constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
  2808. }
  2809. emit_constant(c);
  2810. emitted = true;
  2811. }
  2812. }
  2813. else if (id.get_type() == TypeConstantOp)
  2814. {
  2815. emit_specialization_constant_op(id.get<SPIRConstantOp>());
  2816. emitted = true;
  2817. }
  2818. else if (id.get_type() == TypeType)
  2819. {
  2820. auto *type = &id.get<SPIRType>();
  2821. bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
  2822. (!has_decoration(type->self, DecorationBlock) &&
  2823. !has_decoration(type->self, DecorationBufferBlock));
// Special case: ray payload and hit attribute blocks are not really blocks, just regular structs.
  2825. if (type->basetype == SPIRType::Struct && type->pointer &&
  2826. has_decoration(type->self, DecorationBlock) &&
  2827. (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
  2828. type->storage == StorageClassHitAttributeKHR))
  2829. {
  2830. type = &get<SPIRType>(type->parent_type);
  2831. is_natural_struct = true;
  2832. }
  2833. if (is_natural_struct)
  2834. {
  2835. if (emitted)
  2836. statement("");
  2837. emitted = false;
  2838. emit_struct(*type);
  2839. }
  2840. }
  2841. }
  2842. }
  2843. if (emitted)
  2844. statement("");
  2845. // If we needed to declare work group size late, check here.
  2846. // If the work group size depends on a specialization constant, we need to declare the layout() block
  2847. // after constants (and their macros) have been declared.
  2848. if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
  2849. execution.workgroup_size.constant != 0)
  2850. {
  2851. SpecializationConstant wg_x, wg_y, wg_z;
  2852. get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  2853. if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
  2854. {
  2855. SmallVector<string> inputs;
  2856. build_workgroup_size(inputs, wg_x, wg_y, wg_z);
  2857. statement("layout(", merge(inputs), ") in;");
  2858. statement("");
  2859. }
  2860. }
  2861. emitted = false;
  2862. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
  2863. {
  2864. for (auto type : physical_storage_non_block_pointer_types)
  2865. {
  2866. emit_buffer_reference_block(get<SPIRType>(type), false);
  2867. }
  2868. // Output buffer reference blocks.
  2869. // Do this in two stages, one with forward declaration,
  2870. // and one without. Buffer reference blocks can reference themselves
  2871. // to support things like linked lists.
  2872. ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
  2873. bool has_block_flags = has_decoration(type.self, DecorationBlock);
  2874. if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
  2875. type.storage == StorageClassPhysicalStorageBufferEXT)
  2876. {
  2877. emit_buffer_reference_block(type, true);
  2878. }
  2879. });
  2880. ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
  2881. bool has_block_flags = has_decoration(type.self, DecorationBlock);
  2882. if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
  2883. type.storage == StorageClassPhysicalStorageBufferEXT)
  2884. {
  2885. emit_buffer_reference_block(type, false);
  2886. }
  2887. });
  2888. }
  2889. // Output UBOs and SSBOs
  2890. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2891. auto &type = this->get<SPIRType>(var.basetype);
  2892. bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
  2893. type.storage == StorageClassShaderRecordBufferKHR;
  2894. bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
  2895. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  2896. if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
  2897. has_block_flags)
  2898. {
  2899. emit_buffer_block(var);
  2900. }
  2901. });
  2902. // Output push constant blocks
  2903. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2904. auto &type = this->get<SPIRType>(var.basetype);
  2905. if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
  2906. !is_hidden_variable(var))
  2907. {
  2908. emit_push_constant_block(var);
  2909. }
  2910. });
  2911. bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
  2912. // Output Uniform Constants (values, samplers, images, etc).
  2913. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2914. auto &type = this->get<SPIRType>(var.basetype);
  2915. // If we're remapping separate samplers and images, only emit the combined samplers.
  2916. if (skip_separate_image_sampler)
  2917. {
  2918. // Sampler buffers are always used without a sampler, and they will also work in regular GL.
  2919. bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
  2920. bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
  2921. bool separate_sampler = type.basetype == SPIRType::Sampler;
  2922. if (!sampler_buffer && (separate_image || separate_sampler))
  2923. return;
  2924. }
  2925. if (var.storage != StorageClassFunction && type.pointer &&
  2926. (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
  2927. type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
  2928. type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
  2929. type.storage == StorageClassHitAttributeKHR) &&
  2930. !is_hidden_variable(var))
  2931. {
  2932. emit_uniform(var);
  2933. emitted = true;
  2934. }
  2935. });
  2936. if (emitted)
  2937. statement("");
  2938. emitted = false;
  2939. bool emitted_base_instance = false;
  2940. // Output in/out interfaces.
  2941. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2942. auto &type = this->get<SPIRType>(var.basetype);
  2943. bool is_hidden = is_hidden_variable(var);
  2944. // Unused output I/O variables might still be required to implement framebuffer fetch.
  2945. if (var.storage == StorageClassOutput && !is_legacy() &&
  2946. inout_color_attachments.count(get_decoration(var.self, DecorationLocation)) != 0)
  2947. {
  2948. is_hidden = false;
  2949. }
  2950. if (var.storage != StorageClassFunction && type.pointer &&
  2951. (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
  2952. interface_variable_exists_in_entry_point(var.self) && !is_hidden)
  2953. {
  2954. emit_interface_block(var);
  2955. emitted = true;
  2956. }
  2957. else if (is_builtin_variable(var))
  2958. {
  2959. auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
  2960. // For gl_InstanceIndex emulation on GLES, the API user needs to
  2961. // supply this uniform.
  2962. // The draw parameter extension is soft-enabled on GL with some fallbacks.
  2963. if (!options.vulkan_semantics)
  2964. {
  2965. if (!emitted_base_instance &&
  2966. ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
  2967. (builtin == BuiltInBaseInstance)))
  2968. {
  2969. statement("#ifdef GL_ARB_shader_draw_parameters");
  2970. statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
  2971. statement("#else");
  2972. // A crude, but simple workaround which should be good enough for non-indirect draws.
  2973. statement("uniform int SPIRV_Cross_BaseInstance;");
  2974. statement("#endif");
  2975. emitted = true;
  2976. emitted_base_instance = true;
  2977. }
  2978. else if (builtin == BuiltInBaseVertex)
  2979. {
  2980. statement("#ifdef GL_ARB_shader_draw_parameters");
  2981. statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
  2982. statement("#else");
  2983. // A crude, but simple workaround which should be good enough for non-indirect draws.
  2984. statement("uniform int SPIRV_Cross_BaseVertex;");
  2985. statement("#endif");
  2986. }
  2987. else if (builtin == BuiltInDrawIndex)
  2988. {
  2989. statement("#ifndef GL_ARB_shader_draw_parameters");
  2990. // Cannot really be worked around.
  2991. statement("#error GL_ARB_shader_draw_parameters is not supported.");
  2992. statement("#endif");
  2993. }
  2994. }
  2995. }
  2996. });
  2997. // Global variables.
  2998. for (auto global : global_variables)
  2999. {
  3000. auto &var = get<SPIRVariable>(global);
  3001. if (is_hidden_variable(var, true))
  3002. continue;
  3003. if (var.storage != StorageClassOutput)
  3004. {
  3005. if (!variable_is_lut(var))
  3006. {
  3007. add_resource_name(var.self);
  3008. string initializer;
  3009. if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
  3010. !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
  3011. {
  3012. initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
  3013. }
  3014. statement(variable_decl(var), initializer, ";");
  3015. emitted = true;
  3016. }
  3017. }
  3018. else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
  3019. {
  3020. emit_output_variable_initializer(var);
  3021. }
  3022. }
  3023. if (emitted)
  3024. statement("");
  3025. declare_undefined_values();
  3026. }
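// Illustrative sketch for the simple (non-block, non-control-point) path below: the initializer
// is hoisted to a global constant and assigned at the top of main(), roughly:
//
//   const vec4 _5_init = vec4(1.0);   // identifier pattern is "_<id>_init"
//   void main() { FragColor = _5_init; ... }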
  3027. void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
  3028. {
  3029. // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
  3030. auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
  3031. auto &type = get<SPIRType>(var.basetype);
  3032. bool is_patch = has_decoration(var.self, DecorationPatch);
  3033. bool is_block = has_decoration(type.self, DecorationBlock);
  3034. bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
  3035. if (is_block)
  3036. {
  3037. uint32_t member_count = uint32_t(type.member_types.size());
  3038. bool type_is_array = type.array.size() == 1;
  3039. uint32_t array_size = 1;
  3040. if (type_is_array)
  3041. array_size = to_array_size_literal(type);
  3042. uint32_t iteration_count = is_control_point ? 1 : array_size;
  3043. // If the initializer is a block, we must initialize each block member one at a time.
  3044. for (uint32_t i = 0; i < member_count; i++)
  3045. {
  3046. // These outputs might not have been properly declared, so don't initialize them in that case.
  3047. if (has_member_decoration(type.self, i, DecorationBuiltIn))
  3048. {
  3049. if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
  3050. !cull_distance_count)
  3051. continue;
  3052. if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
  3053. !clip_distance_count)
  3054. continue;
  3055. }
  3056. // We need to build a per-member array first, essentially transposing from AoS to SoA.
  3057. // This code path hits when we have an array of blocks.
  3058. string lut_name;
  3059. if (type_is_array)
  3060. {
  3061. lut_name = join("_", var.self, "_", i, "_init");
  3062. uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
  3063. auto &member_type = get<SPIRType>(member_type_id);
  3064. auto array_type = member_type;
  3065. array_type.parent_type = member_type_id;
  3066. array_type.array.push_back(array_size);
  3067. array_type.array_size_literal.push_back(true);
  3068. SmallVector<string> exprs;
  3069. exprs.reserve(array_size);
  3070. auto &c = get<SPIRConstant>(var.initializer);
  3071. for (uint32_t j = 0; j < array_size; j++)
  3072. exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
  3073. statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ",
  3074. type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
  3075. }
  3076. for (uint32_t j = 0; j < iteration_count; j++)
  3077. {
  3078. entry_func.fixup_hooks_in.push_back([=, &var]() {
  3079. AccessChainMeta meta;
  3080. auto &c = this->get<SPIRConstant>(var.initializer);
  3081. uint32_t invocation_id = 0;
  3082. uint32_t member_index_id = 0;
  3083. if (is_control_point)
  3084. {
  3085. uint32_t ids = ir.increase_bound_by(3);
  3086. SPIRType uint_type;
  3087. uint_type.basetype = SPIRType::UInt;
  3088. uint_type.width = 32;
  3089. set<SPIRType>(ids, uint_type);
  3090. set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
  3091. set<SPIRConstant>(ids + 2, ids, i, false);
  3092. invocation_id = ids + 1;
  3093. member_index_id = ids + 2;
  3094. }
  3095. if (is_patch)
  3096. {
  3097. statement("if (gl_InvocationID == 0)");
  3098. begin_scope();
  3099. }
  3100. if (type_is_array && !is_control_point)
  3101. {
  3102. uint32_t indices[2] = { j, i };
  3103. auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  3104. statement(chain, " = ", lut_name, "[", j, "];");
  3105. }
  3106. else if (is_control_point)
  3107. {
  3108. uint32_t indices[2] = { invocation_id, member_index_id };
  3109. auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
  3110. statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
  3111. }
  3112. else
  3113. {
  3114. auto chain =
  3115. access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  3116. statement(chain, " = ", to_expression(c.subconstants[i]), ";");
  3117. }
  3118. if (is_patch)
  3119. end_scope();
  3120. });
  3121. }
  3122. }
  3123. }
  3124. else if (is_control_point)
  3125. {
  3126. auto lut_name = join("_", var.self, "_init");
  3127. statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type),
  3128. " = ", to_expression(var.initializer), ";");
  3129. entry_func.fixup_hooks_in.push_back([&, lut_name]() {
  3130. statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
  3131. });
  3132. }
  3133. else
  3134. {
  3135. auto lut_name = join("_", var.self, "_init");
  3136. statement("const ", type_to_glsl(type), " ", lut_name,
  3137. type_to_array_glsl(type), " = ", to_expression(var.initializer), ";");
  3138. entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
  3139. if (is_patch)
  3140. {
  3141. statement("if (gl_InvocationID == 0)");
  3142. begin_scope();
  3143. }
  3144. statement(to_expression(var.self), " = ", lut_name, ";");
  3145. if (is_patch)
  3146. end_scope();
  3147. });
  3148. }
  3149. }
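// The workarounds below polyfill GL_KHR_shader_subgroup_* functionality on top of older vendor
// extensions. Each feature is emitted as an #if/#elif chain over the candidate extensions,
// e.g. roughly:
//
//   #if defined(GL_NV_shader_thread_group)
//   #define gl_SubgroupSize gl_WarpSizeNV
//   #elif defined(GL_ARB_shader_ballot)
//   #define gl_SubgroupSize gl_SubGroupSizeARB
//   #endif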
  3150. void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
  3151. {
  3152. static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
  3153. "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
  3154. if (!options.vulkan_semantics)
  3155. {
  3156. using Supp = ShaderSubgroupSupportHelper;
  3157. auto result = shader_subgroup_supporter.resolve();
  3158. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
  3159. {
  3160. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
  3161. for (auto &e : exts)
  3162. {
  3163. const char *name = Supp::get_extension_name(e);
  3164. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3165. switch (e)
  3166. {
  3167. case Supp::NV_shader_thread_group:
  3168. statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
  3169. statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
  3170. statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
  3171. statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
  3172. statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
  3173. break;
  3174. case Supp::ARB_shader_ballot:
  3175. statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
  3176. statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
  3177. statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
  3178. statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
  3179. statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
  3180. break;
  3181. default:
  3182. break;
  3183. }
  3184. }
  3185. statement("#endif");
  3186. statement("");
  3187. }
  3188. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
  3189. {
  3190. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
  3191. for (auto &e : exts)
  3192. {
  3193. const char *name = Supp::get_extension_name(e);
  3194. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3195. switch (e)
  3196. {
  3197. case Supp::NV_shader_thread_group:
  3198. statement("#define gl_SubgroupSize gl_WarpSizeNV");
  3199. break;
  3200. case Supp::ARB_shader_ballot:
  3201. statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
  3202. break;
  3203. case Supp::AMD_gcn_shader:
  3204. statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
  3205. break;
  3206. default:
  3207. break;
  3208. }
  3209. }
  3210. statement("#endif");
  3211. statement("");
  3212. }
  3213. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
  3214. {
  3215. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
  3216. for (auto &e : exts)
  3217. {
  3218. const char *name = Supp::get_extension_name(e);
  3219. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3220. switch (e)
  3221. {
  3222. case Supp::NV_shader_thread_group:
  3223. statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
  3224. break;
  3225. case Supp::ARB_shader_ballot:
  3226. statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
  3227. break;
  3228. default:
  3229. break;
  3230. }
  3231. }
  3232. statement("#endif");
  3233. statement("");
  3234. }
  3235. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
  3236. {
  3237. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
  3238. for (auto &e : exts)
  3239. {
  3240. const char *name = Supp::get_extension_name(e);
  3241. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3242. switch (e)
  3243. {
  3244. case Supp::NV_shader_thread_group:
  3245. statement("#define gl_SubgroupID gl_WarpIDNV");
  3246. break;
  3247. default:
  3248. break;
  3249. }
  3250. }
  3251. statement("#endif");
  3252. statement("");
  3253. }
  3254. if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
  3255. {
  3256. auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
  3257. for (auto &e : exts)
  3258. {
  3259. const char *name = Supp::get_extension_name(e);
  3260. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3261. switch (e)
  3262. {
  3263. case Supp::NV_shader_thread_group:
  3264. statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
  3265. break;
  3266. default:
  3267. break;
  3268. }
  3269. }
  3270. statement("#endif");
  3271. statement("");
  3272. }
  3273. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
  3274. {
  3275. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
  3276. for (auto &e : exts)
  3277. {
  3278. const char *name = Supp::get_extension_name(e);
  3279. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3280. switch (e)
  3281. {
  3282. case Supp::NV_shader_thread_shuffle:
  3283. for (const char *t : workaround_types)
  3284. {
  3285. statement(t, " subgroupBroadcastFirst(", t,
  3286. " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
  3287. }
  3288. for (const char *t : workaround_types)
  3289. {
  3290. statement(t, " subgroupBroadcast(", t,
  3291. " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
  3292. }
  3293. break;
  3294. case Supp::ARB_shader_ballot:
  3295. for (const char *t : workaround_types)
  3296. {
  3297. statement(t, " subgroupBroadcastFirst(", t,
  3298. " value) { return readFirstInvocationARB(value); }");
  3299. }
  3300. for (const char *t : workaround_types)
  3301. {
  3302. statement(t, " subgroupBroadcast(", t,
  3303. " value, uint id) { return readInvocationARB(value, id); }");
  3304. }
  3305. break;
  3306. default:
  3307. break;
  3308. }
  3309. }
  3310. statement("#endif");
  3311. statement("");
  3312. }
  3313. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
  3314. {
  3315. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
  3316. for (auto &e : exts)
  3317. {
  3318. const char *name = Supp::get_extension_name(e);
  3319. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3320. switch (e)
  3321. {
  3322. case Supp::NV_shader_thread_group:
  3323. statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
  3324. statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
  3325. break;
  3326. default:
  3327. break;
  3328. }
  3329. }
  3330. statement("#else");
  3331. statement("uint subgroupBallotFindLSB(uvec4 value)");
  3332. begin_scope();
  3333. statement("int firstLive = findLSB(value.x);");
  3334. statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
  3335. end_scope();
  3336. statement("uint subgroupBallotFindMSB(uvec4 value)");
  3337. begin_scope();
  3338. statement("int firstLive = findMSB(value.y);");
  3339. statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
  3340. end_scope();
  3341. statement("#endif");
  3342. statement("");
  3343. }
  3344. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
  3345. {
  3346. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
  3347. for (auto &e : exts)
  3348. {
  3349. const char *name = Supp::get_extension_name(e);
  3350. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3351. switch (e)
  3352. {
  3353. case Supp::NV_gpu_shader_5:
  3354. statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
  3355. statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
  3356. statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
  3357. break;
  3358. case Supp::ARB_shader_group_vote:
  3359. statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
  3360. statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
  3361. statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
  3362. break;
  3363. case Supp::AMD_gcn_shader:
  3364. statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
  3365. statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
  3366. statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
  3367. "b == ballotAMD(true); }");
  3368. break;
  3369. default:
  3370. break;
  3371. }
  3372. }
  3373. statement("#endif");
  3374. statement("");
  3375. }
  3376. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
  3377. {
  3378. statement("#ifndef GL_KHR_shader_subgroup_vote");
  3379. statement(
  3380. "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
  3381. "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
  3382. for (const char *t : workaround_types)
  3383. statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
  3384. statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
  3385. statement("#endif");
  3386. statement("");
  3387. }
  3388. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
  3389. {
  3390. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
  3391. for (auto &e : exts)
  3392. {
  3393. const char *name = Supp::get_extension_name(e);
  3394. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3395. switch (e)
  3396. {
  3397. case Supp::NV_shader_thread_group:
  3398. statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
  3399. break;
  3400. case Supp::ARB_shader_ballot:
  3401. statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
  3402. break;
  3403. default:
  3404. break;
  3405. }
  3406. }
  3407. statement("#endif");
  3408. statement("");
  3409. }
  3410. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
  3411. {
  3412. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3413. statement("bool subgroupElect()");
  3414. begin_scope();
  3415. statement("uvec4 activeMask = subgroupBallot(true);");
  3416. statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
  3417. statement("return gl_SubgroupInvocationID == firstLive;");
  3418. end_scope();
  3419. statement("#endif");
  3420. statement("");
  3421. }
  3422. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
  3423. {
// Extensions we're using in place of GL_KHR_shader_subgroup_basic state
// that subgroups execute in lockstep, so this barrier is implicit.
// However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
// and a specific test of optimizing scans by leveraging lock-step invocation execution
// has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
  3429. // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
  3430. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3431. statement("void subgroupBarrier() { memoryBarrierShared(); }");
  3432. statement("#endif");
  3433. statement("");
  3434. }
  3435. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
  3436. {
  3437. if (model == spv::ExecutionModelGLCompute)
  3438. {
  3439. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3440. statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
  3441. statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
  3442. statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
  3443. statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
  3444. statement("#endif");
  3445. }
  3446. else
  3447. {
  3448. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3449. statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
  3450. statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
  3451. statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
  3452. statement("#endif");
  3453. }
  3454. statement("");
  3455. }
  3456. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
  3457. {
  3458. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  3459. statement("bool subgroupInverseBallot(uvec4 value)");
  3460. begin_scope();
  3461. statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
  3462. end_scope();
  3463. statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
  3464. begin_scope();
  3465. statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
  3466. statement("ivec2 c = bitCount(v);");
  3467. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  3468. statement("return uint(c.x);");
  3469. statement_no_indent("#else");
  3470. statement("return uint(c.x + c.y);");
  3471. statement_no_indent("#endif");
  3472. end_scope();
  3473. statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
  3474. begin_scope();
  3475. statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
  3476. statement("ivec2 c = bitCount(v);");
  3477. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  3478. statement("return uint(c.x);");
  3479. statement_no_indent("#else");
  3480. statement("return uint(c.x + c.y);");
  3481. statement_no_indent("#endif");
  3482. end_scope();
  3483. statement("#endif");
  3484. statement("");
  3485. }
  3486. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
  3487. {
  3488. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  3489. statement("uint subgroupBallotBitCount(uvec4 value)");
  3490. begin_scope();
  3491. statement("ivec2 c = bitCount(value.xy);");
  3492. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  3493. statement("return uint(c.x);");
  3494. statement_no_indent("#else");
  3495. statement("return uint(c.x + c.y);");
  3496. statement_no_indent("#endif");
  3497. end_scope();
  3498. statement("#endif");
  3499. statement("");
  3500. }
  3501. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
  3502. {
  3503. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  3504. statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
  3505. begin_scope();
  3506. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  3507. statement("uint shifted = value.x >> index;");
  3508. statement_no_indent("#else");
  3509. statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
  3510. statement_no_indent("#endif");
  3511. statement("return (shifted & 1u) != 0u;");
  3512. end_scope();
  3513. statement("#endif");
  3514. statement("");
  3515. }
  3516. }
  3517. if (!workaround_ubo_load_overload_types.empty())
  3518. {
  3519. for (auto &type_id : workaround_ubo_load_overload_types)
  3520. {
  3521. auto &type = get<SPIRType>(type_id);
  3522. statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type),
  3523. " wrap) { return wrap; }");
  3524. }
  3525. statement("");
  3526. }
  3527. if (requires_transpose_2x2)
  3528. {
  3529. statement("mat2 spvTranspose(mat2 m)");
  3530. begin_scope();
  3531. statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
  3532. end_scope();
  3533. statement("");
  3534. }
  3535. if (requires_transpose_3x3)
  3536. {
  3537. statement("mat3 spvTranspose(mat3 m)");
  3538. begin_scope();
  3539. statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
  3540. end_scope();
  3541. statement("");
  3542. }
  3543. if (requires_transpose_4x4)
  3544. {
  3545. statement("mat4 spvTranspose(mat4 m)");
  3546. begin_scope();
  3547. statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
  3548. "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
  3549. end_scope();
  3550. statement("");
  3551. }
  3552. }
// Returns a string representation of the ID, usable as a function arg.
// Default is to simply return the expression representation of the arg ID.
// Subclasses may override to modify the return value.
  3556. string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
  3557. {
  3558. // Make sure that we use the name of the original variable, and not the parameter alias.
  3559. uint32_t name_id = id;
  3560. auto *var = maybe_get<SPIRVariable>(id);
  3561. if (var && var->basevariable)
  3562. name_id = var->basevariable;
  3563. return to_expression(name_id);
  3564. }
  3565. void CompilerGLSL::handle_invalid_expression(uint32_t id)
  3566. {
  3567. // We tried to read an invalidated expression.
  3568. // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
  3569. forced_temporaries.insert(id);
  3570. force_recompile();
  3571. }
// Converts the format of the current expression from packed to unpacked
// by wrapping the expression in a constructor of the appropriate type.
// GLSL does not support packed formats, so simply return the expression as-is.
// Subclasses that do support packed formats will override this.
  3576. string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
  3577. {
  3578. return expr_str;
  3579. }
// Sometimes we proactively enclosed an expression where it turns out we might not have needed it after all.
  3581. void CompilerGLSL::strip_enclosed_expression(string &expr)
  3582. {
  3583. if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
  3584. return;
  3585. // Have to make sure that our first and last parens actually enclose everything inside it.
  3586. uint32_t paren_count = 0;
  3587. for (auto &c : expr)
  3588. {
  3589. if (c == '(')
  3590. paren_count++;
  3591. else if (c == ')')
  3592. {
  3593. paren_count--;
  3594. // If we hit 0 and this is not the final char, our first and final parens actually don't
  3595. // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
  3596. if (paren_count == 0 && &c != &expr.back())
  3597. return;
  3598. }
  3599. }
  3600. expr.erase(expr.size() - 1, 1);
  3601. expr.erase(begin(expr));
  3602. }
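// Examples for strip_enclosed_expression(): "(a + b)" becomes "a + b", while
// "(a + b) * (c + d)" is left untouched because the outer parens do not enclose
// the whole expression.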
  3603. string CompilerGLSL::enclose_expression(const string &expr)
  3604. {
  3605. bool need_parens = false;
  3606. // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
  3607. // unary expressions.
  3608. if (!expr.empty())
  3609. {
  3610. auto c = expr.front();
  3611. if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
  3612. need_parens = true;
  3613. }
  3614. if (!need_parens)
  3615. {
  3616. uint32_t paren_count = 0;
  3617. for (auto c : expr)
  3618. {
  3619. if (c == '(' || c == '[')
  3620. paren_count++;
  3621. else if (c == ')' || c == ']')
  3622. {
  3623. assert(paren_count);
  3624. paren_count--;
  3625. }
  3626. else if (c == ' ' && paren_count == 0)
  3627. {
  3628. need_parens = true;
  3629. break;
  3630. }
  3631. }
  3632. assert(paren_count == 0);
  3633. }
  3634. // If this expression contains any spaces which are not enclosed by parentheses,
  3635. // we need to enclose it so we can treat the whole string as an expression.
  3636. // This happens when two expressions have been part of a binary op earlier.
  3637. if (need_parens)
  3638. return join('(', expr, ')');
  3639. else
  3640. return expr;
  3641. }
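// Examples for enclose_expression(): "a + b" -> "(a + b)", "-x" -> "(-x)",
// "foo(a, b)" stays as-is since its only space sits inside balanced parentheses.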
  3642. string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
  3643. {
  3644. // If this expression starts with an address-of operator ('&'), then
  3645. // just return the part after the operator.
  3646. // TODO: Strip parens if unnecessary?
  3647. if (expr.front() == '&')
  3648. return expr.substr(1);
  3649. else if (backend.native_pointers)
  3650. return join('*', expr);
  3651. else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
  3652. expr_type.pointer_depth == 1)
  3653. {
  3654. return join(enclose_expression(expr), ".value");
  3655. }
  3656. else
  3657. return expr;
  3658. }
  3659. string CompilerGLSL::address_of_expression(const std::string &expr)
  3660. {
  3661. if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
  3662. {
  3663. // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
  3664. // the first two and last characters. We might have to enclose the expression.
  3665. // This doesn't work for cases like (*foo + 10),
  3666. // but this is an r-value expression which we cannot take the address of anyways.
  3667. return enclose_expression(expr.substr(2, expr.size() - 3));
  3668. }
  3669. else if (expr.front() == '*')
  3670. {
  3671. // If this expression starts with a dereference operator ('*'), then
  3672. // just return the part after the operator.
  3673. return expr.substr(1);
  3674. }
  3675. else
  3676. return join('&', enclose_expression(expr));
  3677. }
  3678. // Just like to_expression except that we enclose the expression inside parentheses if needed.
  3679. string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
  3680. {
  3681. return enclose_expression(to_expression(id, register_expression_read));
  3682. }
  3683. // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
  3684. // need_transpose must be forced to false.
  3685. string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
  3686. {
  3687. return unpack_expression_type(to_expression(id), expression_type(id),
  3688. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  3689. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
  3690. }
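// Like to_expression, but unpacks remapped or packed physical types into their logical representation first.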
  3691. string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
  3692. {
  3693. // If we need to transpose, it will also take care of unpacking rules.
  3694. auto *e = maybe_get<SPIRExpression>(id);
  3695. bool need_transpose = e && e->need_transpose;
  3696. bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  3697. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  3698. if (!need_transpose && (is_remapped || is_packed))
  3699. {
  3700. return unpack_expression_type(to_expression(id, register_expression_read),
  3701. get_pointee_type(expression_type_id(id)),
  3702. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  3703. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
  3704. }
  3705. else
  3706. return to_expression(id, register_expression_read);
  3707. }
  3708. string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
  3709. {
  3710. // If we need to transpose, it will also take care of unpacking rules.
  3711. auto *e = maybe_get<SPIRExpression>(id);
  3712. bool need_transpose = e && e->need_transpose;
  3713. bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  3714. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  3715. if (!need_transpose && (is_remapped || is_packed))
  3716. {
  3717. return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
  3718. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  3719. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
  3720. }
  3721. else
  3722. return to_enclosed_expression(id, register_expression_read);
  3723. }
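// Emits the expression for an ID, explicitly dereferencing it if it is a pointer which should be dereferenced here.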
  3724. string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
  3725. {
  3726. auto &type = expression_type(id);
  3727. if (type.pointer && should_dereference(id))
  3728. return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
  3729. else
  3730. return to_expression(id, register_expression_read);
  3731. }
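// Emits the expression as a pointer value, taking the address of l-values which are not implicitly dereferenced.
// to_enclosed_pointer_expression below does the same, but encloses the fallback expression if needed.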
  3732. string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
  3733. {
  3734. auto &type = expression_type(id);
  3735. if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
  3736. return address_of_expression(to_enclosed_expression(id, register_expression_read));
  3737. else
  3738. return to_unpacked_expression(id, register_expression_read);
  3739. }
  3740. string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
  3741. {
  3742. auto &type = expression_type(id);
  3743. if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
  3744. return address_of_expression(to_enclosed_expression(id, register_expression_read));
  3745. else
  3746. return to_enclosed_unpacked_expression(id, register_expression_read);
  3747. }
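// Extracts a single component from a vector expression, using array indexing for packed types and a swizzle otherwise.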
  3748. string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
  3749. {
  3750. auto expr = to_enclosed_expression(id);
  3751. if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
  3752. return join(expr, "[", index, "]");
  3753. else
  3754. return join(expr, ".", index_to_swizzle(index));
  3755. }
  3756. string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
  3757. const uint32_t *chain, uint32_t length)
  3758. {
3759. // It is kind of silly if an application actually enters this path since it knows the constant up front.
  3760. // It is useful here to extract the plain constant directly.
  3761. SPIRConstant tmp;
  3762. tmp.constant_type = result_type;
  3763. auto &composite_type = get<SPIRType>(c.constant_type);
  3764. assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
  3765. assert(!c.specialization);
  3766. if (is_matrix(composite_type))
  3767. {
  3768. if (length == 2)
  3769. {
  3770. tmp.m.c[0].vecsize = 1;
  3771. tmp.m.columns = 1;
  3772. tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
  3773. }
  3774. else
  3775. {
  3776. assert(length == 1);
  3777. tmp.m.c[0].vecsize = composite_type.vecsize;
  3778. tmp.m.columns = 1;
  3779. tmp.m.c[0] = c.m.c[chain[0]];
  3780. }
  3781. }
  3782. else
  3783. {
  3784. assert(length == 1);
  3785. tmp.m.c[0].vecsize = 1;
  3786. tmp.m.columns = 1;
  3787. tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
  3788. }
  3789. return constant_expression(tmp);
  3790. }
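// Builds a nested initializer list which reads every array element individually,
// e.g. { arr[0], arr[1] } for a two-element array. Recurses for arrays of arrays.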
  3791. string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
  3792. {
  3793. uint32_t size = to_array_size_literal(type);
  3794. auto &parent = get<SPIRType>(type.parent_type);
  3795. string expr = "{ ";
  3796. for (uint32_t i = 0; i < size; i++)
  3797. {
  3798. auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
  3799. if (parent.array.empty())
  3800. expr += subexpr;
  3801. else
  3802. expr += to_rerolled_array_expression(subexpr, parent);
  3803. if (i + 1 < size)
  3804. expr += ", ";
  3805. }
  3806. expr += " }";
  3807. return expr;
  3808. }
  3809. string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset)
  3810. {
  3811. auto &type = expression_type(id);
  3812. bool reroll_array = !type.array.empty() && (!backend.array_is_value_type ||
  3813. (uses_buffer_offset && !backend.buffer_offset_array_is_value_type));
  3814. if (reroll_array)
  3815. {
  3816. // For this case, we need to "re-roll" an array initializer from a temporary.
  3817. // We cannot simply pass the array directly, since it decays to a pointer and it cannot
  3818. // participate in a struct initializer. E.g.
  3819. // float arr[2] = { 1.0, 2.0 };
  3820. // Foo foo = { arr }; must be transformed to
  3821. // Foo foo = { { arr[0], arr[1] } };
  3822. // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
  3823. // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
  3824. // as temporaries anyways.
  3825. return to_rerolled_array_expression(to_enclosed_expression(id), type);
  3826. }
  3827. else
  3828. return to_unpacked_expression(id);
  3829. }
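// Main entry point for turning an ID (expression, constant, constant op, variable, ...) into a GLSL expression string.
// Also handles invalidated expressions and optionally registers the expression read.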
  3830. string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
  3831. {
  3832. auto itr = invalid_expressions.find(id);
  3833. if (itr != end(invalid_expressions))
  3834. handle_invalid_expression(id);
  3835. if (ir.ids[id].get_type() == TypeExpression)
  3836. {
  3837. // We might have a more complex chain of dependencies.
  3838. // A possible scenario is that we
  3839. //
  3840. // %1 = OpLoad
3841. // %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
  3842. // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
  3843. // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
3844. // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see the %1 expression after the store, not before.
  3845. //
  3846. // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
  3847. // and see that we should not forward reads of the original variable.
  3848. auto &expr = get<SPIRExpression>(id);
  3849. for (uint32_t dep : expr.expression_dependencies)
  3850. if (invalid_expressions.find(dep) != end(invalid_expressions))
  3851. handle_invalid_expression(dep);
  3852. }
  3853. if (register_expression_read)
  3854. track_expression_read(id);
  3855. switch (ir.ids[id].get_type())
  3856. {
  3857. case TypeExpression:
  3858. {
  3859. auto &e = get<SPIRExpression>(id);
  3860. if (e.base_expression)
  3861. return to_enclosed_expression(e.base_expression) + e.expression;
  3862. else if (e.need_transpose)
  3863. {
  3864. // This should not be reached for access chains, since we always deal explicitly with transpose state
  3865. // when consuming an access chain expression.
  3866. uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  3867. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  3868. return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
  3869. is_packed);
  3870. }
  3871. else if (flattened_structs.count(id))
  3872. {
  3873. return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
  3874. }
  3875. else
  3876. {
  3877. if (is_forcing_recompilation())
  3878. {
3879. // During the first compilation phase, certain expression patterns can trigger exponential growth of memory.
  3880. // Avoid this by returning dummy expressions during this phase.
  3881. // Do not use empty expressions here, because those are sentinels for other cases.
  3882. return "_";
  3883. }
  3884. else
  3885. return e.expression;
  3886. }
  3887. }
  3888. case TypeConstant:
  3889. {
  3890. auto &c = get<SPIRConstant>(id);
  3891. auto &type = get<SPIRType>(c.constant_type);
  3892. // WorkGroupSize may be a constant.
  3893. auto &dec = ir.meta[c.self].decoration;
  3894. if (dec.builtin)
  3895. return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
  3896. else if (c.specialization)
  3897. return to_name(id);
  3898. else if (c.is_used_as_lut)
  3899. return to_name(id);
  3900. else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
  3901. return to_name(id);
  3902. else if (!type.array.empty() && !backend.can_declare_arrays_inline)
  3903. return to_name(id);
  3904. else
  3905. return constant_expression(c);
  3906. }
  3907. case TypeConstantOp:
  3908. return to_name(id);
  3909. case TypeVariable:
  3910. {
  3911. auto &var = get<SPIRVariable>(id);
  3912. // If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
3913. // since the variable has not been declared yet.
  3914. if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
  3915. return to_expression(var.static_expression);
  3916. else if (var.deferred_declaration)
  3917. {
  3918. var.deferred_declaration = false;
  3919. return variable_decl(var);
  3920. }
  3921. else if (flattened_structs.count(id))
  3922. {
  3923. return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
  3924. }
  3925. else
  3926. {
  3927. auto &dec = ir.meta[var.self].decoration;
  3928. if (dec.builtin)
  3929. return builtin_to_glsl(dec.builtin_type, var.storage);
  3930. else
  3931. return to_name(id);
  3932. }
  3933. }
  3934. case TypeCombinedImageSampler:
3935. // We should never take the expression of this type directly.
  3936. // The intention is that texture sampling functions will extract the image and samplers
  3937. // separately and take their expressions as needed.
  3938. // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
  3939. // expression ala sampler2D(texture, sampler).
  3940. SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
  3941. case TypeAccessChain:
3942. // We cannot express this type. Access chains only have meaning in other OpAccessChains, OpStore or OpLoad.
  3943. SPIRV_CROSS_THROW("Access chains have no default expression representation.");
  3944. default:
  3945. return to_name(id);
  3946. }
  3947. }
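// Emits an OpSpecConstantOp as a constant GLSL expression, mapping the opcode onto GLSL operators and casts.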
  3948. string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
  3949. {
  3950. auto &type = get<SPIRType>(cop.basetype);
  3951. bool binary = false;
  3952. bool unary = false;
  3953. string op;
  3954. if (is_legacy() && is_unsigned_opcode(cop.opcode))
  3955. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
  3956. // TODO: Find a clean way to reuse emit_instruction.
  3957. switch (cop.opcode)
  3958. {
  3959. case OpSConvert:
  3960. case OpUConvert:
  3961. case OpFConvert:
  3962. op = type_to_glsl_constructor(type);
  3963. break;
  3964. #define GLSL_BOP(opname, x) \
  3965. case Op##opname: \
  3966. binary = true; \
  3967. op = x; \
  3968. break
  3969. #define GLSL_UOP(opname, x) \
  3970. case Op##opname: \
  3971. unary = true; \
  3972. op = x; \
  3973. break
  3974. GLSL_UOP(SNegate, "-");
  3975. GLSL_UOP(Not, "~");
  3976. GLSL_BOP(IAdd, "+");
  3977. GLSL_BOP(ISub, "-");
  3978. GLSL_BOP(IMul, "*");
  3979. GLSL_BOP(SDiv, "/");
  3980. GLSL_BOP(UDiv, "/");
  3981. GLSL_BOP(UMod, "%");
  3982. GLSL_BOP(SMod, "%");
  3983. GLSL_BOP(ShiftRightLogical, ">>");
  3984. GLSL_BOP(ShiftRightArithmetic, ">>");
  3985. GLSL_BOP(ShiftLeftLogical, "<<");
  3986. GLSL_BOP(BitwiseOr, "|");
  3987. GLSL_BOP(BitwiseXor, "^");
  3988. GLSL_BOP(BitwiseAnd, "&");
  3989. GLSL_BOP(LogicalOr, "||");
  3990. GLSL_BOP(LogicalAnd, "&&");
  3991. GLSL_UOP(LogicalNot, "!");
  3992. GLSL_BOP(LogicalEqual, "==");
  3993. GLSL_BOP(LogicalNotEqual, "!=");
  3994. GLSL_BOP(IEqual, "==");
  3995. GLSL_BOP(INotEqual, "!=");
  3996. GLSL_BOP(ULessThan, "<");
  3997. GLSL_BOP(SLessThan, "<");
  3998. GLSL_BOP(ULessThanEqual, "<=");
  3999. GLSL_BOP(SLessThanEqual, "<=");
  4000. GLSL_BOP(UGreaterThan, ">");
  4001. GLSL_BOP(SGreaterThan, ">");
  4002. GLSL_BOP(UGreaterThanEqual, ">=");
  4003. GLSL_BOP(SGreaterThanEqual, ">=");
  4004. case OpSelect:
  4005. {
  4006. if (cop.arguments.size() < 3)
  4007. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4008. // This one is pretty annoying. It's triggered from
  4009. // uint(bool), int(bool) from spec constants.
  4010. // In order to preserve its compile-time constness in Vulkan GLSL,
  4011. // we need to reduce the OpSelect expression back to this simplified model.
  4012. // If we cannot, fail.
  4013. if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
  4014. {
  4015. // Implement as a simple cast down below.
  4016. }
  4017. else
  4018. {
  4019. // Implement a ternary and pray the compiler understands it :)
  4020. return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
  4021. }
  4022. break;
  4023. }
  4024. case OpVectorShuffle:
  4025. {
  4026. string expr = type_to_glsl_constructor(type);
  4027. expr += "(";
  4028. uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
  4029. string left_arg = to_enclosed_expression(cop.arguments[0]);
  4030. string right_arg = to_enclosed_expression(cop.arguments[1]);
  4031. for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
  4032. {
  4033. uint32_t index = cop.arguments[i];
  4034. if (index >= left_components)
  4035. expr += right_arg + "." + "xyzw"[index - left_components];
  4036. else
  4037. expr += left_arg + "." + "xyzw"[index];
  4038. if (i + 1 < uint32_t(cop.arguments.size()))
  4039. expr += ", ";
  4040. }
  4041. expr += ")";
  4042. return expr;
  4043. }
  4044. case OpCompositeExtract:
  4045. {
  4046. auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
  4047. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  4048. return expr;
  4049. }
  4050. case OpCompositeInsert:
  4051. SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
  4052. default:
4053. // Some opcodes are unimplemented here; these are currently not possible to test from glslang.
  4054. SPIRV_CROSS_THROW("Unimplemented spec constant op.");
  4055. }
  4056. uint32_t bit_width = 0;
  4057. if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
  4058. bit_width = expression_type(cop.arguments[0]).width;
  4059. SPIRType::BaseType input_type;
  4060. bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
  4061. switch (cop.opcode)
  4062. {
  4063. case OpIEqual:
  4064. case OpINotEqual:
  4065. input_type = to_signed_basetype(bit_width);
  4066. break;
  4067. case OpSLessThan:
  4068. case OpSLessThanEqual:
  4069. case OpSGreaterThan:
  4070. case OpSGreaterThanEqual:
  4071. case OpSMod:
  4072. case OpSDiv:
  4073. case OpShiftRightArithmetic:
  4074. case OpSConvert:
  4075. case OpSNegate:
  4076. input_type = to_signed_basetype(bit_width);
  4077. break;
  4078. case OpULessThan:
  4079. case OpULessThanEqual:
  4080. case OpUGreaterThan:
  4081. case OpUGreaterThanEqual:
  4082. case OpUMod:
  4083. case OpUDiv:
  4084. case OpShiftRightLogical:
  4085. case OpUConvert:
  4086. input_type = to_unsigned_basetype(bit_width);
  4087. break;
  4088. default:
  4089. input_type = type.basetype;
  4090. break;
  4091. }
  4092. #undef GLSL_BOP
  4093. #undef GLSL_UOP
  4094. if (binary)
  4095. {
  4096. if (cop.arguments.size() < 2)
  4097. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4098. string cast_op0;
  4099. string cast_op1;
  4100. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
  4101. cop.arguments[1], skip_cast_if_equal_type);
  4102. if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
  4103. {
  4104. expected_type.basetype = input_type;
  4105. auto expr = bitcast_glsl_op(type, expected_type);
  4106. expr += '(';
  4107. expr += join(cast_op0, " ", op, " ", cast_op1);
  4108. expr += ')';
  4109. return expr;
  4110. }
  4111. else
  4112. return join("(", cast_op0, " ", op, " ", cast_op1, ")");
  4113. }
  4114. else if (unary)
  4115. {
  4116. if (cop.arguments.size() < 1)
  4117. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4118. // Auto-bitcast to result type as needed.
  4119. // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
  4120. return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
  4121. }
  4122. else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
  4123. {
  4124. if (cop.arguments.size() < 1)
  4125. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4126. auto &arg_type = expression_type(cop.arguments[0]);
  4127. if (arg_type.width < type.width && input_type != arg_type.basetype)
  4128. {
  4129. auto expected = arg_type;
  4130. expected.basetype = input_type;
  4131. return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
  4132. }
  4133. else
  4134. return join(op, "(", to_expression(cop.arguments[0]), ")");
  4135. }
  4136. else
  4137. {
  4138. if (cop.arguments.size() < 1)
  4139. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4140. return join(op, "(", to_expression(cop.arguments[0]), ")");
  4141. }
  4142. }
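// Emits a constant as a GLSL expression. Composites become initializer lists or constructor calls depending on the backend,
// matrices are emitted column by column, and pointer constants use the backend's null pointer literal.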
  4143. string CompilerGLSL::constant_expression(const SPIRConstant &c)
  4144. {
  4145. auto &type = get<SPIRType>(c.constant_type);
  4146. if (type.pointer)
  4147. {
  4148. return backend.null_pointer_literal;
  4149. }
  4150. else if (!c.subconstants.empty())
  4151. {
4152. // Handles arrays and structures.
  4153. string res;
  4154. // Allow Metal to use the array<T> template to make arrays a value type
4155. bool needs_trailing_bracket = false;
  4156. if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
  4157. type.array.empty())
  4158. {
  4159. res = type_to_glsl_constructor(type) + "{ ";
  4160. }
  4161. else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
  4162. !type.array.empty())
  4163. {
  4164. res = type_to_glsl_constructor(type) + "({ ";
4165. needs_trailing_bracket = true;
  4166. }
  4167. else if (backend.use_initializer_list)
  4168. {
  4169. res = "{ ";
  4170. }
  4171. else
  4172. {
  4173. res = type_to_glsl_constructor(type) + "(";
  4174. }
  4175. for (auto &elem : c.subconstants)
  4176. {
  4177. auto &subc = get<SPIRConstant>(elem);
  4178. if (subc.specialization)
  4179. res += to_name(elem);
  4180. else
  4181. res += constant_expression(subc);
  4182. if (&elem != &c.subconstants.back())
  4183. res += ", ";
  4184. }
  4185. res += backend.use_initializer_list ? " }" : ")";
4186. if (needs_trailing_bracket)
  4187. res += ")";
  4188. return res;
  4189. }
  4190. else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
  4191. {
  4192. // Metal tessellation likes empty structs which are then constant expressions.
  4193. if (backend.supports_empty_struct)
  4194. return "{ }";
  4195. else if (backend.use_typed_initializer_list)
  4196. return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }");
  4197. else if (backend.use_initializer_list)
  4198. return "{ 0 }";
  4199. else
  4200. return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)");
  4201. }
  4202. else if (c.columns() == 1)
  4203. {
  4204. return constant_expression_vector(c, 0);
  4205. }
  4206. else
  4207. {
  4208. string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
  4209. for (uint32_t col = 0; col < c.columns(); col++)
  4210. {
  4211. if (c.specialization_constant_id(col) != 0)
  4212. res += to_name(c.specialization_constant_id(col));
  4213. else
  4214. res += constant_expression_vector(c, col);
  4215. if (col + 1 < c.columns())
  4216. res += ", ";
  4217. }
  4218. res += ")";
  4219. return res;
  4220. }
  4221. }
  4222. #ifdef _MSC_VER
  4223. // sprintf warning.
  4224. // We cannot rely on snprintf existing because, ..., MSVC.
  4225. #pragma warning(push)
  4226. #pragma warning(disable : 4996)
  4227. #endif
  4228. string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  4229. {
  4230. string res;
  4231. float float_value = c.scalar_f16(col, row);
  4232. // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
  4233. // of complicated workarounds, just value-cast to the half type always.
  4234. if (std::isnan(float_value) || std::isinf(float_value))
  4235. {
  4236. SPIRType type;
  4237. type.basetype = SPIRType::Half;
  4238. type.vecsize = 1;
  4239. type.columns = 1;
  4240. if (float_value == numeric_limits<float>::infinity())
  4241. res = join(type_to_glsl(type), "(1.0 / 0.0)");
  4242. else if (float_value == -numeric_limits<float>::infinity())
  4243. res = join(type_to_glsl(type), "(-1.0 / 0.0)");
  4244. else if (std::isnan(float_value))
  4245. res = join(type_to_glsl(type), "(0.0 / 0.0)");
  4246. else
  4247. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  4248. }
  4249. else
  4250. {
  4251. SPIRType type;
  4252. type.basetype = SPIRType::Half;
  4253. type.vecsize = 1;
  4254. type.columns = 1;
  4255. res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
  4256. }
  4257. return res;
  4258. }
  4259. string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  4260. {
  4261. string res;
  4262. float float_value = c.scalar_f32(col, row);
  4263. if (std::isnan(float_value) || std::isinf(float_value))
  4264. {
  4265. // Use special representation.
  4266. if (!is_legacy())
  4267. {
  4268. SPIRType out_type;
  4269. SPIRType in_type;
  4270. out_type.basetype = SPIRType::Float;
  4271. in_type.basetype = SPIRType::UInt;
  4272. out_type.vecsize = 1;
  4273. in_type.vecsize = 1;
  4274. out_type.width = 32;
  4275. in_type.width = 32;
  4276. char print_buffer[32];
  4277. sprintf(print_buffer, "0x%xu", c.scalar(col, row));
  4278. res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
  4279. }
  4280. else
  4281. {
  4282. if (float_value == numeric_limits<float>::infinity())
  4283. {
  4284. if (backend.float_literal_suffix)
  4285. res = "(1.0f / 0.0f)";
  4286. else
  4287. res = "(1.0 / 0.0)";
  4288. }
  4289. else if (float_value == -numeric_limits<float>::infinity())
  4290. {
  4291. if (backend.float_literal_suffix)
  4292. res = "(-1.0f / 0.0f)";
  4293. else
  4294. res = "(-1.0 / 0.0)";
  4295. }
  4296. else if (std::isnan(float_value))
  4297. {
  4298. if (backend.float_literal_suffix)
  4299. res = "(0.0f / 0.0f)";
  4300. else
  4301. res = "(0.0 / 0.0)";
  4302. }
  4303. else
  4304. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  4305. }
  4306. }
  4307. else
  4308. {
  4309. res = convert_to_string(float_value, current_locale_radix_character);
  4310. if (backend.float_literal_suffix)
  4311. res += "f";
  4312. }
  4313. return res;
  4314. }
  4315. std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  4316. {
  4317. string res;
  4318. double double_value = c.scalar_f64(col, row);
  4319. if (std::isnan(double_value) || std::isinf(double_value))
  4320. {
  4321. // Use special representation.
  4322. if (!is_legacy())
  4323. {
  4324. SPIRType out_type;
  4325. SPIRType in_type;
  4326. out_type.basetype = SPIRType::Double;
  4327. in_type.basetype = SPIRType::UInt64;
  4328. out_type.vecsize = 1;
  4329. in_type.vecsize = 1;
  4330. out_type.width = 64;
  4331. in_type.width = 64;
  4332. uint64_t u64_value = c.scalar_u64(col, row);
  4333. if (options.es)
  4334. SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
  4335. require_extension_internal("GL_ARB_gpu_shader_int64");
  4336. char print_buffer[64];
  4337. sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
  4338. backend.long_long_literal_suffix ? "ull" : "ul");
  4339. res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
  4340. }
  4341. else
  4342. {
  4343. if (options.es)
  4344. SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
  4345. if (options.version < 400)
  4346. require_extension_internal("GL_ARB_gpu_shader_fp64");
  4347. if (double_value == numeric_limits<double>::infinity())
  4348. {
  4349. if (backend.double_literal_suffix)
  4350. res = "(1.0lf / 0.0lf)";
  4351. else
  4352. res = "(1.0 / 0.0)";
  4353. }
  4354. else if (double_value == -numeric_limits<double>::infinity())
  4355. {
  4356. if (backend.double_literal_suffix)
  4357. res = "(-1.0lf / 0.0lf)";
  4358. else
  4359. res = "(-1.0 / 0.0)";
  4360. }
  4361. else if (std::isnan(double_value))
  4362. {
  4363. if (backend.double_literal_suffix)
  4364. res = "(0.0lf / 0.0lf)";
  4365. else
  4366. res = "(0.0 / 0.0)";
  4367. }
  4368. else
  4369. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  4370. }
  4371. }
  4372. else
  4373. {
  4374. res = convert_to_string(double_value, current_locale_radix_character);
  4375. if (backend.double_literal_suffix)
  4376. res += "lf";
  4377. }
  4378. return res;
  4379. }
  4380. #ifdef _MSC_VER
  4381. #pragma warning(pop)
  4382. #endif
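// Emits a single column of a constant as a scalar or vector literal.
// Identical components are splatted via a constructor or scalar swizzle when the backend supports it.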
  4383. string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
  4384. {
  4385. auto type = get<SPIRType>(c.constant_type);
  4386. type.columns = 1;
  4387. auto scalar_type = type;
  4388. scalar_type.vecsize = 1;
  4389. string res;
  4390. bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
  4391. bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
  4392. if (!type_is_floating_point(type))
  4393. {
  4394. // Cannot swizzle literal integers as a special case.
  4395. swizzle_splat = false;
  4396. }
  4397. if (splat || swizzle_splat)
  4398. {
  4399. // Cannot use constant splatting if we have specialization constants somewhere in the vector.
  4400. for (uint32_t i = 0; i < c.vector_size(); i++)
  4401. {
  4402. if (c.specialization_constant_id(vector, i) != 0)
  4403. {
  4404. splat = false;
  4405. swizzle_splat = false;
  4406. break;
  4407. }
  4408. }
  4409. }
  4410. if (splat || swizzle_splat)
  4411. {
  4412. if (type.width == 64)
  4413. {
  4414. uint64_t ident = c.scalar_u64(vector, 0);
  4415. for (uint32_t i = 1; i < c.vector_size(); i++)
  4416. {
  4417. if (ident != c.scalar_u64(vector, i))
  4418. {
  4419. splat = false;
  4420. swizzle_splat = false;
  4421. break;
  4422. }
  4423. }
  4424. }
  4425. else
  4426. {
  4427. uint32_t ident = c.scalar(vector, 0);
  4428. for (uint32_t i = 1; i < c.vector_size(); i++)
  4429. {
  4430. if (ident != c.scalar(vector, i))
  4431. {
  4432. splat = false;
  4433. swizzle_splat = false;
  4434. }
  4435. }
  4436. }
  4437. }
  4438. if (c.vector_size() > 1 && !swizzle_splat)
  4439. res += type_to_glsl(type) + "(";
  4440. switch (type.basetype)
  4441. {
  4442. case SPIRType::Half:
  4443. if (splat || swizzle_splat)
  4444. {
  4445. res += convert_half_to_string(c, vector, 0);
  4446. if (swizzle_splat)
  4447. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  4448. }
  4449. else
  4450. {
  4451. for (uint32_t i = 0; i < c.vector_size(); i++)
  4452. {
  4453. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4454. res += to_name(c.specialization_constant_id(vector, i));
  4455. else
  4456. res += convert_half_to_string(c, vector, i);
  4457. if (i + 1 < c.vector_size())
  4458. res += ", ";
  4459. }
  4460. }
  4461. break;
  4462. case SPIRType::Float:
  4463. if (splat || swizzle_splat)
  4464. {
  4465. res += convert_float_to_string(c, vector, 0);
  4466. if (swizzle_splat)
  4467. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  4468. }
  4469. else
  4470. {
  4471. for (uint32_t i = 0; i < c.vector_size(); i++)
  4472. {
  4473. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4474. res += to_name(c.specialization_constant_id(vector, i));
  4475. else
  4476. res += convert_float_to_string(c, vector, i);
  4477. if (i + 1 < c.vector_size())
  4478. res += ", ";
  4479. }
  4480. }
  4481. break;
  4482. case SPIRType::Double:
  4483. if (splat || swizzle_splat)
  4484. {
  4485. res += convert_double_to_string(c, vector, 0);
  4486. if (swizzle_splat)
  4487. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  4488. }
  4489. else
  4490. {
  4491. for (uint32_t i = 0; i < c.vector_size(); i++)
  4492. {
  4493. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4494. res += to_name(c.specialization_constant_id(vector, i));
  4495. else
  4496. res += convert_double_to_string(c, vector, i);
  4497. if (i + 1 < c.vector_size())
  4498. res += ", ";
  4499. }
  4500. }
  4501. break;
  4502. case SPIRType::Int64:
  4503. if (splat)
  4504. {
  4505. res += convert_to_string(c.scalar_i64(vector, 0));
  4506. if (backend.long_long_literal_suffix)
  4507. res += "ll";
  4508. else
  4509. res += "l";
  4510. }
  4511. else
  4512. {
  4513. for (uint32_t i = 0; i < c.vector_size(); i++)
  4514. {
  4515. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4516. res += to_name(c.specialization_constant_id(vector, i));
  4517. else
  4518. {
  4519. res += convert_to_string(c.scalar_i64(vector, i));
  4520. if (backend.long_long_literal_suffix)
  4521. res += "ll";
  4522. else
  4523. res += "l";
  4524. }
  4525. if (i + 1 < c.vector_size())
  4526. res += ", ";
  4527. }
  4528. }
  4529. break;
  4530. case SPIRType::UInt64:
  4531. if (splat)
  4532. {
  4533. res += convert_to_string(c.scalar_u64(vector, 0));
  4534. if (backend.long_long_literal_suffix)
  4535. res += "ull";
  4536. else
  4537. res += "ul";
  4538. }
  4539. else
  4540. {
  4541. for (uint32_t i = 0; i < c.vector_size(); i++)
  4542. {
  4543. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4544. res += to_name(c.specialization_constant_id(vector, i));
  4545. else
  4546. {
  4547. res += convert_to_string(c.scalar_u64(vector, i));
  4548. if (backend.long_long_literal_suffix)
  4549. res += "ull";
  4550. else
  4551. res += "ul";
  4552. }
  4553. if (i + 1 < c.vector_size())
  4554. res += ", ";
  4555. }
  4556. }
  4557. break;
  4558. case SPIRType::UInt:
  4559. if (splat)
  4560. {
  4561. res += convert_to_string(c.scalar(vector, 0));
  4562. if (is_legacy())
  4563. {
  4564. // Fake unsigned constant literals with signed ones if possible.
  4565. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
  4566. if (c.scalar_i32(vector, 0) < 0)
  4567. SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
  4568. }
  4569. else if (backend.uint32_t_literal_suffix)
  4570. res += "u";
  4571. }
  4572. else
  4573. {
  4574. for (uint32_t i = 0; i < c.vector_size(); i++)
  4575. {
  4576. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4577. res += to_name(c.specialization_constant_id(vector, i));
  4578. else
  4579. {
  4580. res += convert_to_string(c.scalar(vector, i));
  4581. if (is_legacy())
  4582. {
  4583. // Fake unsigned constant literals with signed ones if possible.
  4584. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
  4585. if (c.scalar_i32(vector, i) < 0)
  4586. SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
  4587. "the literal negative.");
  4588. }
  4589. else if (backend.uint32_t_literal_suffix)
  4590. res += "u";
  4591. }
  4592. if (i + 1 < c.vector_size())
  4593. res += ", ";
  4594. }
  4595. }
  4596. break;
  4597. case SPIRType::Int:
  4598. if (splat)
  4599. res += convert_to_string(c.scalar_i32(vector, 0));
  4600. else
  4601. {
  4602. for (uint32_t i = 0; i < c.vector_size(); i++)
  4603. {
  4604. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4605. res += to_name(c.specialization_constant_id(vector, i));
  4606. else
  4607. res += convert_to_string(c.scalar_i32(vector, i));
  4608. if (i + 1 < c.vector_size())
  4609. res += ", ";
  4610. }
  4611. }
  4612. break;
  4613. case SPIRType::UShort:
  4614. if (splat)
  4615. {
  4616. res += convert_to_string(c.scalar(vector, 0));
  4617. }
  4618. else
  4619. {
  4620. for (uint32_t i = 0; i < c.vector_size(); i++)
  4621. {
  4622. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4623. res += to_name(c.specialization_constant_id(vector, i));
  4624. else
  4625. {
  4626. if (*backend.uint16_t_literal_suffix)
  4627. {
  4628. res += convert_to_string(c.scalar_u16(vector, i));
  4629. res += backend.uint16_t_literal_suffix;
  4630. }
  4631. else
  4632. {
  4633. // If backend doesn't have a literal suffix, we need to value cast.
  4634. res += type_to_glsl(scalar_type);
  4635. res += "(";
  4636. res += convert_to_string(c.scalar_u16(vector, i));
  4637. res += ")";
  4638. }
  4639. }
  4640. if (i + 1 < c.vector_size())
  4641. res += ", ";
  4642. }
  4643. }
  4644. break;
  4645. case SPIRType::Short:
  4646. if (splat)
  4647. {
  4648. res += convert_to_string(c.scalar_i16(vector, 0));
  4649. }
  4650. else
  4651. {
  4652. for (uint32_t i = 0; i < c.vector_size(); i++)
  4653. {
  4654. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4655. res += to_name(c.specialization_constant_id(vector, i));
  4656. else
  4657. {
  4658. if (*backend.int16_t_literal_suffix)
  4659. {
  4660. res += convert_to_string(c.scalar_i16(vector, i));
  4661. res += backend.int16_t_literal_suffix;
  4662. }
  4663. else
  4664. {
  4665. // If backend doesn't have a literal suffix, we need to value cast.
  4666. res += type_to_glsl(scalar_type);
  4667. res += "(";
  4668. res += convert_to_string(c.scalar_i16(vector, i));
  4669. res += ")";
  4670. }
  4671. }
  4672. if (i + 1 < c.vector_size())
  4673. res += ", ";
  4674. }
  4675. }
  4676. break;
  4677. case SPIRType::UByte:
  4678. if (splat)
  4679. {
  4680. res += convert_to_string(c.scalar_u8(vector, 0));
  4681. }
  4682. else
  4683. {
  4684. for (uint32_t i = 0; i < c.vector_size(); i++)
  4685. {
  4686. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4687. res += to_name(c.specialization_constant_id(vector, i));
  4688. else
  4689. {
  4690. res += type_to_glsl(scalar_type);
  4691. res += "(";
  4692. res += convert_to_string(c.scalar_u8(vector, i));
  4693. res += ")";
  4694. }
  4695. if (i + 1 < c.vector_size())
  4696. res += ", ";
  4697. }
  4698. }
  4699. break;
  4700. case SPIRType::SByte:
  4701. if (splat)
  4702. {
  4703. res += convert_to_string(c.scalar_i8(vector, 0));
  4704. }
  4705. else
  4706. {
  4707. for (uint32_t i = 0; i < c.vector_size(); i++)
  4708. {
  4709. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4710. res += to_name(c.specialization_constant_id(vector, i));
  4711. else
  4712. {
  4713. res += type_to_glsl(scalar_type);
  4714. res += "(";
  4715. res += convert_to_string(c.scalar_i8(vector, i));
  4716. res += ")";
  4717. }
  4718. if (i + 1 < c.vector_size())
  4719. res += ", ";
  4720. }
  4721. }
  4722. break;
  4723. case SPIRType::Boolean:
  4724. if (splat)
  4725. res += c.scalar(vector, 0) ? "true" : "false";
  4726. else
  4727. {
  4728. for (uint32_t i = 0; i < c.vector_size(); i++)
  4729. {
  4730. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  4731. res += to_name(c.specialization_constant_id(vector, i));
  4732. else
  4733. res += c.scalar(vector, i) ? "true" : "false";
  4734. if (i + 1 < c.vector_size())
  4735. res += ", ";
  4736. }
  4737. }
  4738. break;
  4739. default:
  4740. SPIRV_CROSS_THROW("Invalid constant expression basetype.");
  4741. }
  4742. if (c.vector_size() > 1 && !swizzle_splat)
  4743. res += ")";
  4744. return res;
  4745. }
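// Forces the ID to be a temporary, declares it up front via emit_uninitialized_temporary,
// and binds the ID to an expression referencing the temporary's name.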
  4746. SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
  4747. {
  4748. forced_temporaries.insert(id);
  4749. emit_uninitialized_temporary(type, id);
  4750. return set<SPIRExpression>(id, to_name(id), type, true);
  4751. }
  4752. void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
  4753. {
  4754. // If we're declaring temporaries inside continue blocks,
  4755. // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
  4756. if (current_continue_block && !hoisted_temporaries.count(result_id))
  4757. {
  4758. auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
  4759. if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
  4760. [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
  4761. return tmp.first == result_type && tmp.second == result_id;
  4762. }) == end(header.declare_temporary))
  4763. {
  4764. header.declare_temporary.emplace_back(result_type, result_id);
  4765. hoisted_temporaries.insert(result_id);
  4766. force_recompile();
  4767. }
  4768. }
  4769. else if (hoisted_temporaries.count(result_id) == 0)
  4770. {
  4771. auto &type = get<SPIRType>(result_type);
  4772. auto &flags = ir.meta[result_id].decoration.decoration_flags;
  4773. // The result_id has not been made into an expression yet, so use flags interface.
  4774. add_local_variable_name(result_id);
  4775. string initializer;
  4776. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  4777. initializer = join(" = ", to_zero_initialized_expression(result_type));
  4778. statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
  4779. }
  4780. }
  4781. string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
  4782. {
  4783. auto &type = get<SPIRType>(result_type);
  4784. auto &flags = ir.meta[result_id].decoration.decoration_flags;
  4785. // If we're declaring temporaries inside continue blocks,
  4786. // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
  4787. if (current_continue_block && !hoisted_temporaries.count(result_id))
  4788. {
  4789. auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
  4790. if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
  4791. [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
  4792. return tmp.first == result_type && tmp.second == result_id;
  4793. }) == end(header.declare_temporary))
  4794. {
  4795. header.declare_temporary.emplace_back(result_type, result_id);
  4796. hoisted_temporaries.insert(result_id);
  4797. force_recompile();
  4798. }
  4799. return join(to_name(result_id), " = ");
  4800. }
  4801. else if (hoisted_temporaries.count(result_id))
  4802. {
  4803. // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
  4804. return join(to_name(result_id), " = ");
  4805. }
  4806. else
  4807. {
  4808. // The result_id has not been made into an expression yet, so use flags interface.
  4809. add_local_variable_name(result_id);
  4810. return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
  4811. }
  4812. }
  4813. bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
  4814. {
  4815. return forwarded_temporaries.count(id) != 0;
  4816. }
  4817. bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
  4818. {
  4819. return suppressed_usage_tracking.count(id) != 0;
  4820. }
  4821. bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
  4822. {
  4823. auto *expr = maybe_get<SPIRExpression>(id);
  4824. if (!expr)
  4825. return false;
  4826. // If we're emitting code at a deeper loop level than when we emitted the expression,
  4827. // we're probably reading the same expression over and over.
  4828. return current_loop_level > expr->emitted_loop_level;
  4829. }
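// Central helper for emitting an instruction result: either forward the RHS as an inline expression,
// or flush it to a freshly declared temporary and bind the result to that temporary's name.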
  4830. SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
  4831. bool suppress_usage_tracking)
  4832. {
  4833. if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
  4834. {
  4835. // Just forward it without temporary.
  4836. // If the forward is trivial, we do not force flushing to temporary for this expression.
  4837. forwarded_temporaries.insert(result_id);
  4838. if (suppress_usage_tracking)
  4839. suppressed_usage_tracking.insert(result_id);
  4840. return set<SPIRExpression>(result_id, rhs, result_type, true);
  4841. }
  4842. else
  4843. {
  4844. // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
  4845. statement(declare_temporary(result_type, result_id), rhs, ";");
  4846. return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
  4847. }
  4848. }
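// emit_unary_op and emit_binary_op emit plain operator expressions (e.g. -x, a + b),
// forwarding the result when all operands can be forwarded.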
  4849. void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  4850. {
  4851. bool forward = should_forward(op0);
  4852. emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
  4853. inherit_expression_dependencies(result_id, op0);
  4854. }
  4855. void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
  4856. {
  4857. bool forward = should_forward(op0) && should_forward(op1);
  4858. emit_op(result_type, result_id,
  4859. join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
  4860. inherit_expression_dependencies(result_id, op0);
  4861. inherit_expression_dependencies(result_id, op1);
  4862. }
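// Applies the operator to each extracted component and reassembles the result with a type constructor,
// for cases where the operator cannot be applied directly to the whole vector.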
  4863. void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
  4864. {
  4865. auto &type = get<SPIRType>(result_type);
  4866. auto expr = type_to_glsl_constructor(type);
  4867. expr += '(';
  4868. for (uint32_t i = 0; i < type.vecsize; i++)
  4869. {
  4870. // Make sure to call to_expression multiple times to ensure
  4871. // that these expressions are properly flushed to temporaries if needed.
  4872. expr += op;
  4873. expr += to_extract_component_expression(operand, i);
  4874. if (i + 1 < type.vecsize)
  4875. expr += ", ";
  4876. }
  4877. expr += ')';
  4878. emit_op(result_type, result_id, expr, should_forward(operand));
  4879. inherit_expression_dependencies(result_id, operand);
  4880. }
  4881. void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  4882. const char *op, bool negate, SPIRType::BaseType expected_type)
  4883. {
  4884. auto &type0 = expression_type(op0);
  4885. auto &type1 = expression_type(op1);
  4886. SPIRType target_type0 = type0;
  4887. SPIRType target_type1 = type1;
  4888. target_type0.basetype = expected_type;
  4889. target_type1.basetype = expected_type;
  4890. target_type0.vecsize = 1;
  4891. target_type1.vecsize = 1;
  4892. auto &type = get<SPIRType>(result_type);
  4893. auto expr = type_to_glsl_constructor(type);
  4894. expr += '(';
  4895. for (uint32_t i = 0; i < type.vecsize; i++)
  4896. {
  4897. // Make sure to call to_expression multiple times to ensure
  4898. // that these expressions are properly flushed to temporaries if needed.
  4899. if (negate)
  4900. expr += "!(";
  4901. if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
  4902. expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
  4903. else
  4904. expr += to_extract_component_expression(op0, i);
  4905. expr += ' ';
  4906. expr += op;
  4907. expr += ' ';
  4908. if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
  4909. expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
  4910. else
  4911. expr += to_extract_component_expression(op1, i);
  4912. if (negate)
  4913. expr += ")";
  4914. if (i + 1 < type.vecsize)
  4915. expr += ", ";
  4916. }
  4917. expr += ')';
  4918. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  4919. inherit_expression_dependencies(result_id, op0);
  4920. inherit_expression_dependencies(result_id, op1);
  4921. }
  4922. SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
  4923. uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
  4924. {
  4925. auto &type0 = expression_type(op0);
  4926. auto &type1 = expression_type(op1);
  4927. // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
  4928. // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
  4929. // since equality test is exactly the same.
  4930. bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
  4931. // Create a fake type so we can bitcast to it.
4932. // We only deal with regular arithmetic types here, like int, uint and so on.
  4933. SPIRType expected_type;
  4934. expected_type.basetype = input_type;
  4935. expected_type.vecsize = type0.vecsize;
  4936. expected_type.columns = type0.columns;
  4937. expected_type.width = type0.width;
  4938. if (cast)
  4939. {
  4940. cast_op0 = bitcast_glsl(expected_type, op0);
  4941. cast_op1 = bitcast_glsl(expected_type, op1);
  4942. }
  4943. else
  4944. {
  4945. // If we don't cast, our actual input type is that of the first (or second) argument.
  4946. cast_op0 = to_enclosed_unpacked_expression(op0);
  4947. cast_op1 = to_enclosed_unpacked_expression(op1);
  4948. input_type = type0.basetype;
  4949. }
  4950. return expected_type;
  4951. }
  4952. bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
  4953. {
  4954. // Some bitcasts may require complex casting sequences, and are implemented here.
4955. // Otherwise a simple unary function call via bitcast_glsl_op will do.
  4956. auto &output_type = get<SPIRType>(result_type);
  4957. auto &input_type = expression_type(op0);
  4958. string expr;
  4959. if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
  4960. expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
  4961. else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
  4962. input_type.vecsize == 2)
  4963. expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
  4964. else
  4965. return false;
  4966. emit_op(result_type, id, expr, should_forward(op0));
  4967. return true;
  4968. }
  4969. void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  4970. const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
  4971. {
  4972. string cast_op0, cast_op1;
  4973. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
  4974. auto &out_type = get<SPIRType>(result_type);
4975. // We might have cast away from the result type, so bitcast again.
  4976. // For example, arithmetic right shift with uint inputs.
  4977. // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
  4978. string expr;
  4979. if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
  4980. {
  4981. expected_type.basetype = input_type;
  4982. expr = bitcast_glsl_op(out_type, expected_type);
  4983. expr += '(';
  4984. expr += join(cast_op0, " ", op, " ", cast_op1);
  4985. expr += ')';
  4986. }
  4987. else
  4988. expr += join(cast_op0, " ", op, " ", cast_op1);
  4989. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  4990. inherit_expression_dependencies(result_id, op0);
  4991. inherit_expression_dependencies(result_id, op1);
  4992. }
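// The *_func_op helpers below emit op(a, b, ...) function-call style expressions,
// forwarding the result when all operands can be forwarded.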
  4993. void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  4994. {
  4995. bool forward = should_forward(op0);
  4996. emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
  4997. inherit_expression_dependencies(result_id, op0);
  4998. }
  4999. void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5000. const char *op)
  5001. {
  5002. bool forward = should_forward(op0) && should_forward(op1);
  5003. emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
  5004. forward);
  5005. inherit_expression_dependencies(result_id, op0);
  5006. inherit_expression_dependencies(result_id, op1);
  5007. }
  5008. void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
  5009. SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
  5010. {
  5011. auto &out_type = get<SPIRType>(result_type);
  5012. auto &expr_type = expression_type(op0);
  5013. auto expected_type = out_type;
  5014. // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
  5015. expected_type.basetype = input_type;
  5016. expected_type.width = expr_type.width;
  5017. string cast_op = expr_type.basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  5018. string expr;
  5019. if (out_type.basetype != expected_result_type)
  5020. {
  5021. expected_type.basetype = expected_result_type;
  5022. expected_type.width = out_type.width;
  5023. expr = bitcast_glsl_op(out_type, expected_type);
  5024. expr += '(';
  5025. expr += join(op, "(", cast_op, ")");
  5026. expr += ')';
  5027. }
  5028. else
  5029. {
  5030. expr += join(op, "(", cast_op, ")");
  5031. }
  5032. emit_op(result_type, result_id, expr, should_forward(op0));
  5033. inherit_expression_dependencies(result_id, op0);
  5034. }
  5035. // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
  5036. // and different vector sizes all at once. Need a special purpose method here.
  5037. void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5038. uint32_t op2, const char *op,
  5039. SPIRType::BaseType expected_result_type,
  5040. SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
  5041. SPIRType::BaseType input_type2)
  5042. {
  5043. auto &out_type = get<SPIRType>(result_type);
  5044. auto expected_type = out_type;
  5045. expected_type.basetype = input_type0;
  5046. string cast_op0 =
  5047. expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  5048. auto op1_expr = to_unpacked_expression(op1);
  5049. auto op2_expr = to_unpacked_expression(op2);
  5050. // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
  5051. expected_type.basetype = input_type1;
  5052. expected_type.vecsize = 1;
  5053. string cast_op1 = expression_type(op1).basetype != input_type1 ?
  5054. join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
  5055. op1_expr;
  5056. expected_type.basetype = input_type2;
  5057. expected_type.vecsize = 1;
  5058. string cast_op2 = expression_type(op2).basetype != input_type2 ?
  5059. join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
  5060. op2_expr;
  5061. string expr;
  5062. if (out_type.basetype != expected_result_type)
  5063. {
  5064. expected_type.vecsize = out_type.vecsize;
  5065. expected_type.basetype = expected_result_type;
  5066. expr = bitcast_glsl_op(out_type, expected_type);
  5067. expr += '(';
  5068. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  5069. expr += ')';
  5070. }
  5071. else
  5072. {
  5073. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  5074. }
  5075. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
  5076. inherit_expression_dependencies(result_id, op0);
  5077. inherit_expression_dependencies(result_id, op1);
  5078. inherit_expression_dependencies(result_id, op2);
  5079. }

void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                             uint32_t op2, const char *op, SPIRType::BaseType input_type)
{
	auto &out_type = get<SPIRType>(result_type);
	auto expected_type = out_type;
	expected_type.basetype = input_type;
	string cast_op0 =
	    expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
	string cast_op1 =
	    expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
	string cast_op2 =
	    expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);

	string expr;
	if (out_type.basetype != input_type)
	{
		expr = bitcast_glsl_op(out_type, expected_type);
		expr += '(';
		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
		expr += ')';
	}
	else
	{
		expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
	}

	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
	inherit_expression_dependencies(result_id, op2);
}

void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
                                                      uint32_t op1, const char *op, SPIRType::BaseType input_type)
{
	// Special purpose method for implementing clustered subgroup opcodes.
	// Main difference is that op1 does not participate in any casting, it needs to be a literal.
	auto &out_type = get<SPIRType>(result_type);
	auto expected_type = out_type;
	expected_type.basetype = input_type;
	string cast_op0 =
	    expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);

	string expr;
	if (out_type.basetype != input_type)
	{
		expr = bitcast_glsl_op(out_type, expected_type);
		expr += '(';
		expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
		expr += ')';
	}
	else
	{
		expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
	}

	emit_op(result_type, result_id, expr, should_forward(op0));
	inherit_expression_dependencies(result_id, op0);
}
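
// Hedged sketch of the clustered helper above (operand types and cluster size
// invented): a clustered reduction whose SPIR-V operand signedness differs from
// the GLSL builtin could come out roughly as
//     uint(subgroupClusteredAdd(int(value), 4u))
// Only op0 takes part in the bitcasts; the cluster size (op1) is emitted as-is
// since it must remain a literal/constant expression.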

void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                            const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
{
	string cast_op0, cast_op1;
	auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
	auto &out_type = get<SPIRType>(result_type);

	// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
	string expr;
	if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
	{
		expected_type.basetype = input_type;
		expr = bitcast_glsl_op(out_type, expected_type);
		expr += '(';
		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
		expr += ')';
	}
	else
	{
		expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
	}

	emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
}

void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                        uint32_t op2, const char *op)
{
	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
	emit_op(result_type, result_id,
	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
	             to_unpacked_expression(op2), ")"),
	        forward);

	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
	inherit_expression_dependencies(result_id, op2);
}

void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                           uint32_t op2, uint32_t op3, const char *op)
{
	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
	emit_op(result_type, result_id,
	        join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
	             to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
	        forward);

	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
	inherit_expression_dependencies(result_id, op2);
	inherit_expression_dependencies(result_id, op3);
}

void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
                                           uint32_t op2, uint32_t op3, const char *op,
                                           SPIRType::BaseType offset_count_type)
{
	// Only need to cast offset/count arguments. Types of base/insert must be same as result type,
	// and bitfieldInsert is sign invariant.
	bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);

	auto op0_expr = to_unpacked_expression(op0);
	auto op1_expr = to_unpacked_expression(op1);
	auto op2_expr = to_unpacked_expression(op2);
	auto op3_expr = to_unpacked_expression(op3);

	SPIRType target_type;
	target_type.vecsize = 1;
	target_type.basetype = offset_count_type;

	if (expression_type(op2).basetype != offset_count_type)
	{
		// Value-cast here. Input might be 16-bit. GLSL requires int.
		op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
	}

	if (expression_type(op3).basetype != offset_count_type)
	{
		// Value-cast here. Input might be 16-bit. GLSL requires int.
		op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
	}

	emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
	        forward);
	inherit_expression_dependencies(result_id, op0);
	inherit_expression_dependencies(result_id, op1);
	inherit_expression_dependencies(result_id, op2);
	inherit_expression_dependencies(result_id, op3);
}
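
// Hedged example of the helper above (16-bit operands assumed): a bitfieldInsert
// whose offset/count arrive as 16-bit integers is emitted as
//     bitfieldInsert(base, insert, int(offset), int(count))
// since GLSL requires the last two arguments to be plain int.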
  5214. string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
  5215. {
  5216. const char *type;
  5217. switch (imgtype.image.dim)
  5218. {
  5219. case spv::Dim1D:
  5220. type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
  5221. break;
  5222. case spv::Dim2D:
  5223. type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
  5224. break;
  5225. case spv::Dim3D:
  5226. type = "3D";
  5227. break;
  5228. case spv::DimCube:
  5229. type = "Cube";
  5230. break;
  5231. case spv::DimRect:
  5232. type = "2DRect";
  5233. break;
  5234. case spv::DimBuffer:
  5235. type = "Buffer";
  5236. break;
  5237. case spv::DimSubpassData:
  5238. type = "2D";
  5239. break;
  5240. default:
  5241. type = "";
  5242. break;
  5243. }
  5244. // In legacy GLSL, an extension is required for textureLod in the fragment
  5245. // shader or textureGrad anywhere.
  5246. bool legacy_lod_ext = false;
  5247. auto &execution = get_entry_point();
  5248. if (op == "textureGrad" || op == "textureProjGrad" ||
  5249. ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
  5250. {
  5251. if (is_legacy_es())
  5252. {
  5253. legacy_lod_ext = true;
  5254. require_extension_internal("GL_EXT_shader_texture_lod");
  5255. }
  5256. else if (is_legacy_desktop())
  5257. require_extension_internal("GL_ARB_shader_texture_lod");
  5258. }
  5259. if (op == "textureLodOffset" || op == "textureProjLodOffset")
  5260. {
  5261. if (is_legacy_es())
  5262. SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
  5263. require_extension_internal("GL_EXT_gpu_shader4");
  5264. }
// GLES has very limited support for shadow samplers.
// Basically shadow2D and shadow2DProj work through EXT_shadow_samplers;
// everything else can just throw.
  5268. bool is_comparison = image_is_comparison(imgtype, tex);
  5269. if (is_comparison && is_legacy_es())
  5270. {
  5271. if (op == "texture" || op == "textureProj")
  5272. require_extension_internal("GL_EXT_shadow_samplers");
  5273. else
  5274. SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
  5275. }
  5276. if (op == "textureSize")
  5277. {
  5278. if (is_legacy_es())
  5279. SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
  5280. if (is_comparison)
  5281. SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
  5282. require_extension_internal("GL_EXT_gpu_shader4");
  5283. }
  5284. if (op == "texelFetch" && is_legacy_es())
  5285. SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
  5286. bool is_es_and_depth = is_legacy_es() && is_comparison;
  5287. std::string type_prefix = is_comparison ? "shadow" : "texture";
  5288. if (op == "texture")
  5289. return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
  5290. else if (op == "textureLod")
  5291. return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
  5292. else if (op == "textureProj")
  5293. return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
  5294. else if (op == "textureGrad")
  5295. return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
  5296. else if (op == "textureProjLod")
  5297. return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
  5298. else if (op == "textureLodOffset")
  5299. return join(type_prefix, type, "LodOffset");
  5300. else if (op == "textureProjGrad")
  5301. return join(type_prefix, type,
  5302. is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
  5303. else if (op == "textureProjLodOffset")
  5304. return join(type_prefix, type, "ProjLodOffset");
  5305. else if (op == "textureSize")
  5306. return join("textureSize", type);
  5307. else if (op == "texelFetch")
  5308. return join("texelFetch", type);
  5309. else
  5310. {
  5311. SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
  5312. }
  5313. }
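
// A few mappings the function above performs, assembled from its rules rather
// than quoted from generated shaders (hedged):
//     "texture"     + sampler2DShadow + legacy ES      -> "shadow2DEXT"
//     "textureLod"  + sampler2D       + legacy desktop -> "texture2DLod"
//     "textureGrad" + sampler2D       + legacy desktop -> "texture2DGradARB"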

bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
{
	auto *cleft = maybe_get<SPIRConstant>(left);
	auto *cright = maybe_get<SPIRConstant>(right);
	auto &lerptype = expression_type(lerp);

	// If our targets aren't constants, we cannot use construction.
	if (!cleft || !cright)
		return false;

	// If our targets are spec constants, we cannot use construction.
	if (cleft->specialization || cright->specialization)
		return false;

	// We can only use trivial construction if we have a scalar
	// (should be possible to do it for vectors as well, but that is overkill for now).
	if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1)
		return false;

	// If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor.
	bool ret = false;
	switch (type.basetype)
	{
	case SPIRType::Short:
	case SPIRType::UShort:
		ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1;
		break;
	case SPIRType::Int:
	case SPIRType::UInt:
		ret = cleft->scalar() == 0 && cright->scalar() == 1;
		break;
	case SPIRType::Half:
		ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f;
		break;
	case SPIRType::Float:
		ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
		break;
	case SPIRType::Double:
		ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0;
		break;
	case SPIRType::Int64:
	case SPIRType::UInt64:
		ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1;
		break;
	default:
		break;
	}

	if (ret)
		op = type_to_glsl_constructor(type);
	return ret;
}
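
// In effect this turns OpSelect between constants 0 and 1 into a plain
// constructor cast. Hedged example: selecting between int 0 and int 1 with a
// scalar bool b is emitted as
//     int(b)
// instead of a mix()/ternary expression.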

string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
                                           uint32_t false_value)
{
	string expr;
	auto &lerptype = expression_type(select);

	if (lerptype.vecsize == 1)
		expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
		            to_enclosed_pointer_expression(false_value));
	else
	{
		auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };

		expr = type_to_glsl_constructor(restype);
		expr += "(";
		for (uint32_t i = 0; i < restype.vecsize; i++)
		{
			expr += swiz(select, i);
			expr += " ? ";
			expr += swiz(true_value, i);
			expr += " : ";
			expr += swiz(false_value, i);
			if (i + 1 < restype.vecsize)
				expr += ", ";
		}
		expr += ")";
	}

	return expr;
}
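
// Hedged illustration of the vector path above (identifiers invented): with a
// bvec3 selector the emitted expression looks roughly like
//     vec3(s.x ? a.x : b.x, s.y ? a.y : b.y, s.z ? a.z : b.z)
// since GLSL has no component-wise ?: operator.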

void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
{
	auto &lerptype = expression_type(lerp);
	auto &restype = get<SPIRType>(result_type);

	// If this results in a variable pointer, assume it may be written through.
	if (restype.pointer)
	{
		register_write(left);
		register_write(right);
	}

	string mix_op;
	bool has_boolean_mix = *backend.boolean_mix_function &&
	                       ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
	bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);

	// Cannot use boolean mix when the lerp argument is just one boolean,
	// fall back to regular trinary statements.
	if (lerptype.vecsize == 1)
		has_boolean_mix = false;

	// If we can reduce the mix to a simple cast, do so.
	// This helps for cases like int(bool), uint(bool) which is implemented with
	// OpSelect bool 1 0.
	if (trivial_mix)
	{
		emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
	}
	else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
	{
		// Boolean mix not supported on desktop without extension.
		// Was added in OpenGL 4.5 with ES 3.1 compat.
		//
		// Could use GL_EXT_shader_integer_mix on desktop at least,
		// but Apple doesn't support it. :(
		// Just implement it as ternary expressions.
		auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
		emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
		inherit_expression_dependencies(id, left);
		inherit_expression_dependencies(id, right);
		inherit_expression_dependencies(id, lerp);
	}
	else if (lerptype.basetype == SPIRType::Boolean)
		emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
	else
		emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
}
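
// Summary of the dispatch above (hedged): a trivial 0/1 select collapses to a
// single constructor cast, a vector bool lerp on new enough targets maps to the
// backend's boolean mix function, older targets fall back to the component-wise
// ternary helper, and everything else becomes a plain mix() call.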
  5432. string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
  5433. {
  5434. // Keep track of the array indices we have used to load the image.
  5435. // We'll need to use the same array index into the combined image sampler array.
  5436. auto image_expr = to_expression(image_id);
  5437. string array_expr;
  5438. auto array_index = image_expr.find_first_of('[');
  5439. if (array_index != string::npos)
  5440. array_expr = image_expr.substr(array_index, string::npos);
  5441. auto &args = current_function->arguments;
  5442. // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect
  5443. // all possible combinations into new sampler2D uniforms.
  5444. auto *image = maybe_get_backing_variable(image_id);
  5445. auto *samp = maybe_get_backing_variable(samp_id);
  5446. if (image)
  5447. image_id = image->self;
  5448. if (samp)
  5449. samp_id = samp->self;
  5450. auto image_itr = find_if(begin(args), end(args),
  5451. [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
  5452. auto sampler_itr = find_if(begin(args), end(args),
  5453. [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
  5454. if (image_itr != end(args) || sampler_itr != end(args))
  5455. {
  5456. // If any parameter originates from a parameter, we will find it in our argument list.
  5457. bool global_image = image_itr == end(args);
  5458. bool global_sampler = sampler_itr == end(args);
  5459. VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
  5460. VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
  5461. auto &combined = current_function->combined_parameters;
  5462. auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
  5463. return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
  5464. p.sampler_id == sid;
  5465. });
  5466. if (itr != end(combined))
  5467. return to_expression(itr->id) + array_expr;
  5468. else
  5469. {
  5470. SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
  5471. "build_combined_image_samplers() used "
  5472. "before compile() was called?");
  5473. }
  5474. }
  5475. else
  5476. {
  5477. // For global sampler2D, look directly at the global remapping table.
  5478. auto &mapping = combined_image_samplers;
  5479. auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
  5480. return combined.image_id == image_id && combined.sampler_id == samp_id;
  5481. });
  5482. if (itr != end(combined_image_samplers))
  5483. return to_expression(itr->combined_id) + array_expr;
  5484. else
  5485. {
  5486. SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
  5487. "before compile() was called?");
  5488. }
  5489. }
  5490. }
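
// Expected usage of the lookup above (hedged): the API consumer is supposed to
// call build_combined_image_samplers() before compile(); every (texture, sampler)
// pair discovered there gets a synthetic combined sampler variable, and this
// function resolves a (texture, sampler) load back to that variable's expression,
// re-applying any array subscript taken from the image expression.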

bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
{
	switch (op)
	{
	case OpGroupNonUniformElect:
	case OpGroupNonUniformBallot:
	case OpGroupNonUniformBallotFindLSB:
	case OpGroupNonUniformBallotFindMSB:
	case OpGroupNonUniformBroadcast:
	case OpGroupNonUniformBroadcastFirst:
	case OpGroupNonUniformAll:
	case OpGroupNonUniformAny:
	case OpGroupNonUniformAllEqual:
	case OpControlBarrier:
	case OpMemoryBarrier:
	case OpGroupNonUniformBallotBitCount:
	case OpGroupNonUniformBallotBitExtract:
	case OpGroupNonUniformInverseBallot:
		return true;
	default:
		return false;
	}
}

void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
	if (options.vulkan_semantics && combined_image_samplers.empty())
	{
		emit_binary_func_op(result_type, result_id, image_id, samp_id,
		                    type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
	}
	else
	{
		// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
		emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
	}

	// Make sure to suppress usage tracking and any expression invalidation.
	// It is illegal to create temporaries of opaque types.
	forwarded_temporaries.erase(result_id);
}

static inline bool image_opcode_is_sample_no_dref(Op op)
{
	switch (op)
	{
	case OpImageSampleExplicitLod:
	case OpImageSampleImplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageFetch:
	case OpImageRead:
	case OpImageSparseSampleExplicitLod:
	case OpImageSparseSampleImplicitLod:
	case OpImageSparseSampleProjExplicitLod:
	case OpImageSparseSampleProjImplicitLod:
	case OpImageSparseFetch:
	case OpImageSparseRead:
		return true;
	default:
		return false;
	}
}

void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
                                                    uint32_t &texel_id)
{
	// Need to allocate two temporaries.
	if (options.es)
		SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
	require_extension_internal("GL_ARB_sparse_texture2");

	auto &temps = extra_sub_expressions[id];
	if (temps == 0)
		temps = ir.increase_bound_by(2);

	feedback_id = temps + 0;
	texel_id = temps + 1;

	auto &return_type = get<SPIRType>(result_type_id);
	if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
		SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
	emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
	emit_uninitialized_temporary(return_type.member_types[1], texel_id);
}

uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
{
	auto itr = extra_sub_expressions.find(id);
	if (itr == extra_sub_expressions.end())
		return 0;
	else
		return itr->second + 1;
}

void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
{
	auto *ops = stream(i);
	auto op = static_cast<Op>(i.op);

	SmallVector<uint32_t> inherited_expressions;

	uint32_t result_type_id = ops[0];
	uint32_t id = ops[1];
	auto &return_type = get<SPIRType>(result_type_id);

	uint32_t sparse_code_id = 0;
	uint32_t sparse_texel_id = 0;
	if (sparse)
		emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);

	bool forward = false;
	string expr = to_texture_op(i, sparse, &forward, inherited_expressions);

	if (sparse)
	{
		statement(to_expression(sparse_code_id), " = ", expr, ";");
		expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
		            ")");
		forward = true;
		inherited_expressions.clear();
	}

	emit_op(result_type_id, id, expr, forward);
	for (auto &inherit : inherited_expressions)
		inherit_expression_dependencies(id, inherit);

	// Do not register sparse ops as control dependent as they are always lowered to a temporary.
	switch (op)
	{
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleImplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleProjDrefImplicitLod:
		register_control_dependent_expression(id);
		break;

	default:
		break;
	}
}
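
// Hedged sketch of the sparse path above (temporary names invented): a sparse
// sample is lowered to an assignment into the feedback temporaries followed by a
// constructor of the two-member result struct, roughly
//     _code = sparseTextureARB(uTex, uv, _texel);
//     ResType _res = ResType(_code, _texel);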
  5615. std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
  5616. SmallVector<uint32_t> &inherited_expressions)
  5617. {
  5618. auto *ops = stream(i);
  5619. auto op = static_cast<Op>(i.op);
  5620. uint32_t length = i.length;
  5621. uint32_t result_type_id = ops[0];
  5622. VariableID img = ops[2];
  5623. uint32_t coord = ops[3];
  5624. uint32_t dref = 0;
  5625. uint32_t comp = 0;
  5626. bool gather = false;
  5627. bool proj = false;
  5628. bool fetch = false;
  5629. bool nonuniform_expression = false;
  5630. const uint32_t *opt = nullptr;
  5631. auto &result_type = get<SPIRType>(result_type_id);
  5632. inherited_expressions.push_back(coord);
  5633. // Make sure non-uniform decoration is back-propagated to where it needs to be.
  5634. if (has_decoration(img, DecorationNonUniformEXT))
  5635. {
// In Vulkan GLSL, we cannot back-propagate nonuniform qualifiers if we
// use a combined image sampler constructor.
  5638. // We're only interested in back-propagating if we can trace back through access chains.
  5639. // If not, we will apply nonuniform to the sampled image expression itself.
  5640. auto *backing = maybe_get_backing_variable(img);
  5641. if (backing)
  5642. propagate_nonuniform_qualifier(img);
  5643. else
  5644. nonuniform_expression = true;
  5645. }
  5646. switch (op)
  5647. {
  5648. case OpImageSampleDrefImplicitLod:
  5649. case OpImageSampleDrefExplicitLod:
  5650. case OpImageSparseSampleDrefImplicitLod:
  5651. case OpImageSparseSampleDrefExplicitLod:
  5652. dref = ops[4];
  5653. opt = &ops[5];
  5654. length -= 5;
  5655. break;
  5656. case OpImageSampleProjDrefImplicitLod:
  5657. case OpImageSampleProjDrefExplicitLod:
  5658. case OpImageSparseSampleProjDrefImplicitLod:
  5659. case OpImageSparseSampleProjDrefExplicitLod:
  5660. dref = ops[4];
  5661. opt = &ops[5];
  5662. length -= 5;
  5663. proj = true;
  5664. break;
  5665. case OpImageDrefGather:
  5666. case OpImageSparseDrefGather:
  5667. dref = ops[4];
  5668. opt = &ops[5];
  5669. length -= 5;
  5670. gather = true;
  5671. if (options.es && options.version < 310)
  5672. SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
  5673. else if (!options.es && options.version < 400)
  5674. SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
  5675. break;
  5676. case OpImageGather:
  5677. case OpImageSparseGather:
  5678. comp = ops[4];
  5679. opt = &ops[5];
  5680. length -= 5;
  5681. gather = true;
  5682. if (options.es && options.version < 310)
  5683. SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
  5684. else if (!options.es && options.version < 400)
  5685. {
  5686. if (!expression_is_constant_null(comp))
  5687. SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
  5688. require_extension_internal("GL_ARB_texture_gather");
  5689. }
  5690. break;
  5691. case OpImageFetch:
  5692. case OpImageSparseFetch:
  5693. case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
  5694. opt = &ops[4];
  5695. length -= 4;
  5696. fetch = true;
  5697. break;
  5698. case OpImageSampleProjImplicitLod:
  5699. case OpImageSampleProjExplicitLod:
  5700. case OpImageSparseSampleProjImplicitLod:
  5701. case OpImageSparseSampleProjExplicitLod:
  5702. opt = &ops[4];
  5703. length -= 4;
  5704. proj = true;
  5705. break;
  5706. default:
  5707. opt = &ops[4];
  5708. length -= 4;
  5709. break;
  5710. }
  5711. // Bypass pointers because we need the real image struct
  5712. auto &type = expression_type(img);
  5713. auto &imgtype = get<SPIRType>(type.self);
  5714. uint32_t coord_components = 0;
  5715. switch (imgtype.image.dim)
  5716. {
  5717. case spv::Dim1D:
  5718. coord_components = 1;
  5719. break;
  5720. case spv::Dim2D:
  5721. coord_components = 2;
  5722. break;
  5723. case spv::Dim3D:
  5724. coord_components = 3;
  5725. break;
  5726. case spv::DimCube:
  5727. coord_components = 3;
  5728. break;
  5729. case spv::DimBuffer:
  5730. coord_components = 1;
  5731. break;
  5732. default:
  5733. coord_components = 2;
  5734. break;
  5735. }
  5736. if (dref)
  5737. inherited_expressions.push_back(dref);
  5738. if (proj)
  5739. coord_components++;
  5740. if (imgtype.image.arrayed)
  5741. coord_components++;
  5742. uint32_t bias = 0;
  5743. uint32_t lod = 0;
  5744. uint32_t grad_x = 0;
  5745. uint32_t grad_y = 0;
  5746. uint32_t coffset = 0;
  5747. uint32_t offset = 0;
  5748. uint32_t coffsets = 0;
  5749. uint32_t sample = 0;
  5750. uint32_t minlod = 0;
  5751. uint32_t flags = 0;
  5752. if (length)
  5753. {
  5754. flags = *opt++;
  5755. length--;
  5756. }
  5757. auto test = [&](uint32_t &v, uint32_t flag) {
  5758. if (length && (flags & flag))
  5759. {
  5760. v = *opt++;
  5761. inherited_expressions.push_back(v);
  5762. length--;
  5763. }
  5764. };
  5765. test(bias, ImageOperandsBiasMask);
  5766. test(lod, ImageOperandsLodMask);
  5767. test(grad_x, ImageOperandsGradMask);
  5768. test(grad_y, ImageOperandsGradMask);
  5769. test(coffset, ImageOperandsConstOffsetMask);
  5770. test(offset, ImageOperandsOffsetMask);
  5771. test(coffsets, ImageOperandsConstOffsetsMask);
  5772. test(sample, ImageOperandsSampleMask);
  5773. test(minlod, ImageOperandsMinLodMask);
  5774. TextureFunctionBaseArguments base_args = {};
  5775. base_args.img = img;
  5776. base_args.imgtype = &imgtype;
  5777. base_args.is_fetch = fetch != 0;
  5778. base_args.is_gather = gather != 0;
  5779. base_args.is_proj = proj != 0;
  5780. string expr;
  5781. TextureFunctionNameArguments name_args = {};
  5782. name_args.base = base_args;
  5783. name_args.has_array_offsets = coffsets != 0;
  5784. name_args.has_offset = coffset != 0 || offset != 0;
  5785. name_args.has_grad = grad_x != 0 || grad_y != 0;
  5786. name_args.has_dref = dref != 0;
  5787. name_args.is_sparse_feedback = sparse;
  5788. name_args.has_min_lod = minlod != 0;
  5789. name_args.lod = lod;
  5790. expr += to_function_name(name_args);
  5791. expr += "(";
  5792. uint32_t sparse_texel_id = 0;
  5793. if (sparse)
  5794. sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
  5795. TextureFunctionArguments args = {};
  5796. args.base = base_args;
  5797. args.coord = coord;
  5798. args.coord_components = coord_components;
  5799. args.dref = dref;
  5800. args.grad_x = grad_x;
  5801. args.grad_y = grad_y;
  5802. args.lod = lod;
  5803. args.coffset = coffset;
  5804. args.offset = offset;
  5805. args.bias = bias;
  5806. args.component = comp;
  5807. args.sample = sample;
  5808. args.sparse_texel = sparse_texel_id;
  5809. args.min_lod = minlod;
  5810. args.nonuniform_expression = nonuniform_expression;
  5811. expr += to_function_args(args, forward);
  5812. expr += ")";
  5813. // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
  5814. if (is_legacy() && image_is_comparison(imgtype, img))
  5815. expr += ".r";
// Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
  5817. // Remap back to 4 components as sampling opcodes expect.
  5818. if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
  5819. {
  5820. bool image_is_depth = false;
  5821. const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
  5822. VariableID image_id = combined ? combined->image : img;
  5823. if (combined && image_is_comparison(imgtype, combined->image))
  5824. image_is_depth = true;
  5825. else if (image_is_comparison(imgtype, img))
  5826. image_is_depth = true;
  5827. // We must also check the backing variable for the image.
  5828. // We might have loaded an OpImage, and used that handle for two different purposes.
  5829. // Once with comparison, once without.
  5830. auto *image_variable = maybe_get_backing_variable(image_id);
  5831. if (image_variable && image_is_comparison(get<SPIRType>(image_variable->basetype), image_variable->self))
  5832. image_is_depth = true;
  5833. if (image_is_depth)
  5834. expr = remap_swizzle(result_type, 1, expr);
  5835. }
  5836. if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
  5837. {
  5838. // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
  5839. // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
  5840. expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
  5841. }
  5842. // Deals with reads from MSL. We might need to downconvert to fewer components.
  5843. if (op == OpImageRead)
  5844. expr = remap_swizzle(result_type, 4, expr);
  5845. return expr;
  5846. }
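
// Hedged example of the depth remapping above (identifiers invented): on a
// backend where comparison samples return a scalar, a non-dref sample from a
// deduced depth texture is widened back to the vec4 SPIR-V expects, e.g.
//     vec4(texture(uShadowTex, uv))
// via remap_swizzle().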

bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
{
	auto *c = maybe_get<SPIRConstant>(id);
	if (!c)
		return false;
	return c->constant_is_null();
}

bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
{
	auto &type = expression_type(ptr);
	if (type.array.empty())
		return false;

	if (!backend.array_is_value_type)
		return true;

	auto *var = maybe_get_backing_variable(ptr);
	if (!var)
		return false;

	auto &backed_type = get<SPIRType>(var->basetype);
	return !backend.buffer_offset_array_is_value_type && backed_type.basetype == SPIRType::Struct &&
	       has_member_decoration(backed_type.self, 0, DecorationOffset);
}
  5868. // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
  5869. // For some subclasses, the function is a method on the specified image.
  5870. string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
  5871. {
  5872. if (args.has_min_lod)
  5873. {
  5874. if (options.es)
  5875. SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
  5876. require_extension_internal("GL_ARB_sparse_texture_clamp");
  5877. }
  5878. string fname;
  5879. auto &imgtype = *args.base.imgtype;
  5880. VariableID tex = args.base.img;
  5881. // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
  5882. // To emulate this, we will have to use textureGrad with a constant gradient of 0.
  5883. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
  5884. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
  5885. bool workaround_lod_array_shadow_as_grad = false;
  5886. if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
  5887. image_is_comparison(imgtype, tex) && args.lod)
  5888. {
  5889. if (!expression_is_constant_null(args.lod))
  5890. {
  5891. SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
  5892. "expressed in GLSL.");
  5893. }
  5894. workaround_lod_array_shadow_as_grad = true;
  5895. }
  5896. if (args.is_sparse_feedback)
  5897. fname += "sparse";
  5898. if (args.base.is_fetch)
  5899. fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
  5900. else
  5901. {
  5902. fname += args.is_sparse_feedback ? "Texture" : "texture";
  5903. if (args.base.is_gather)
  5904. fname += "Gather";
  5905. if (args.has_array_offsets)
  5906. fname += "Offsets";
  5907. if (args.base.is_proj)
  5908. fname += "Proj";
  5909. if (args.has_grad || workaround_lod_array_shadow_as_grad)
  5910. fname += "Grad";
  5911. if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
  5912. fname += "Lod";
  5913. }
  5914. if (args.has_offset)
  5915. fname += "Offset";
  5916. if (args.has_min_lod)
  5917. fname += "Clamp";
  5918. if (args.is_sparse_feedback || args.has_min_lod)
  5919. fname += "ARB";
  5920. return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
  5921. }
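
// Hedged examples of names assembled above (derived from the flags, not quoted
// from generated shaders):
//     fetch + sparse feedback          -> "sparseTexelFetchARB"
//     gather + const-offsets           -> "textureGatherOffsets"
//     sample + grad + offset + min lod -> "textureGradOffsetClampARB"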
  5922. std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
  5923. {
  5924. auto *var = maybe_get_backing_variable(id);
  5925. // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
  5926. // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
  5927. if (var)
  5928. {
  5929. auto &type = get<SPIRType>(var->basetype);
  5930. if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
  5931. {
  5932. if (options.vulkan_semantics)
  5933. {
  5934. if (dummy_sampler_id)
  5935. {
  5936. // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
  5937. auto sampled_type = type;
  5938. sampled_type.basetype = SPIRType::SampledImage;
  5939. return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ",
  5940. to_expression(dummy_sampler_id), ")");
  5941. }
  5942. else
  5943. {
  5944. // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
  5945. require_extension_internal("GL_EXT_samplerless_texture_functions");
  5946. }
  5947. }
  5948. else
  5949. {
  5950. if (!dummy_sampler_id)
  5951. SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
  5952. "build_dummy_sampler_for_combined_images() called?");
  5953. return to_combined_image_sampler(id, dummy_sampler_id);
  5954. }
  5955. }
  5956. }
  5957. return to_expression(id);
  5958. }
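
// Hedged example (identifiers invented): in Vulkan GLSL with a dummy sampler
// registered, a fetch from a separate texture2D uTex becomes
//     sampler2D(uTex, uDummySampler)
// while the GL_EXT_samplerless_texture_functions path passes uTex through
// unchanged.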
  5959. // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
  5960. string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
  5961. {
  5962. VariableID img = args.base.img;
  5963. auto &imgtype = *args.base.imgtype;
  5964. string farg_str;
  5965. if (args.base.is_fetch)
  5966. farg_str = convert_separate_image_to_expression(img);
  5967. else
  5968. farg_str = to_expression(img);
  5969. if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
  5970. {
  5971. // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
  5972. farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
  5973. }
  5974. bool swizz_func = backend.swizzle_is_function;
  5975. auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
  5976. if (comps == in_comps)
  5977. return "";
  5978. switch (comps)
  5979. {
  5980. case 1:
  5981. return ".x";
  5982. case 2:
  5983. return swizz_func ? ".xy()" : ".xy";
  5984. case 3:
  5985. return swizz_func ? ".xyz()" : ".xyz";
  5986. default:
  5987. return "";
  5988. }
  5989. };
  5990. bool forward = should_forward(args.coord);
  5991. // The IR can give us more components than we need, so chop them off as needed.
  5992. auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
  5993. // Only enclose the UV expression if needed.
  5994. auto coord_expr =
  5995. (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
  5996. // texelFetch only takes int, not uint.
  5997. auto &coord_type = expression_type(args.coord);
  5998. if (coord_type.basetype == SPIRType::UInt)
  5999. {
  6000. auto expected_type = coord_type;
  6001. expected_type.vecsize = args.coord_components;
  6002. expected_type.basetype = SPIRType::Int;
  6003. coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
  6004. }
  6005. // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
  6006. // To emulate this, we will have to use textureGrad with a constant gradient of 0.
  6007. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
  6008. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
  6009. bool workaround_lod_array_shadow_as_grad =
  6010. ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
  6011. image_is_comparison(imgtype, img) && args.lod != 0;
  6012. if (args.dref)
  6013. {
  6014. forward = forward && should_forward(args.dref);
  6015. // SPIR-V splits dref and coordinate.
  6016. if (args.base.is_gather ||
  6017. args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
  6018. {
  6019. farg_str += ", ";
  6020. farg_str += to_expression(args.coord);
  6021. farg_str += ", ";
  6022. farg_str += to_expression(args.dref);
  6023. }
  6024. else if (args.base.is_proj)
  6025. {
  6026. // Have to reshuffle so we get vec4(coord, dref, proj), special case.
  6027. // Other shading languages splits up the arguments for coord and compare value like SPIR-V.
  6028. // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
  6029. farg_str += ", vec4(";
  6030. if (imgtype.image.dim == Dim1D)
  6031. {
  6032. // Could reuse coord_expr, but we will mess up the temporary usage checking.
  6033. farg_str += to_enclosed_expression(args.coord) + ".x";
  6034. farg_str += ", ";
  6035. farg_str += "0.0, ";
  6036. farg_str += to_expression(args.dref);
  6037. farg_str += ", ";
  6038. farg_str += to_enclosed_expression(args.coord) + ".y)";
  6039. }
  6040. else if (imgtype.image.dim == Dim2D)
  6041. {
  6042. // Could reuse coord_expr, but we will mess up the temporary usage checking.
  6043. farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
  6044. farg_str += ", ";
  6045. farg_str += to_expression(args.dref);
  6046. farg_str += ", ";
  6047. farg_str += to_enclosed_expression(args.coord) + ".z)";
  6048. }
  6049. else
  6050. SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
  6051. }
  6052. else
  6053. {
  6054. // Create a composite which merges coord/dref into a single vector.
  6055. auto type = expression_type(args.coord);
  6056. type.vecsize = args.coord_components + 1;
  6057. farg_str += ", ";
  6058. farg_str += type_to_glsl_constructor(type);
  6059. farg_str += "(";
  6060. farg_str += coord_expr;
  6061. farg_str += ", ";
  6062. farg_str += to_expression(args.dref);
  6063. farg_str += ")";
  6064. }
  6065. }
  6066. else
  6067. {
  6068. farg_str += ", ";
  6069. farg_str += coord_expr;
  6070. }
  6071. if (args.grad_x || args.grad_y)
  6072. {
  6073. forward = forward && should_forward(args.grad_x);
  6074. forward = forward && should_forward(args.grad_y);
  6075. farg_str += ", ";
  6076. farg_str += to_expression(args.grad_x);
  6077. farg_str += ", ";
  6078. farg_str += to_expression(args.grad_y);
  6079. }
  6080. if (args.lod)
  6081. {
  6082. if (workaround_lod_array_shadow_as_grad)
  6083. {
  6084. // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
  6085. // Implementing this as plain texture() is not safe on some implementations.
  6086. if (imgtype.image.dim == Dim2D)
  6087. farg_str += ", vec2(0.0), vec2(0.0)";
  6088. else if (imgtype.image.dim == DimCube)
  6089. farg_str += ", vec3(0.0), vec3(0.0)";
  6090. }
  6091. else
  6092. {
  6093. forward = forward && should_forward(args.lod);
  6094. farg_str += ", ";
  6095. auto &lod_expr_type = expression_type(args.lod);
  6096. // Lod expression for TexelFetch in GLSL must be int, and only int.
  6097. if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
  6098. lod_expr_type.basetype != SPIRType::Int)
  6099. {
  6100. farg_str += join("int(", to_expression(args.lod), ")");
  6101. }
  6102. else
  6103. {
  6104. farg_str += to_expression(args.lod);
  6105. }
  6106. }
  6107. }
  6108. else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
  6109. {
// Lod argument is optional in OpImageFetch, but GLSL's texelFetch requires one, so pick 0 as the default.
  6111. farg_str += ", 0";
  6112. }
  6113. if (args.coffset)
  6114. {
  6115. forward = forward && should_forward(args.coffset);
  6116. farg_str += ", ";
  6117. farg_str += to_expression(args.coffset);
  6118. }
  6119. else if (args.offset)
  6120. {
  6121. forward = forward && should_forward(args.offset);
  6122. farg_str += ", ";
  6123. farg_str += to_expression(args.offset);
  6124. }
  6125. if (args.sample)
  6126. {
  6127. farg_str += ", ";
  6128. farg_str += to_expression(args.sample);
  6129. }
  6130. if (args.min_lod)
  6131. {
  6132. farg_str += ", ";
  6133. farg_str += to_expression(args.min_lod);
  6134. }
  6135. if (args.sparse_texel)
  6136. {
// Sparse texel output parameter comes after everything else, except that it goes before the optional component/bias arguments.
  6138. farg_str += ", ";
  6139. farg_str += to_expression(args.sparse_texel);
  6140. }
  6141. if (args.bias)
  6142. {
  6143. forward = forward && should_forward(args.bias);
  6144. farg_str += ", ";
  6145. farg_str += to_expression(args.bias);
  6146. }
  6147. if (args.component && !expression_is_constant_null(args.component))
  6148. {
  6149. forward = forward && should_forward(args.component);
  6150. farg_str += ", ";
  6151. farg_str += to_expression(args.component);
  6152. }
  6153. *p_forward = forward;
  6154. return farg_str;
  6155. }
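
// Hedged example of the dref/proj reshuffling above (identifiers invented): a
// projective shadow sample on sampler1DShadow packs everything into one vec4,
//     textureProj(uShadow, vec4(coord.x, 0.0, dref, coord.y))
// whereas the plain non-proj, non-gather case merges coord and dref with a
// constructor such as vec3(coord.xy, dref).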
  6156. void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
  6157. {
  6158. auto op = static_cast<GLSLstd450>(eop);
  6159. if (is_legacy() && is_unsigned_glsl_opcode(op))
  6160. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
  6161. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  6162. uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
  6163. auto int_type = to_signed_basetype(integer_width);
  6164. auto uint_type = to_unsigned_basetype(integer_width);
  6165. switch (op)
  6166. {
  6167. // FP fiddling
  6168. case GLSLstd450Round:
  6169. if (!is_legacy())
  6170. emit_unary_func_op(result_type, id, args[0], "round");
  6171. else
  6172. {
  6173. auto op0 = to_enclosed_expression(args[0]);
  6174. auto &op0_type = expression_type(args[0]);
  6175. auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
  6176. bool forward = should_forward(args[0]);
  6177. emit_op(result_type, id, expr, forward);
  6178. inherit_expression_dependencies(id, args[0]);
  6179. }
  6180. break;
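// Hedged example of the legacy fallback above: round(v) on a vec2 operand
// comes out as
//     floor(v + vec2(0.5))
// which rounds halves up rather than to even; the GLSL.std.450 Round opcode
// leaves the rounding direction for exact halves implementation-defined.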
  6181. case GLSLstd450RoundEven:
  6182. if (!is_legacy())
  6183. emit_unary_func_op(result_type, id, args[0], "roundEven");
  6184. else if (!options.es)
  6185. {
  6186. // This extension provides round() with round-to-even semantics.
  6187. require_extension_internal("GL_EXT_gpu_shader4");
  6188. emit_unary_func_op(result_type, id, args[0], "round");
  6189. }
  6190. else
  6191. SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
  6192. break;
  6193. case GLSLstd450Trunc:
  6194. emit_unary_func_op(result_type, id, args[0], "trunc");
  6195. break;
  6196. case GLSLstd450SAbs:
  6197. emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
  6198. break;
  6199. case GLSLstd450FAbs:
  6200. emit_unary_func_op(result_type, id, args[0], "abs");
  6201. break;
  6202. case GLSLstd450SSign:
  6203. emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
  6204. break;
  6205. case GLSLstd450FSign:
  6206. emit_unary_func_op(result_type, id, args[0], "sign");
  6207. break;
  6208. case GLSLstd450Floor:
  6209. emit_unary_func_op(result_type, id, args[0], "floor");
  6210. break;
  6211. case GLSLstd450Ceil:
  6212. emit_unary_func_op(result_type, id, args[0], "ceil");
  6213. break;
  6214. case GLSLstd450Fract:
  6215. emit_unary_func_op(result_type, id, args[0], "fract");
  6216. break;
  6217. case GLSLstd450Radians:
  6218. emit_unary_func_op(result_type, id, args[0], "radians");
  6219. break;
  6220. case GLSLstd450Degrees:
  6221. emit_unary_func_op(result_type, id, args[0], "degrees");
  6222. break;
  6223. case GLSLstd450Fma:
  6224. if ((!options.es && options.version < 400) || (options.es && options.version < 320))
  6225. {
  6226. auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
  6227. to_enclosed_expression(args[2]));
  6228. emit_op(result_type, id, expr,
  6229. should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
  6230. for (uint32_t i = 0; i < 3; i++)
  6231. inherit_expression_dependencies(id, args[i]);
  6232. }
  6233. else
  6234. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
  6235. break;
  6236. case GLSLstd450Modf:
  6237. register_call_out_argument(args[1]);
  6238. forced_temporaries.insert(id);
  6239. emit_binary_func_op(result_type, id, args[0], args[1], "modf");
  6240. break;
  6241. case GLSLstd450ModfStruct:
  6242. {
  6243. auto &type = get<SPIRType>(result_type);
  6244. emit_uninitialized_temporary_expression(result_type, id);
  6245. statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
  6246. to_expression(id), ".", to_member_name(type, 1), ");");
  6247. break;
  6248. }
  6249. // Minmax
  6250. case GLSLstd450UMin:
  6251. emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
  6252. break;
  6253. case GLSLstd450SMin:
  6254. emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
  6255. break;
  6256. case GLSLstd450FMin:
  6257. emit_binary_func_op(result_type, id, args[0], args[1], "min");
  6258. break;
  6259. case GLSLstd450FMax:
  6260. emit_binary_func_op(result_type, id, args[0], args[1], "max");
  6261. break;
  6262. case GLSLstd450UMax:
  6263. emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
  6264. break;
  6265. case GLSLstd450SMax:
  6266. emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
  6267. break;
  6268. case GLSLstd450FClamp:
  6269. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
  6270. break;
  6271. case GLSLstd450UClamp:
  6272. emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
  6273. break;
  6274. case GLSLstd450SClamp:
  6275. emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
  6276. break;
  6277. // Trig
  6278. case GLSLstd450Sin:
  6279. emit_unary_func_op(result_type, id, args[0], "sin");
  6280. break;
  6281. case GLSLstd450Cos:
  6282. emit_unary_func_op(result_type, id, args[0], "cos");
  6283. break;
  6284. case GLSLstd450Tan:
  6285. emit_unary_func_op(result_type, id, args[0], "tan");
  6286. break;
  6287. case GLSLstd450Asin:
  6288. emit_unary_func_op(result_type, id, args[0], "asin");
  6289. break;
  6290. case GLSLstd450Acos:
  6291. emit_unary_func_op(result_type, id, args[0], "acos");
  6292. break;
  6293. case GLSLstd450Atan:
  6294. emit_unary_func_op(result_type, id, args[0], "atan");
  6295. break;
  6296. case GLSLstd450Sinh:
  6297. emit_unary_func_op(result_type, id, args[0], "sinh");
  6298. break;
  6299. case GLSLstd450Cosh:
  6300. emit_unary_func_op(result_type, id, args[0], "cosh");
  6301. break;
  6302. case GLSLstd450Tanh:
  6303. emit_unary_func_op(result_type, id, args[0], "tanh");
  6304. break;
  6305. case GLSLstd450Asinh:
  6306. emit_unary_func_op(result_type, id, args[0], "asinh");
  6307. break;
  6308. case GLSLstd450Acosh:
  6309. emit_unary_func_op(result_type, id, args[0], "acosh");
  6310. break;
  6311. case GLSLstd450Atanh:
  6312. emit_unary_func_op(result_type, id, args[0], "atanh");
  6313. break;
  6314. case GLSLstd450Atan2:
  6315. emit_binary_func_op(result_type, id, args[0], args[1], "atan");
  6316. break;
  6317. // Exponentials
  6318. case GLSLstd450Pow:
  6319. emit_binary_func_op(result_type, id, args[0], args[1], "pow");
  6320. break;
  6321. case GLSLstd450Exp:
  6322. emit_unary_func_op(result_type, id, args[0], "exp");
  6323. break;
  6324. case GLSLstd450Log:
  6325. emit_unary_func_op(result_type, id, args[0], "log");
  6326. break;
  6327. case GLSLstd450Exp2:
  6328. emit_unary_func_op(result_type, id, args[0], "exp2");
  6329. break;
  6330. case GLSLstd450Log2:
  6331. emit_unary_func_op(result_type, id, args[0], "log2");
  6332. break;
  6333. case GLSLstd450Sqrt:
  6334. emit_unary_func_op(result_type, id, args[0], "sqrt");
  6335. break;
  6336. case GLSLstd450InverseSqrt:
  6337. emit_unary_func_op(result_type, id, args[0], "inversesqrt");
  6338. break;
  6339. // Matrix math
  6340. case GLSLstd450Determinant:
  6341. emit_unary_func_op(result_type, id, args[0], "determinant");
  6342. break;
  6343. case GLSLstd450MatrixInverse:
  6344. emit_unary_func_op(result_type, id, args[0], "inverse");
  6345. break;
  6346. // Lerping
  6347. case GLSLstd450FMix:
  6348. case GLSLstd450IMix:
  6349. {
  6350. emit_mix_op(result_type, id, args[0], args[1], args[2]);
  6351. break;
  6352. }
  6353. case GLSLstd450Step:
  6354. emit_binary_func_op(result_type, id, args[0], args[1], "step");
  6355. break;
  6356. case GLSLstd450SmoothStep:
  6357. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
  6358. break;
  6359. // Packing
  6360. case GLSLstd450Frexp:
  6361. register_call_out_argument(args[1]);
  6362. forced_temporaries.insert(id);
  6363. emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
  6364. break;
  6365. case GLSLstd450FrexpStruct:
  6366. {
  6367. auto &type = get<SPIRType>(result_type);
  6368. emit_uninitialized_temporary_expression(result_type, id);
  6369. statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
  6370. to_expression(id), ".", to_member_name(type, 1), ");");
  6371. break;
  6372. }
  6373. case GLSLstd450Ldexp:
  6374. {
  6375. bool forward = should_forward(args[0]) && should_forward(args[1]);
  6376. auto op0 = to_unpacked_expression(args[0]);
  6377. auto op1 = to_unpacked_expression(args[1]);
  6378. auto &op1_type = expression_type(args[1]);
  6379. if (op1_type.basetype != SPIRType::Int)
  6380. {
  6381. // Need a value cast here.
  6382. auto target_type = op1_type;
  6383. target_type.basetype = SPIRType::Int;
  6384. op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
  6385. }
  6386. auto expr = join("ldexp(", op0, ", ", op1, ")");
  6387. emit_op(result_type, id, expr, forward);
  6388. inherit_expression_dependencies(id, args[0]);
  6389. inherit_expression_dependencies(id, args[1]);
  6390. break;
  6391. }
  6392. case GLSLstd450PackSnorm4x8:
  6393. emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
  6394. break;
  6395. case GLSLstd450PackUnorm4x8:
  6396. emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
  6397. break;
  6398. case GLSLstd450PackSnorm2x16:
  6399. emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
  6400. break;
  6401. case GLSLstd450PackUnorm2x16:
  6402. emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
  6403. break;
  6404. case GLSLstd450PackHalf2x16:
  6405. emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
  6406. break;
  6407. case GLSLstd450UnpackSnorm4x8:
  6408. emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
  6409. break;
  6410. case GLSLstd450UnpackUnorm4x8:
  6411. emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
  6412. break;
  6413. case GLSLstd450UnpackSnorm2x16:
  6414. emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
  6415. break;
  6416. case GLSLstd450UnpackUnorm2x16:
  6417. emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
  6418. break;
  6419. case GLSLstd450UnpackHalf2x16:
  6420. emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
  6421. break;
  6422. case GLSLstd450PackDouble2x32:
  6423. emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
  6424. break;
  6425. case GLSLstd450UnpackDouble2x32:
  6426. emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
  6427. break;
  6428. // Vector math
  6429. case GLSLstd450Length:
  6430. emit_unary_func_op(result_type, id, args[0], "length");
  6431. break;
  6432. case GLSLstd450Distance:
  6433. emit_binary_func_op(result_type, id, args[0], args[1], "distance");
  6434. break;
  6435. case GLSLstd450Cross:
  6436. emit_binary_func_op(result_type, id, args[0], args[1], "cross");
  6437. break;
  6438. case GLSLstd450Normalize:
  6439. emit_unary_func_op(result_type, id, args[0], "normalize");
  6440. break;
  6441. case GLSLstd450FaceForward:
  6442. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
  6443. break;
  6444. case GLSLstd450Reflect:
  6445. emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
  6446. break;
  6447. case GLSLstd450Refract:
  6448. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
  6449. break;
  6450. // Bit-fiddling
  6451. case GLSLstd450FindILsb:
  6452. // findLSB always returns int.
  6453. emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
  6454. break;
  6455. case GLSLstd450FindSMsb:
  6456. emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
  6457. break;
  6458. case GLSLstd450FindUMsb:
  6459. emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
  6460. int_type); // findMSB always returns int.
  6461. break;
  6462. // Multisampled varying
  6463. case GLSLstd450InterpolateAtCentroid:
  6464. emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
  6465. break;
  6466. case GLSLstd450InterpolateAtSample:
  6467. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
  6468. break;
  6469. case GLSLstd450InterpolateAtOffset:
  6470. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
  6471. break;
  6472. case GLSLstd450NMin:
  6473. case GLSLstd450NMax:
  6474. {
  6475. emit_nminmax_op(result_type, id, args[0], args[1], op);
  6476. break;
  6477. }
  6478. case GLSLstd450NClamp:
  6479. {
  6480. // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
  6481. // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
  6482. uint32_t &max_id = extra_sub_expressions[id | 0x80000000u];
  6483. if (!max_id)
  6484. max_id = ir.increase_bound_by(1);
  6485. // Inherit precision qualifiers.
  6486. ir.meta[max_id] = ir.meta[id];
  6487. emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
  6488. emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
  6489. break;
  6490. }
  6491. default:
  6492. statement("// unimplemented GLSL op ", eop);
  6493. break;
  6494. }
  6495. }
  6496. void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
  6497. {
  6498. // Need to emulate this call.
  6499. uint32_t &ids = extra_sub_expressions[id];
  6500. if (!ids)
  6501. {
  6502. ids = ir.increase_bound_by(5);
  6503. auto btype = get<SPIRType>(result_type);
  6504. btype.basetype = SPIRType::Boolean;
  6505. set<SPIRType>(ids, btype);
  6506. }
  6507. uint32_t btype_id = ids + 0;
  6508. uint32_t left_nan_id = ids + 1;
  6509. uint32_t right_nan_id = ids + 2;
  6510. uint32_t tmp_id = ids + 3;
  6511. uint32_t mixed_first_id = ids + 4;
  6512. // Inherit precision qualifiers.
  6513. ir.meta[tmp_id] = ir.meta[id];
  6514. ir.meta[mixed_first_id] = ir.meta[id];
  6515. emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
  6516. emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
  6517. emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
  6518. emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
  6519. emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
  6520. }
  6521. void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
  6522. uint32_t)
  6523. {
  6524. require_extension_internal("GL_AMD_shader_ballot");
  6525. enum AMDShaderBallot
  6526. {
  6527. SwizzleInvocationsAMD = 1,
  6528. SwizzleInvocationsMaskedAMD = 2,
  6529. WriteInvocationAMD = 3,
  6530. MbcntAMD = 4
  6531. };
  6532. auto op = static_cast<AMDShaderBallot>(eop);
  6533. switch (op)
  6534. {
  6535. case SwizzleInvocationsAMD:
  6536. emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
  6537. register_control_dependent_expression(id);
  6538. break;
  6539. case SwizzleInvocationsMaskedAMD:
  6540. emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
  6541. register_control_dependent_expression(id);
  6542. break;
  6543. case WriteInvocationAMD:
  6544. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
  6545. register_control_dependent_expression(id);
  6546. break;
  6547. case MbcntAMD:
  6548. emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
  6549. register_control_dependent_expression(id);
  6550. break;
  6551. default:
  6552. statement("// unimplemented SPV AMD shader ballot op ", eop);
  6553. break;
  6554. }
  6555. }
  6556. void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
  6557. const uint32_t *args, uint32_t)
  6558. {
  6559. require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
  6560. enum AMDShaderExplicitVertexParameter
  6561. {
  6562. InterpolateAtVertexAMD = 1
  6563. };
  6564. auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
  6565. switch (op)
  6566. {
  6567. case InterpolateAtVertexAMD:
  6568. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
  6569. break;
  6570. default:
  6571. statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
  6572. break;
  6573. }
  6574. }
  6575. void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
  6576. const uint32_t *args, uint32_t)
  6577. {
  6578. require_extension_internal("GL_AMD_shader_trinary_minmax");
  6579. enum AMDShaderTrinaryMinMax
  6580. {
  6581. FMin3AMD = 1,
  6582. UMin3AMD = 2,
  6583. SMin3AMD = 3,
  6584. FMax3AMD = 4,
  6585. UMax3AMD = 5,
  6586. SMax3AMD = 6,
  6587. FMid3AMD = 7,
  6588. UMid3AMD = 8,
  6589. SMid3AMD = 9
  6590. };
  6591. auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
  6592. switch (op)
  6593. {
  6594. case FMin3AMD:
  6595. case UMin3AMD:
  6596. case SMin3AMD:
  6597. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
  6598. break;
  6599. case FMax3AMD:
  6600. case UMax3AMD:
  6601. case SMax3AMD:
  6602. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
  6603. break;
  6604. case FMid3AMD:
  6605. case UMid3AMD:
  6606. case SMid3AMD:
  6607. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
  6608. break;
  6609. default:
  6610. statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
  6611. break;
  6612. }
  6613. }
  6614. void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
  6615. uint32_t)
  6616. {
  6617. require_extension_internal("GL_AMD_gcn_shader");
  6618. enum AMDGCNShader
  6619. {
  6620. CubeFaceIndexAMD = 1,
  6621. CubeFaceCoordAMD = 2,
  6622. TimeAMD = 3
  6623. };
  6624. auto op = static_cast<AMDGCNShader>(eop);
  6625. switch (op)
  6626. {
  6627. case CubeFaceIndexAMD:
  6628. emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
  6629. break;
  6630. case CubeFaceCoordAMD:
  6631. emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
  6632. break;
  6633. case TimeAMD:
  6634. {
  6635. string expr = "timeAMD()";
  6636. emit_op(result_type, id, expr, true);
  6637. register_control_dependent_expression(id);
  6638. break;
  6639. }
  6640. default:
  6641. statement("// unimplemented SPV AMD gcn shader op ", eop);
  6642. break;
  6643. }
  6644. }
  6645. void CompilerGLSL::emit_subgroup_op(const Instruction &i)
  6646. {
  6647. const uint32_t *ops = stream(i);
  6648. auto op = static_cast<Op>(i.op);
  6649. if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
  6650. SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
  6651. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  6652. uint32_t integer_width = get_integer_width_for_instruction(i);
  6653. auto int_type = to_signed_basetype(integer_width);
  6654. auto uint_type = to_unsigned_basetype(integer_width);
  6655. switch (op)
  6656. {
  6657. case OpGroupNonUniformElect:
  6658. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
  6659. break;
  6660. case OpGroupNonUniformBallotBitCount:
  6661. {
  6662. const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
  6663. if (operation == GroupOperationReduce)
  6664. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
  6665. else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
  6666. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
  6667. }
  6668. break;
  6669. case OpGroupNonUniformBallotBitExtract:
  6670. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
  6671. break;
  6672. case OpGroupNonUniformInverseBallot:
  6673. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
  6674. break;
  6675. case OpGroupNonUniformBallot:
  6676. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
  6677. break;
  6678. case OpGroupNonUniformBallotFindLSB:
  6679. case OpGroupNonUniformBallotFindMSB:
  6680. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
  6681. break;
  6682. case OpGroupNonUniformBroadcast:
  6683. case OpGroupNonUniformBroadcastFirst:
  6684. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
  6685. break;
  6686. case OpGroupNonUniformShuffle:
  6687. case OpGroupNonUniformShuffleXor:
  6688. require_extension_internal("GL_KHR_shader_subgroup_shuffle");
  6689. break;
  6690. case OpGroupNonUniformShuffleUp:
  6691. case OpGroupNonUniformShuffleDown:
  6692. require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
  6693. break;
  6694. case OpGroupNonUniformAll:
  6695. case OpGroupNonUniformAny:
  6696. case OpGroupNonUniformAllEqual:
  6697. {
  6698. const SPIRType &type = expression_type(ops[3]);
  6699. if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
  6700. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
  6701. else
  6702. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
  6703. }
  6704. break;
  6705. case OpGroupNonUniformFAdd:
  6706. case OpGroupNonUniformFMul:
  6707. case OpGroupNonUniformFMin:
  6708. case OpGroupNonUniformFMax:
  6709. case OpGroupNonUniformIAdd:
  6710. case OpGroupNonUniformIMul:
  6711. case OpGroupNonUniformSMin:
  6712. case OpGroupNonUniformSMax:
  6713. case OpGroupNonUniformUMin:
  6714. case OpGroupNonUniformUMax:
  6715. case OpGroupNonUniformBitwiseAnd:
  6716. case OpGroupNonUniformBitwiseOr:
  6717. case OpGroupNonUniformBitwiseXor:
  6718. {
  6719. auto operation = static_cast<GroupOperation>(ops[3]);
  6720. if (operation == GroupOperationClusteredReduce)
  6721. {
  6722. require_extension_internal("GL_KHR_shader_subgroup_clustered");
  6723. }
  6724. else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
  6725. operation == GroupOperationReduce)
  6726. {
  6727. require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
  6728. }
  6729. else
  6730. SPIRV_CROSS_THROW("Invalid group operation.");
  6731. break;
  6732. }
  6733. case OpGroupNonUniformQuadSwap:
  6734. case OpGroupNonUniformQuadBroadcast:
  6735. require_extension_internal("GL_KHR_shader_subgroup_quad");
  6736. break;
  6737. default:
  6738. SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
  6739. }
  6740. uint32_t result_type = ops[0];
  6741. uint32_t id = ops[1];
  6742. auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
  6743. if (scope != ScopeSubgroup)
  6744. SPIRV_CROSS_THROW("Only subgroup scope is supported.");
  6745. switch (op)
  6746. {
  6747. case OpGroupNonUniformElect:
  6748. emit_op(result_type, id, "subgroupElect()", true);
  6749. break;
  6750. case OpGroupNonUniformBroadcast:
  6751. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
  6752. break;
  6753. case OpGroupNonUniformBroadcastFirst:
  6754. emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
  6755. break;
  6756. case OpGroupNonUniformBallot:
  6757. emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
  6758. break;
  6759. case OpGroupNonUniformInverseBallot:
  6760. emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
  6761. break;
  6762. case OpGroupNonUniformBallotBitExtract:
  6763. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
  6764. break;
  6765. case OpGroupNonUniformBallotFindLSB:
  6766. emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
  6767. break;
  6768. case OpGroupNonUniformBallotFindMSB:
  6769. emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
  6770. break;
  6771. case OpGroupNonUniformBallotBitCount:
  6772. {
  6773. auto operation = static_cast<GroupOperation>(ops[3]);
  6774. if (operation == GroupOperationReduce)
  6775. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
  6776. else if (operation == GroupOperationInclusiveScan)
  6777. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
  6778. else if (operation == GroupOperationExclusiveScan)
  6779. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
  6780. else
  6781. SPIRV_CROSS_THROW("Invalid BitCount operation.");
  6782. break;
  6783. }
  6784. case OpGroupNonUniformShuffle:
  6785. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
  6786. break;
  6787. case OpGroupNonUniformShuffleXor:
  6788. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
  6789. break;
  6790. case OpGroupNonUniformShuffleUp:
  6791. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
  6792. break;
  6793. case OpGroupNonUniformShuffleDown:
  6794. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
  6795. break;
  6796. case OpGroupNonUniformAll:
  6797. emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
  6798. break;
  6799. case OpGroupNonUniformAny:
  6800. emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
  6801. break;
  6802. case OpGroupNonUniformAllEqual:
  6803. emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
  6804. break;
  6805. // clang-format off
  6806. #define GLSL_GROUP_OP(op, glsl_op) \
  6807. case OpGroupNonUniform##op: \
  6808. { \
  6809. auto operation = static_cast<GroupOperation>(ops[3]); \
  6810. if (operation == GroupOperationReduce) \
  6811. emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
  6812. else if (operation == GroupOperationInclusiveScan) \
  6813. emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
  6814. else if (operation == GroupOperationExclusiveScan) \
  6815. emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
  6816. else if (operation == GroupOperationClusteredReduce) \
  6817. emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
  6818. else \
  6819. SPIRV_CROSS_THROW("Invalid group operation."); \
  6820. break; \
  6821. }
  6822. #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
  6823. case OpGroupNonUniform##op: \
  6824. { \
  6825. auto operation = static_cast<GroupOperation>(ops[3]); \
  6826. if (operation == GroupOperationReduce) \
  6827. emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
  6828. else if (operation == GroupOperationInclusiveScan) \
  6829. emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
  6830. else if (operation == GroupOperationExclusiveScan) \
  6831. emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
  6832. else if (operation == GroupOperationClusteredReduce) \
  6833. emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
  6834. else \
  6835. SPIRV_CROSS_THROW("Invalid group operation."); \
  6836. break; \
  6837. }
  6838. GLSL_GROUP_OP(FAdd, Add)
  6839. GLSL_GROUP_OP(FMul, Mul)
  6840. GLSL_GROUP_OP(FMin, Min)
  6841. GLSL_GROUP_OP(FMax, Max)
  6842. GLSL_GROUP_OP(IAdd, Add)
  6843. GLSL_GROUP_OP(IMul, Mul)
  6844. GLSL_GROUP_OP_CAST(SMin, Min, int_type)
  6845. GLSL_GROUP_OP_CAST(SMax, Max, int_type)
  6846. GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
  6847. GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
  6848. GLSL_GROUP_OP(BitwiseAnd, And)
  6849. GLSL_GROUP_OP(BitwiseOr, Or)
  6850. GLSL_GROUP_OP(BitwiseXor, Xor)
  6851. #undef GLSL_GROUP_OP
  6852. #undef GLSL_GROUP_OP_CAST
  6853. // clang-format on
  6854. case OpGroupNonUniformQuadSwap:
  6855. {
  6856. uint32_t direction = evaluate_constant_u32(ops[4]);
  6857. if (direction == 0)
  6858. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
  6859. else if (direction == 1)
  6860. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
  6861. else if (direction == 2)
  6862. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
  6863. else
  6864. SPIRV_CROSS_THROW("Invalid quad swap direction.");
  6865. break;
  6866. }
  6867. case OpGroupNonUniformQuadBroadcast:
  6868. {
  6869. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
  6870. break;
  6871. }
  6872. default:
  6873. SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
  6874. }
  6875. register_control_dependent_expression(id);
  6876. }
  6877. string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
  6878. {
  6879. // OpBitcast can deal with pointers.
  6880. if (out_type.pointer || in_type.pointer)
  6881. return type_to_glsl(out_type);
  6882. if (out_type.basetype == in_type.basetype)
  6883. return "";
  6884. assert(out_type.basetype != SPIRType::Boolean);
  6885. assert(in_type.basetype != SPIRType::Boolean);
  6886. bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
  6887. bool same_size_cast = out_type.width == in_type.width;
  6888. // Trivial bitcast case, casts between integers.
  6889. if (integral_cast && same_size_cast)
  6890. return type_to_glsl(out_type);
  6891. // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
  6892. if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
  6893. return "unpack8";
  6894. else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
  6895. return "pack16";
  6896. else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
  6897. return "pack32";
  6898. // Floating <-> Integer special casts. Just have to enumerate all cases. :(
  6899. // 16-bit, 32-bit and 64-bit floats.
  6900. if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
  6901. {
  6902. if (is_legacy_es())
  6903. SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
  6904. else if (!options.es && options.version < 330)
  6905. require_extension_internal("GL_ARB_shader_bit_encoding");
  6906. return "floatBitsToUint";
  6907. }
  6908. else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
  6909. {
  6910. if (is_legacy_es())
  6911. SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
  6912. else if (!options.es && options.version < 330)
  6913. require_extension_internal("GL_ARB_shader_bit_encoding");
  6914. return "floatBitsToInt";
  6915. }
  6916. else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
  6917. {
  6918. if (is_legacy_es())
  6919. SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
  6920. else if (!options.es && options.version < 330)
  6921. require_extension_internal("GL_ARB_shader_bit_encoding");
  6922. return "uintBitsToFloat";
  6923. }
  6924. else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
  6925. {
  6926. if (is_legacy_es())
  6927. SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
  6928. else if (!options.es && options.version < 330)
  6929. require_extension_internal("GL_ARB_shader_bit_encoding");
  6930. return "intBitsToFloat";
  6931. }
  6932. else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
  6933. return "doubleBitsToInt64";
  6934. else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
  6935. return "doubleBitsToUint64";
  6936. else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
  6937. return "int64BitsToDouble";
  6938. else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
  6939. return "uint64BitsToDouble";
  6940. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
  6941. return "float16BitsToInt16";
  6942. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
  6943. return "float16BitsToUint16";
  6944. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
  6945. return "int16BitsToFloat16";
  6946. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
  6947. return "uint16BitsToFloat16";
  6948. // And finally, some even more special purpose casts.
  6949. if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
  6950. return "packUint2x32";
  6951. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
  6952. return "unpackUint2x32";
  6953. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
  6954. return "unpackFloat2x16";
  6955. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
  6956. return "packFloat2x16";
  6957. else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
  6958. return "packInt2x16";
  6959. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
  6960. return "unpackInt2x16";
  6961. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
  6962. return "packUint2x16";
  6963. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
  6964. return "unpackUint2x16";
  6965. else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
  6966. return "packInt4x16";
  6967. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
  6968. return "unpackInt4x16";
  6969. else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
  6970. return "packUint4x16";
  6971. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
  6972. return "unpackUint4x16";
  6973. return "";
  6974. }
  6975. string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
  6976. {
  6977. auto op = bitcast_glsl_op(result_type, expression_type(argument));
  6978. if (op.empty())
  6979. return to_enclosed_unpacked_expression(argument);
  6980. else
  6981. return join(op, "(", to_unpacked_expression(argument), ")");
  6982. }
  6983. std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
  6984. {
  6985. auto expr = to_expression(arg);
  6986. auto &src_type = expression_type(arg);
  6987. if (src_type.basetype != target_type)
  6988. {
  6989. auto target = src_type;
  6990. target.basetype = target_type;
  6991. expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
  6992. }
  6993. return expr;
  6994. }
  6995. std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
  6996. const std::string &expr)
  6997. {
  6998. if (target_type.basetype == expr_type)
  6999. return expr;
  7000. auto src_type = target_type;
  7001. src_type.basetype = expr_type;
  7002. return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
  7003. }
  7004. string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
  7005. {
  7006. switch (builtin)
  7007. {
  7008. case BuiltInPosition:
  7009. return "gl_Position";
  7010. case BuiltInPointSize:
  7011. return "gl_PointSize";
  7012. case BuiltInClipDistance:
  7013. return "gl_ClipDistance";
  7014. case BuiltInCullDistance:
  7015. return "gl_CullDistance";
  7016. case BuiltInVertexId:
  7017. if (options.vulkan_semantics)
  7018. SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
  7019. "with GL semantics.");
  7020. return "gl_VertexID";
  7021. case BuiltInInstanceId:
  7022. if (options.vulkan_semantics)
  7023. {
  7024. auto model = get_entry_point().model;
  7025. switch (model)
  7026. {
  7027. case spv::ExecutionModelIntersectionKHR:
  7028. case spv::ExecutionModelAnyHitKHR:
  7029. case spv::ExecutionModelClosestHitKHR:
  7030. // gl_InstanceID is allowed in these shaders.
  7031. break;
  7032. default:
  7033. SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
  7034. "created with GL semantics.");
  7035. }
  7036. }
  7037. if (!options.es && options.version < 140)
  7038. {
  7039. require_extension_internal("GL_ARB_draw_instanced");
  7040. }
  7041. return "gl_InstanceID";
  7042. case BuiltInVertexIndex:
  7043. if (options.vulkan_semantics)
  7044. return "gl_VertexIndex";
  7045. else
  7046. return "gl_VertexID"; // gl_VertexID already has the base offset applied.
  7047. case BuiltInInstanceIndex:
  7048. if (options.vulkan_semantics)
  7049. return "gl_InstanceIndex";
  7050. if (!options.es && options.version < 140)
  7051. {
  7052. require_extension_internal("GL_ARB_draw_instanced");
  7053. }
  7054. if (options.vertex.support_nonzero_base_instance)
  7055. {
  7056. if (!options.vulkan_semantics)
  7057. {
  7058. // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
  7059. require_extension_internal("GL_ARB_shader_draw_parameters");
  7060. }
  7061. return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
  7062. }
  7063. else
  7064. return "gl_InstanceID";
  7065. case BuiltInPrimitiveId:
  7066. if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
  7067. return "gl_PrimitiveIDIn";
  7068. else
  7069. return "gl_PrimitiveID";
  7070. case BuiltInInvocationId:
  7071. return "gl_InvocationID";
  7072. case BuiltInLayer:
  7073. return "gl_Layer";
  7074. case BuiltInViewportIndex:
  7075. return "gl_ViewportIndex";
  7076. case BuiltInTessLevelOuter:
  7077. return "gl_TessLevelOuter";
  7078. case BuiltInTessLevelInner:
  7079. return "gl_TessLevelInner";
  7080. case BuiltInTessCoord:
  7081. return "gl_TessCoord";
  7082. case BuiltInFragCoord:
  7083. return "gl_FragCoord";
  7084. case BuiltInPointCoord:
  7085. return "gl_PointCoord";
  7086. case BuiltInFrontFacing:
  7087. return "gl_FrontFacing";
  7088. case BuiltInFragDepth:
  7089. return "gl_FragDepth";
  7090. case BuiltInNumWorkgroups:
  7091. return "gl_NumWorkGroups";
  7092. case BuiltInWorkgroupSize:
  7093. return "gl_WorkGroupSize";
  7094. case BuiltInWorkgroupId:
  7095. return "gl_WorkGroupID";
  7096. case BuiltInLocalInvocationId:
  7097. return "gl_LocalInvocationID";
  7098. case BuiltInGlobalInvocationId:
  7099. return "gl_GlobalInvocationID";
  7100. case BuiltInLocalInvocationIndex:
  7101. return "gl_LocalInvocationIndex";
  7102. case BuiltInHelperInvocation:
  7103. return "gl_HelperInvocation";
  7104. case BuiltInBaseVertex:
  7105. if (options.es)
  7106. SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
  7107. if (options.vulkan_semantics)
  7108. {
  7109. if (options.version < 460)
  7110. {
  7111. require_extension_internal("GL_ARB_shader_draw_parameters");
  7112. return "gl_BaseVertexARB";
  7113. }
  7114. return "gl_BaseVertex";
  7115. }
  7116. else
  7117. {
  7118. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  7119. require_extension_internal("GL_ARB_shader_draw_parameters");
  7120. return "SPIRV_Cross_BaseVertex";
  7121. }
  7122. break;
  7123. case BuiltInBaseInstance:
  7124. if (options.es)
  7125. SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
  7126. if (options.vulkan_semantics)
  7127. {
  7128. if (options.version < 460)
  7129. {
  7130. require_extension_internal("GL_ARB_shader_draw_parameters");
  7131. return "gl_BaseInstanceARB";
  7132. }
  7133. return "gl_BaseInstance";
  7134. }
  7135. else
  7136. {
  7137. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  7138. require_extension_internal("GL_ARB_shader_draw_parameters");
  7139. return "SPIRV_Cross_BaseInstance";
  7140. }
  7141. break;
  7142. case BuiltInDrawIndex:
  7143. if (options.es)
  7144. SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
  7145. if (options.vulkan_semantics)
  7146. {
  7147. if (options.version < 460)
  7148. {
  7149. require_extension_internal("GL_ARB_shader_draw_parameters");
  7150. return "gl_DrawIDARB";
  7151. }
  7152. return "gl_DrawID";
  7153. }
  7154. else
  7155. {
  7156. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  7157. require_extension_internal("GL_ARB_shader_draw_parameters");
  7158. return "gl_DrawIDARB";
  7159. }
  7160. break;
  7161. case BuiltInSampleId:
  7162. if (options.es && options.version < 320)
  7163. require_extension_internal("GL_OES_sample_variables");
  7164. if (!options.es && options.version < 400)
  7165. SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
  7166. return "gl_SampleID";
  7167. case BuiltInSampleMask:
  7168. if (options.es && options.version < 320)
  7169. require_extension_internal("GL_OES_sample_variables");
  7170. if (!options.es && options.version < 400)
  7171. SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
  7172. if (storage == StorageClassInput)
  7173. return "gl_SampleMaskIn";
  7174. else
  7175. return "gl_SampleMask";
  7176. case BuiltInSamplePosition:
  7177. if (options.es && options.version < 320)
  7178. require_extension_internal("GL_OES_sample_variables");
  7179. if (!options.es && options.version < 400)
  7180. SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
  7181. return "gl_SamplePosition";
  7182. case BuiltInViewIndex:
  7183. if (options.vulkan_semantics)
  7184. {
  7185. require_extension_internal("GL_EXT_multiview");
  7186. return "gl_ViewIndex";
  7187. }
  7188. else
  7189. {
  7190. require_extension_internal("GL_OVR_multiview2");
  7191. return "gl_ViewID_OVR";
  7192. }
  7193. case BuiltInNumSubgroups:
  7194. request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
  7195. return "gl_NumSubgroups";
  7196. case BuiltInSubgroupId:
  7197. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
  7198. return "gl_SubgroupID";
  7199. case BuiltInSubgroupSize:
  7200. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
  7201. return "gl_SubgroupSize";
  7202. case BuiltInSubgroupLocalInvocationId:
  7203. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
  7204. return "gl_SubgroupInvocationID";
  7205. case BuiltInSubgroupEqMask:
  7206. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  7207. return "gl_SubgroupEqMask";
  7208. case BuiltInSubgroupGeMask:
  7209. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  7210. return "gl_SubgroupGeMask";
  7211. case BuiltInSubgroupGtMask:
  7212. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  7213. return "gl_SubgroupGtMask";
  7214. case BuiltInSubgroupLeMask:
  7215. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  7216. return "gl_SubgroupLeMask";
  7217. case BuiltInSubgroupLtMask:
  7218. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  7219. return "gl_SubgroupLtMask";
  7220. case BuiltInLaunchIdKHR:
  7221. return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
  7222. case BuiltInLaunchSizeKHR:
  7223. return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
  7224. case BuiltInWorldRayOriginKHR:
  7225. return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
  7226. case BuiltInWorldRayDirectionKHR:
  7227. return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
  7228. case BuiltInObjectRayOriginKHR:
  7229. return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
  7230. case BuiltInObjectRayDirectionKHR:
  7231. return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
  7232. case BuiltInRayTminKHR:
  7233. return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
  7234. case BuiltInRayTmaxKHR:
  7235. return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
  7236. case BuiltInInstanceCustomIndexKHR:
  7237. return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
  7238. case BuiltInObjectToWorldKHR:
  7239. return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
  7240. case BuiltInWorldToObjectKHR:
  7241. return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
  7242. case BuiltInHitTNV:
  7243. // gl_HitTEXT is an alias of RayTMax in KHR.
  7244. return "gl_HitTNV";
  7245. case BuiltInHitKindKHR:
  7246. return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
  7247. case BuiltInIncomingRayFlagsKHR:
  7248. return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
  7249. case BuiltInBaryCoordNV:
  7250. {
  7251. if (options.es && options.version < 320)
  7252. SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
  7253. else if (!options.es && options.version < 450)
  7254. SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
  7255. require_extension_internal("GL_NV_fragment_shader_barycentric");
  7256. return "gl_BaryCoordNV";
  7257. }
  7258. case BuiltInBaryCoordNoPerspNV:
  7259. {
  7260. if (options.es && options.version < 320)
  7261. SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
  7262. else if (!options.es && options.version < 450)
  7263. SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
  7264. require_extension_internal("GL_NV_fragment_shader_barycentric");
  7265. return "gl_BaryCoordNoPerspNV";
  7266. }
  7267. case BuiltInFragStencilRefEXT:
  7268. {
  7269. if (!options.es)
  7270. {
  7271. require_extension_internal("GL_ARB_shader_stencil_export");
  7272. return "gl_FragStencilRefARB";
  7273. }
  7274. else
  7275. SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
  7276. }
  7277. case BuiltInDeviceIndex:
  7278. if (!options.vulkan_semantics)
  7279. SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
  7280. require_extension_internal("GL_EXT_device_group");
  7281. return "gl_DeviceIndex";
  7282. default:
  7283. return join("gl_BuiltIn_", convert_to_string(builtin));
  7284. }
  7285. }
  7286. const char *CompilerGLSL::index_to_swizzle(uint32_t index)
  7287. {
  7288. switch (index)
  7289. {
  7290. case 0:
  7291. return "x";
  7292. case 1:
  7293. return "y";
  7294. case 2:
  7295. return "z";
  7296. case 3:
  7297. return "w";
  7298. default:
  7299. SPIRV_CROSS_THROW("Swizzle index out of range");
  7300. }
  7301. }
  7302. void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType *type,
  7303. AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
  7304. uint32_t index)
  7305. {
  7306. bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
  7307. bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
  7308. expr += "[";
  7309. // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier.
  7310. bool nonuniform_index =
  7311. has_decoration(index, DecorationNonUniformEXT) &&
  7312. (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock));
  7313. if (nonuniform_index)
  7314. {
  7315. expr += backend.nonuniform_qualifier;
  7316. expr += "(";
  7317. }
  7318. if (index_is_literal)
  7319. expr += convert_to_string(index);
  7320. else
  7321. expr += to_expression(index, register_expression_read);
  7322. if (nonuniform_index)
  7323. expr += ")";
  7324. expr += "]";
  7325. }
  7326. string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
  7327. AccessChainFlags flags, AccessChainMeta *meta)
  7328. {
  7329. string expr;
  7330. bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
  7331. bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
  7332. bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
  7333. bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
  7334. bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
  7335. bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
  7336. if (!chain_only)
  7337. {
  7338. // We handle transpose explicitly, so don't resolve that here.
  7339. auto *e = maybe_get<SPIRExpression>(base);
  7340. bool old_transpose = e && e->need_transpose;
  7341. if (e)
  7342. e->need_transpose = false;
  7343. expr = to_enclosed_expression(base, register_expression_read);
  7344. if (e)
  7345. e->need_transpose = old_transpose;
  7346. }
  7347. // Start traversing type hierarchy at the proper non-pointer types,
  7348. // but keep type_id referencing the original pointer for use below.
  7349. uint32_t type_id = expression_type_id(base);
  7350. if (!backend.native_pointers)
  7351. {
  7352. if (ptr_chain)
  7353. SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
  7354. // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
  7355. // continuing the access chain.
  7356. if (should_dereference(base))
  7357. {
  7358. auto &type = get<SPIRType>(type_id);
  7359. expr = dereference_expression(type, expr);
  7360. }
  7361. }
  7362. const auto *type = &get_pointee_type(type_id);
  7363. bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
  7364. bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
  7365. bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
  7366. uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
  7367. bool is_invariant = has_decoration(base, DecorationInvariant);
  7368. bool pending_array_enclose = false;
  7369. bool dimension_flatten = false;
  7370. const auto append_index = [&](uint32_t index, bool is_literal) {
  7371. AccessChainFlags mod_flags = flags;
  7372. if (!is_literal)
  7373. mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
  7374. access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
  7375. };
  7376. for (uint32_t i = 0; i < count; i++)
  7377. {
  7378. uint32_t index = indices[i];
  7379. bool is_literal = index_is_literal;
  7380. if (is_literal && msb_is_id && (index >> 31u) != 0u)
  7381. {
  7382. is_literal = false;
  7383. index &= 0x7fffffffu;
  7384. }
  7385. // Pointer chains
  7386. if (ptr_chain && i == 0)
  7387. {
  7388. // If we are flattening multidimensional arrays, only create opening bracket on first
  7389. // array index.
  7390. if (options.flatten_multidimensional_arrays)
  7391. {
  7392. dimension_flatten = type->array.size() >= 1;
  7393. pending_array_enclose = dimension_flatten;
  7394. if (pending_array_enclose)
  7395. expr += "[";
  7396. }
  7397. if (options.flatten_multidimensional_arrays && dimension_flatten)
  7398. {
  7399. // If we are flattening multidimensional arrays, do manual stride computation.
  7400. if (is_literal)
  7401. expr += convert_to_string(index);
  7402. else
  7403. expr += to_enclosed_expression(index, register_expression_read);
  7404. for (auto j = uint32_t(type->array.size()); j; j--)
  7405. {
  7406. expr += " * ";
  7407. expr += enclose_expression(to_array_size(*type, j - 1));
  7408. }
  7409. if (type->array.empty())
  7410. pending_array_enclose = false;
  7411. else
  7412. expr += " + ";
  7413. if (!pending_array_enclose)
  7414. expr += "]";
  7415. }
  7416. else
  7417. {
  7418. append_index(index, is_literal);
  7419. }
  7420. if (type->basetype == SPIRType::ControlPointArray)
  7421. {
  7422. type_id = type->parent_type;
  7423. type = &get<SPIRType>(type_id);
  7424. }
  7425. access_chain_is_arrayed = true;
  7426. }
  7427. // Arrays
  7428. else if (!type->array.empty())
  7429. {
  7430. // If we are flattening multidimensional arrays, only create opening bracket on first
  7431. // array index.
  7432. if (options.flatten_multidimensional_arrays && !pending_array_enclose)
  7433. {
  7434. dimension_flatten = type->array.size() > 1;
  7435. pending_array_enclose = dimension_flatten;
  7436. if (pending_array_enclose)
  7437. expr += "[";
  7438. }
  7439. assert(type->parent_type);
  7440. auto *var = maybe_get<SPIRVariable>(base);
  7441. if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
  7442. !has_decoration(type->self, DecorationBlock))
  7443. {
  7444. // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
  7445. // Normally, these variables live in blocks when compiled from GLSL,
  7446. // but HLSL seems to just emit straight arrays here.
  7447. // We must pretend this access goes through gl_in/gl_out arrays
  7448. // to be able to access certain builtins as arrays.
  7449. auto builtin = ir.meta[base].decoration.builtin_type;
  7450. switch (builtin)
  7451. {
  7452. // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
  7453. // case BuiltInClipDistance:
  7454. case BuiltInPosition:
  7455. case BuiltInPointSize:
  7456. if (var->storage == StorageClassInput)
  7457. expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
  7458. else if (var->storage == StorageClassOutput)
  7459. expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
  7460. else
  7461. append_index(index, is_literal);
  7462. break;
  7463. default:
  7464. append_index(index, is_literal);
  7465. break;
  7466. }
  7467. }
  7468. else if (options.flatten_multidimensional_arrays && dimension_flatten)
  7469. {
  7470. // If we are flattening multidimensional arrays, do manual stride computation.
  7471. auto &parent_type = get<SPIRType>(type->parent_type);
  7472. if (is_literal)
  7473. expr += convert_to_string(index);
  7474. else
  7475. expr += to_enclosed_expression(index, register_expression_read);
  7476. for (auto j = uint32_t(parent_type.array.size()); j; j--)
  7477. {
  7478. expr += " * ";
  7479. expr += enclose_expression(to_array_size(parent_type, j - 1));
  7480. }
  7481. if (parent_type.array.empty())
  7482. pending_array_enclose = false;
  7483. else
  7484. expr += " + ";
  7485. if (!pending_array_enclose)
  7486. expr += "]";
  7487. }
  7488. // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
  7489. // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
  7490. else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
  7491. {
  7492. append_index(index, is_literal);
  7493. }
  7494. type_id = type->parent_type;
  7495. type = &get<SPIRType>(type_id);
  7496. access_chain_is_arrayed = true;
  7497. }
  7498. // For structs, the index refers to a constant, which indexes into the members.
  7499. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
  7500. else if (type->basetype == SPIRType::Struct)
  7501. {
  7502. if (!is_literal)
  7503. index = evaluate_constant_u32(index);
  7504. if (index >= type->member_types.size())
  7505. SPIRV_CROSS_THROW("Member index is out of bounds!");
  7506. BuiltIn builtin;
  7507. if (is_member_builtin(*type, index, &builtin))
  7508. {
  7509. if (access_chain_is_arrayed)
  7510. {
  7511. expr += ".";
  7512. expr += builtin_to_glsl(builtin, type->storage);
  7513. }
  7514. else
  7515. expr = builtin_to_glsl(builtin, type->storage);
  7516. }
  7517. else
  7518. {
  7519. // If the member has a qualified name, use it as the entire chain
  7520. string qual_mbr_name = get_member_qualified_name(type_id, index);
  7521. if (!qual_mbr_name.empty())
  7522. expr = qual_mbr_name;
  7523. else if (flatten_member_reference)
  7524. expr += join("_", to_member_name(*type, index));
  7525. else
  7526. expr += to_member_reference(base, *type, index, ptr_chain);
  7527. }
  7528. if (has_member_decoration(type->self, index, DecorationInvariant))
  7529. is_invariant = true;
  7530. is_packed = member_is_packed_physical_type(*type, index);
  7531. if (member_is_remapped_physical_type(*type, index))
  7532. physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
  7533. else
  7534. physical_type = 0;
  7535. row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
  7536. type = &get<SPIRType>(type->member_types[index]);
  7537. }
  7538. // Matrix -> Vector
  7539. else if (type->columns > 1)
  7540. {
  7541. // If we have a row-major matrix here, we need to defer any transpose in case this access chain
  7542. // is used to store a column. We can resolve it right here and now if we access a scalar directly,
  7543. // by flipping indexing order of the matrix.
  7544. expr += "[";
  7545. if (is_literal)
  7546. expr += convert_to_string(index);
  7547. else
  7548. expr += to_expression(index, register_expression_read);
  7549. expr += "]";
  7550. type_id = type->parent_type;
  7551. type = &get<SPIRType>(type_id);
  7552. }
  7553. // Vector -> Scalar
  7554. else if (type->vecsize > 1)
  7555. {
  7556. string deferred_index;
  7557. if (row_major_matrix_needs_conversion)
  7558. {
  7559. // Flip indexing order.
  7560. auto column_index = expr.find_last_of('[');
  7561. if (column_index != string::npos)
  7562. {
  7563. deferred_index = expr.substr(column_index);
  7564. expr.resize(column_index);
  7565. }
  7566. }
  7567. // Internally, access chain implementation can also be used on composites,
  7568. // ignore scalar access workarounds in this case.
  7569. StorageClass effective_storage;
  7570. if (expression_type(base).pointer)
  7571. effective_storage = get_expression_effective_storage_class(base);
  7572. else
  7573. effective_storage = StorageClassGeneric;
  7574. if (!row_major_matrix_needs_conversion)
  7575. {
  7576. // On some backends, we might not be able to safely access individual scalars in a vector.
  7577. // To work around this, we might have to cast the access chain reference to something which can,
  7578. // like a pointer to scalar, which we can then index into.
  7579. prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
  7580. is_packed);
  7581. }
  7582. if (is_literal && !is_packed && !row_major_matrix_needs_conversion)
  7583. {
  7584. expr += ".";
  7585. expr += index_to_swizzle(index);
  7586. }
  7587. else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
  7588. {
  7589. auto &c = get<SPIRConstant>(index);
  7590. if (c.specialization)
  7591. {
  7592. // If the index is a spec constant, we cannot turn extract into a swizzle.
  7593. expr += join("[", to_expression(index), "]");
  7594. }
  7595. else
  7596. {
  7597. expr += ".";
  7598. expr += index_to_swizzle(c.scalar());
  7599. }
  7600. }
  7601. else if (is_literal)
  7602. {
  7603. // For packed vectors, we can only access them as an array, not by swizzle.
  7604. expr += join("[", index, "]");
  7605. }
  7606. else
  7607. {
  7608. expr += "[";
  7609. expr += to_expression(index, register_expression_read);
  7610. expr += "]";
  7611. }
  7612. if (row_major_matrix_needs_conversion)
  7613. {
  7614. prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
  7615. is_packed);
  7616. }
  7617. expr += deferred_index;
  7618. row_major_matrix_needs_conversion = false;
  7619. is_packed = false;
  7620. physical_type = 0;
  7621. type_id = type->parent_type;
  7622. type = &get<SPIRType>(type_id);
  7623. }
  7624. else if (!backend.allow_truncated_access_chain)
  7625. SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
  7626. }
  7627. if (pending_array_enclose)
  7628. {
  7629. SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, "
  7630. "but the access chain was terminated in the middle of a multidimensional array. "
  7631. "This is not supported.");
  7632. }
  7633. if (meta)
  7634. {
  7635. meta->need_transpose = row_major_matrix_needs_conversion;
  7636. meta->storage_is_packed = is_packed;
  7637. meta->storage_is_invariant = is_invariant;
  7638. meta->storage_physical_type = physical_type;
  7639. }
  7640. return expr;
  7641. }
  7642. void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
  7643. {
  7644. }
  7645. string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
  7646. {
  7647. auto ret = join(basename, "_", to_member_name(type, index));
  7648. ParsedIR::sanitize_underscores(ret);
  7649. return ret;
  7650. }
  7651. string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
  7652. AccessChainMeta *meta, bool ptr_chain)
  7653. {
  7654. if (flattened_buffer_blocks.count(base))
  7655. {
  7656. uint32_t matrix_stride = 0;
  7657. uint32_t array_stride = 0;
  7658. bool need_transpose = false;
  7659. flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
  7660. &array_stride, ptr_chain);
  7661. if (meta)
  7662. {
  7663. meta->need_transpose = target_type.columns > 1 && need_transpose;
  7664. meta->storage_is_packed = false;
  7665. }
  7666. return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
  7667. need_transpose);
  7668. }
  7669. else if (flattened_structs.count(base) && count > 0)
  7670. {
  7671. AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
  7672. if (ptr_chain)
  7673. flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
  7674. if (flattened_structs[base])
  7675. {
  7676. flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
  7677. if (meta)
  7678. meta->flattened_struct = target_type.basetype == SPIRType::Struct;
  7679. }
  7680. auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
  7681. if (meta)
  7682. {
  7683. meta->need_transpose = false;
  7684. meta->storage_is_packed = false;
  7685. }
  7686. auto basename = to_flattened_access_chain_expression(base);
  7687. auto ret = join(basename, "_", chain);
  7688. ParsedIR::sanitize_underscores(ret);
  7689. return ret;
  7690. }
  7691. else
  7692. {
  7693. AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
  7694. if (ptr_chain)
  7695. flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
  7696. return access_chain_internal(base, indices, count, flags, meta);
  7697. }
  7698. }
  7699. string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
  7700. {
  7701. auto expr = type_to_glsl_constructor(type);
  7702. expr += '(';
  7703. for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
  7704. {
  7705. if (i)
  7706. expr += ", ";
  7707. auto &member_type = get<SPIRType>(type.member_types[i]);
  7708. if (member_type.basetype == SPIRType::Struct)
  7709. expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
  7710. else
  7711. expr += to_flattened_struct_member(basename, type, i);
  7712. }
  7713. expr += ')';
  7714. return expr;
  7715. }
  7716. std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
  7717. {
  7718. // Do not use to_expression as that will unflatten access chains.
  7719. string basename;
  7720. if (const auto *var = maybe_get<SPIRVariable>(id))
  7721. basename = to_name(var->self);
  7722. else if (const auto *expr = maybe_get<SPIRExpression>(id))
  7723. basename = expr->expression;
  7724. else
  7725. basename = to_expression(id);
  7726. return basename;
  7727. }
  7728. void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
  7729. const SmallVector<uint32_t> &indices)
  7730. {
  7731. SmallVector<uint32_t> sub_indices = indices;
  7732. sub_indices.push_back(0);
  7733. auto *member_type = &type;
  7734. for (auto &index : indices)
  7735. member_type = &get<SPIRType>(member_type->member_types[index]);
  7736. for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
  7737. {
  7738. sub_indices.back() = i;
  7739. auto lhs = join(basename, "_", to_member_name(*member_type, i));
  7740. ParsedIR::sanitize_underscores(lhs);
  7741. if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
  7742. {
  7743. store_flattened_struct(lhs, rhs_id, type, sub_indices);
  7744. }
  7745. else
  7746. {
  7747. auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
  7748. statement(lhs, " = ", rhs, ";");
  7749. }
  7750. }
  7751. }
  7752. void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
  7753. {
  7754. auto &type = expression_type(lhs_id);
  7755. auto basename = to_flattened_access_chain_expression(lhs_id);
  7756. store_flattened_struct(basename, value, type, {});
  7757. }
  7758. std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
  7759. const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
  7760. uint32_t /* array_stride */, bool need_transpose)
  7761. {
  7762. if (!target_type.array.empty())
  7763. SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
  7764. else if (target_type.basetype == SPIRType::Struct)
  7765. return flattened_access_chain_struct(base, indices, count, target_type, offset);
  7766. else if (target_type.columns > 1)
  7767. return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
  7768. else
  7769. return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
  7770. }
  7771. std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
  7772. const SPIRType &target_type, uint32_t offset)
  7773. {
  7774. std::string expr;
  7775. expr += type_to_glsl_constructor(target_type);
  7776. expr += "(";
  7777. for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
  7778. {
  7779. if (i != 0)
  7780. expr += ", ";
  7781. const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
  7782. uint32_t member_offset = type_struct_member_offset(target_type, i);
  7783. // The access chain terminates at the struct, so we need to find matrix strides and row-major information
  7784. // ahead of time.
  7785. bool need_transpose = false;
  7786. uint32_t matrix_stride = 0;
  7787. if (member_type.columns > 1)
  7788. {
  7789. need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
  7790. matrix_stride = type_struct_member_matrix_stride(target_type, i);
  7791. }
  7792. auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
  7793. 0 /* array_stride */, need_transpose);
  7794. // Cannot forward transpositions, so resolve them here.
  7795. if (need_transpose)
  7796. expr += convert_row_major_matrix(tmp, member_type, 0, false);
  7797. else
  7798. expr += tmp;
  7799. }
  7800. expr += ")";
  7801. return expr;
  7802. }
std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset,
                                                        uint32_t matrix_stride, bool need_transpose)
{
    assert(matrix_stride);
    SPIRType tmp_type = target_type;
    if (need_transpose)
        swap(tmp_type.vecsize, tmp_type.columns);

    std::string expr;

    expr += type_to_glsl_constructor(tmp_type);
    expr += "(";

    for (uint32_t i = 0; i < tmp_type.columns; i++)
    {
        if (i != 0)
            expr += ", ";

        expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
                                              /* need_transpose= */ false);
    }

    expr += ")";

    return expr;
}
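
// Loads a vector or scalar from the flattened buffer. The byte offset computed by
// flattened_access_chain_offset is converted into a vec4 array index plus a swizzle.
// For row-major matrices (need_transpose), each component is gathered separately, matrix_stride apart.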
std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset,
                                                        uint32_t matrix_stride, bool need_transpose)
{
    auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);

    auto buffer_name = to_name(expression_type(base).self);

    if (need_transpose)
    {
        std::string expr;

        if (target_type.vecsize > 1)
        {
            expr += type_to_glsl_constructor(target_type);
            expr += "(";
        }

        for (uint32_t i = 0; i < target_type.vecsize; ++i)
        {
            if (i != 0)
                expr += ", ";

            uint32_t component_offset = result.second + i * matrix_stride;

            assert(component_offset % (target_type.width / 8) == 0);
            uint32_t index = component_offset / (target_type.width / 8);

            expr += buffer_name;
            expr += "[";
            expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
            expr += convert_to_string(index / 4);
            expr += "]";

            expr += vector_swizzle(1, index % 4);
        }

        if (target_type.vecsize > 1)
        {
            expr += ")";
        }

        return expr;
    }
    else
    {
        assert(result.second % (target_type.width / 8) == 0);
        uint32_t index = result.second / (target_type.width / 8);

        std::string expr;

        expr += buffer_name;
        expr += "[";
        expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
        expr += convert_to_string(index / 4);
        expr += "]";

        expr += vector_swizzle(target_type.vecsize, index % 4);

        return expr;
    }
}
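
// Walks an access chain over a flattened buffer and splits it into a dynamic and a static part:
// the returned string is a word-index expression for any dynamic indexing (empty, or ending in " + "),
// and the returned integer is the accumulated constant byte offset. Matrix/array stride and row-major
// state are threaded through the optional out-parameters.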
std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
    const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
    bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
{
    // Start traversing type hierarchy at the proper non-pointer types.
    const auto *type = &get_pointee_type(basetype);

    std::string expr;

    // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
    bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
    uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
    uint32_t array_stride = out_array_stride ? *out_array_stride : 0;

    for (uint32_t i = 0; i < count; i++)
    {
        uint32_t index = indices[i];

        // Pointers
        if (ptr_chain && i == 0)
        {
            // Here, the pointer type will be decorated with an array stride.
            array_stride = get_decoration(basetype.self, DecorationArrayStride);
            if (!array_stride)
                SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");

            auto *constant = maybe_get<SPIRConstant>(index);
            if (constant)
            {
                // Constant array access.
                offset += constant->scalar() * array_stride;
            }
            else
            {
                // Dynamic array access.
                if (array_stride % word_stride)
                {
                    SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
                                      "of a 4-component vector. "
                                      "Likely culprit here is a float or vec2 array inside a push "
                                      "constant block which is std430. "
                                      "This cannot be flattened. Try using std140 layout instead.");
                }

                expr += to_enclosed_expression(index);
                expr += " * ";
                expr += convert_to_string(array_stride / word_stride);
                expr += " + ";
            }
        }
        // Arrays
        else if (!type->array.empty())
        {
            auto *constant = maybe_get<SPIRConstant>(index);
            if (constant)
            {
                // Constant array access.
                offset += constant->scalar() * array_stride;
            }
            else
            {
                // Dynamic array access.
                if (array_stride % word_stride)
                {
                    SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
                                      "of a 4-component vector. "
                                      "Likely culprit here is a float or vec2 array inside a push "
                                      "constant block which is std430. "
                                      "This cannot be flattened. Try using std140 layout instead.");
                }

                expr += to_enclosed_expression(index, false);
                expr += " * ";
                expr += convert_to_string(array_stride / word_stride);
                expr += " + ";
            }

            uint32_t parent_type = type->parent_type;
            type = &get<SPIRType>(parent_type);

            if (!type->array.empty())
                array_stride = get_decoration(parent_type, DecorationArrayStride);
        }
        // For structs, the index refers to a constant, which indexes into the members.
        // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
        else if (type->basetype == SPIRType::Struct)
        {
            index = evaluate_constant_u32(index);

            if (index >= type->member_types.size())
                SPIRV_CROSS_THROW("Member index is out of bounds!");

            offset += type_struct_member_offset(*type, index);

            auto &struct_type = *type;
            type = &get<SPIRType>(type->member_types[index]);

            if (type->columns > 1)
            {
                matrix_stride = type_struct_member_matrix_stride(struct_type, index);
                row_major_matrix_needs_conversion =
                    combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
            }
            else
                row_major_matrix_needs_conversion = false;

            if (!type->array.empty())
                array_stride = type_struct_member_array_stride(struct_type, index);
        }
        // Matrix -> Vector
        else if (type->columns > 1)
        {
            auto *constant = maybe_get<SPIRConstant>(index);
            if (constant)
            {
                index = evaluate_constant_u32(index);
                offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
            }
            else
            {
                uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;

                // Dynamic array access.
                if (indexing_stride % word_stride)
                {
                    SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
                                      "4-component vector. "
                                      "Likely culprit here is a row-major matrix being accessed dynamically. "
                                      "This cannot be flattened. Try using std140 layout instead.");
                }

                expr += to_enclosed_expression(index, false);
                expr += " * ";
                expr += convert_to_string(indexing_stride / word_stride);
                expr += " + ";
            }

            type = &get<SPIRType>(type->parent_type);
        }
        // Vector -> Scalar
        else if (type->vecsize > 1)
        {
            auto *constant = maybe_get<SPIRConstant>(index);
            if (constant)
            {
                index = evaluate_constant_u32(index);
                offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
            }
            else
            {
                uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);

                // Dynamic array access.
                if (indexing_stride % word_stride)
                {
                    SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
                                      "size of a 4-component vector. "
                                      "This cannot be flattened in legacy targets.");
                }

                expr += to_enclosed_expression(index, false);
                expr += " * ";
                expr += convert_to_string(indexing_stride / word_stride);
                expr += " + ";
            }

            type = &get<SPIRType>(type->parent_type);
        }
        else
            SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
    }

    if (need_transpose)
        *need_transpose = row_major_matrix_needs_conversion;
    if (out_matrix_stride)
        *out_matrix_stride = matrix_stride;
    if (out_array_stride)
        *out_array_stride = array_stride;

    return std::make_pair(expr, offset);
}
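
// Determines whether a pointer-typed expression must be explicitly dereferenced when used as a value.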
bool CompilerGLSL::should_dereference(uint32_t id)
{
    const auto &type = expression_type(id);
    // Non-pointer expressions don't need to be dereferenced.
    if (!type.pointer)
        return false;

    // Handles shouldn't be dereferenced either.
    if (!expression_is_lvalue(id))
        return false;

    // If id is a variable but not a phi variable, we should not dereference it.
    if (auto *var = maybe_get<SPIRVariable>(id))
        return var->phi_variable;

    // If id is an access chain, we should not dereference it.
    if (auto *expr = maybe_get<SPIRExpression>(id))
        return !expr->access_chain;

    // Otherwise, we should dereference this pointer expression.
    return true;
}

bool CompilerGLSL::should_forward(uint32_t id) const
{
    // If id is a variable we will try to forward it regardless of the force_temporary check below.
    // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL.
    auto *var = maybe_get<SPIRVariable>(id);
    if (var && var->forwardable)
        return true;

    // For debugging, emit temporary variables for all expressions.
    if (options.force_temporary)
        return false;

    // Immutable expressions can always be forwarded.
    if (is_immutable(id))
        return true;

    return false;
}

bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
{
    // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
    return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
}
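
// Counts reads of forwarded expressions. Reading a forwarded temporary more than once (or once inside a loop
// when it was created outside one) forces it into a real temporary and triggers a recompile pass.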
void CompilerGLSL::track_expression_read(uint32_t id)
{
    switch (ir.ids[id].get_type())
    {
    case TypeExpression:
    {
        auto &e = get<SPIRExpression>(id);
        for (auto implied_read : e.implied_read_expressions)
            track_expression_read(implied_read);
        break;
    }

    case TypeAccessChain:
    {
        auto &e = get<SPIRAccessChain>(id);
        for (auto implied_read : e.implied_read_expressions)
            track_expression_read(implied_read);
        break;
    }

    default:
        break;
    }

    // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
    // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
    if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
    {
        auto &v = expression_usage_counts[id];
        v++;

        // If we create an expression outside a loop,
        // but access it inside a loop, we're implicitly reading it multiple times.
        // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
        // working inside the backend compiler.
        if (expression_read_implies_multiple_reads(id))
            v++;

        if (v >= 2)
        {
            //if (v == 2)
            //    fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);

            forced_temporaries.insert(id);
            // Force a recompile after this pass to avoid forwarding this variable.
            force_recompile();
        }
    }
}
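
// A function call result may only be forwarded if the call itself was not forced to a temporary
// and every argument (plus, for impure calls, every global and aliased variable) is forwardable.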
bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
{
    if (forced_temporaries.find(id) != end(forced_temporaries))
        return false;

    for (uint32_t i = 0; i < num_args; i++)
        if (!should_forward(args[i]))
            return false;

    // We need to forward globals as well.
    if (!pure)
    {
        for (auto global : global_variables)
            if (!should_forward(global))
                return false;
        for (auto aliased : aliased_variables)
            if (!should_forward(aliased))
                return false;
    }

    return true;
}

void CompilerGLSL::register_impure_function_call()
{
    // Impure functions can modify globals and aliased variables, so invalidate them as well.
    for (auto global : global_variables)
        flush_dependees(get<SPIRVariable>(global));
    for (auto aliased : aliased_variables)
        flush_dependees(get<SPIRVariable>(aliased));
}

void CompilerGLSL::register_call_out_argument(uint32_t id)
{
    register_write(id);

    auto *var = maybe_get<SPIRVariable>(id);
    if (var)
        flush_variable_declaration(var->self);
}

string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
{
    // These variables are always function local,
    // so make sure we emit the variable without storage qualifiers.
    // Some backends will inject custom variables locally in a function
    // with a storage qualifier which is not function-local.
    auto old_storage = var.storage;
    var.storage = StorageClassFunction;
    auto expr = variable_decl(var);
    var.storage = old_storage;
    return expr;
}

void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
{
    // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
    if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
    {
        auto &type = get<SPIRType>(var.basetype);
        auto &flags = get_decoration_bitset(var.self);
        statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
        flushed_phi_variables.insert(var.self);
    }
}

void CompilerGLSL::flush_variable_declaration(uint32_t id)
{
    // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
    auto *var = maybe_get<SPIRVariable>(id);
    if (var && var->deferred_declaration)
    {
        string initializer;
        if (options.force_zero_initialized_variables &&
            (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
             var->storage == StorageClassPrivate) &&
            !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
        {
            initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
        }

        statement(variable_decl_function_local(*var), initializer, ";");
        var->deferred_declaration = false;
    }
    if (var)
    {
        emit_variable_temporary_copies(*var);
    }
}
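
// Attempts to fold a trailing identity-style swizzle (.x, .xy, .xyz, ...) into the swizzle preceding it,
// e.g. foobar.wyx.xy becomes foobar.wy. Returns true if the duplicate-swizzle pattern was recognized.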
bool CompilerGLSL::remove_duplicate_swizzle(string &op)
{
    auto pos = op.find_last_of('.');
    if (pos == string::npos || pos == 0)
        return false;

    string final_swiz = op.substr(pos + 1, string::npos);

    if (backend.swizzle_is_function)
    {
        if (final_swiz.size() < 2)
            return false;

        if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
            final_swiz.erase(final_swiz.size() - 2, string::npos);
        else
            return false;
    }

    // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
    // If so, and previous swizzle is of same length,
    // we can drop the final swizzle altogether.
    for (uint32_t i = 0; i < final_swiz.size(); i++)
    {
        static const char expected[] = { 'x', 'y', 'z', 'w' };
        if (i >= 4 || final_swiz[i] != expected[i])
            return false;
    }

    auto prevpos = op.find_last_of('.', pos - 1);
    if (prevpos == string::npos)
        return false;

    prevpos++;

    // Make sure there are only swizzles here ...
    for (auto i = prevpos; i < pos; i++)
    {
        if (op[i] < 'w' || op[i] > 'z')
        {
            // If swizzles are foo.xyz() like in C++ backend for example, check for that.
            if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
                break;
            return false;
        }
    }

    // If original swizzle is large enough, just carve out the components we need.
    // E.g. foobar.wyx.xy will turn into foobar.wy.
    if (pos - prevpos >= final_swiz.size())
    {
        op.erase(prevpos + final_swiz.size(), string::npos);

        // Add back the function call ...
        if (backend.swizzle_is_function)
            op += "()";
    }
    return true;
}

// Optimizes away vector swizzles where we have something like
// vec3 foo;
// foo.xyz <-- swizzle expression does nothing.
// This is a very common pattern after OpCompositeConstruct.
bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
{
    auto pos = op.find_last_of('.');
    if (pos == string::npos || pos == 0)
        return false;

    string final_swiz = op.substr(pos + 1, string::npos);

    if (backend.swizzle_is_function)
    {
        if (final_swiz.size() < 2)
            return false;

        if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
            final_swiz.erase(final_swiz.size() - 2, string::npos);
        else
            return false;
    }

    // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
    // If so, and the base vector has the same number of components,
    // we can drop the swizzle altogether.
    for (uint32_t i = 0; i < final_swiz.size(); i++)
    {
        static const char expected[] = { 'x', 'y', 'z', 'w' };
        if (i >= 4 || final_swiz[i] != expected[i])
            return false;
    }

    auto &type = expression_type(base);

    // Sanity checking ...
    assert(type.columns == 1 && type.array.empty());

    if (type.vecsize == final_swiz.size())
        op.erase(pos, string::npos);
    return true;
}
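
// Builds the argument list for a composite constructor from a list of element IDs, merging consecutive
// scalar extracts that come from the same base vector back into a single swizzle where possible.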
string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
{
    ID base = 0;
    string op;
    string subop;

    // Can only merge swizzles for vectors.
    auto &type = get<SPIRType>(return_type);
    bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
    bool swizzle_optimization = false;

    for (uint32_t i = 0; i < length; i++)
    {
        auto *e = maybe_get<SPIRExpression>(elems[i]);

        // If we're merging another scalar which belongs to the same base
        // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible!
        if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
        {
            // Only supposed to be used for vector swizzle -> scalar.
            assert(!e->expression.empty() && e->expression.front() == '.');
            subop += e->expression.substr(1, string::npos);
            swizzle_optimization = true;
        }
        else
        {
            // We'll likely end up with duplicated swizzles, e.g.
            // foobar.xyz.xyz from patterns like
            // OpVectorShuffle
            // OpCompositeExtract x 3
            // OpCompositeConstruct 3x + other scalar.
            // Just modify op in-place.
            if (swizzle_optimization)
            {
                if (backend.swizzle_is_function)
                    subop += "()";

                // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
                // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
                // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
                // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
                // Case 1:
                //  foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
                //               foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
                // Case 2:
                //  foo.xyz: Duplicate swizzle won't kick in.
                //           If foo is vec3, we can remove xyz, giving just foo.
                if (!remove_duplicate_swizzle(subop))
                    remove_unity_swizzle(base, subop);

                // Strips away redundant parens if we created them during component extraction.
                strip_enclosed_expression(subop);
                swizzle_optimization = false;
                op += subop;
            }
            else
                op += subop;

            if (i)
                op += ", ";

            bool uses_buffer_offset =
                type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
            subop = to_composite_constructor_expression(elems[i], uses_buffer_offset);
        }

        base = e ? e->base_expression : ID(0);
    }

    if (swizzle_optimization)
    {
        if (backend.swizzle_is_function)
            subop += "()";

        if (!remove_duplicate_swizzle(subop))
            remove_unity_swizzle(base, subop);

        // Strips away redundant parens if we created them during component extraction.
        strip_enclosed_expression(subop);
    }

    op += subop;
    return op;
}

bool CompilerGLSL::skip_argument(uint32_t id) const
{
    if (!combined_image_samplers.empty() || !options.vulkan_semantics)
    {
        auto &type = expression_type(id);
        if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
            return true;
    }
    return false;
}

bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
{
    // Do this with strings because we have a very clear pattern we can check for and it avoids
    // adding lots of special cases to the code emission.
    if (rhs.size() < lhs.size() + 3)
        return false;

    // Do not optimize matrices. They are a bit awkward to reason about in general
    // (in which order does operation happen?), and it does not work on MSL anyways.
    if (type.vecsize > 1 && type.columns > 1)
        return false;

    auto index = rhs.find(lhs);
    if (index != 0)
        return false;

    // TODO: Shift operators, but it's not important for now.
    auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
    if (op != lhs.size() + 1)
        return false;

    // Check that the op is followed by space. This excludes && and ||.
    if (rhs[op + 1] != ' ')
        return false;

    char bop = rhs[op];
    auto expr = rhs.substr(lhs.size() + 3);

    // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
    // Find some common patterns which are equivalent.
    if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
        statement(lhs, bop, bop, ";");
    else
        statement(lhs, " ", bop, "= ", expr, ";");
    return true;
}

void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
{
    if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
        return;

    assert(current_emitting_block);
    current_emitting_block->invalidate_expressions.push_back(expr);
}

void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
{
    current_emitting_block = &block;
    for (auto &op : block.ops)
        emit_instruction(op);
    current_emitting_block = nullptr;
}

void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
{
    // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
    // these will be marked as having suppressed usage tracking.
    // Our only concern is to make sure arithmetic operations are done in similar ways.
    if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
        forced_invariant_temporaries.count(expr.self) == 0)
    {
        forced_temporaries.insert(expr.self);
        forced_invariant_temporaries.insert(expr.self);
        force_recompile();

        for (auto &dependent : expr.expression_dependencies)
            disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
    }
}

void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
{
    // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
    // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
    // in one translation unit, but not another, e.g. due to multiple use of an expression.
    // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
    // expressions to be temporaries.
    // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
    // for all reasonable uses of invariant.
    if (!has_decoration(store_id, DecorationInvariant))
        return;

    auto *expr = maybe_get<SPIRExpression>(value_id);
    if (!expr)
        return;

    disallow_forwarding_in_expression_chain(*expr);
}
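
// Emits a plain "<lhs> = <rhs>;" store, routing through the invariant handling, builtin casts and the
// read-modify-write optimization above. Used by OpStore when no struct flattening is required.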
void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
{
    auto rhs = to_pointer_expression(rhs_expression);

    // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
    if (!rhs.empty())
    {
        handle_store_to_invariant_variable(lhs_expression, rhs_expression);

        auto lhs = to_dereferenced_expression(lhs_expression);

        // We might need to cast in order to store to a builtin.
        cast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));

        // Tries to optimize assignments like "<lhs> = <lhs> op expr".
        // While this is purely cosmetic, this is important for legacy ESSL where loop
        // variable increments must be in either i++ or i += const-expr.
        // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
        if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
            statement(lhs, " = ", rhs, ";");
        register_write(lhs_expression);
    }
}
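
// For opcodes where the width of integer operands matters, determine which integer width the
// implicit bitcasts in emit_instruction should target. Comparisons and conversions look at the first
// real operand; everything else falls back to the result type if it is integral, or 32-bit otherwise.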
uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
{
    if (instr.length < 3)
        return 32;

    auto *ops = stream(instr);

    switch (instr.op)
    {
    case OpSConvert:
    case OpConvertSToF:
    case OpUConvert:
    case OpConvertUToF:
    case OpIEqual:
    case OpINotEqual:
    case OpSLessThan:
    case OpSLessThanEqual:
    case OpSGreaterThan:
    case OpSGreaterThanEqual:
    case OpULessThan:
    case OpULessThanEqual:
    case OpUGreaterThan:
    case OpUGreaterThanEqual:
        return expression_type(ops[2]).width;

    default:
    {
        // We can look at result type which is more robust.
        auto *type = maybe_get<SPIRType>(ops[0]);
        if (type && type_is_integral(*type))
            return type->width;
        else
            return 32;
    }
    }
}

uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
{
    if (length < 1)
        return 32;

    switch (op)
    {
    case GLSLstd450SAbs:
    case GLSLstd450SSign:
    case GLSLstd450UMin:
    case GLSLstd450SMin:
    case GLSLstd450UMax:
    case GLSLstd450SMax:
    case GLSLstd450UClamp:
    case GLSLstd450SClamp:
    case GLSLstd450FindSMsb:
    case GLSLstd450FindUMsb:
        return expression_type(ops[0]).width;

    default:
    {
        // We don't need to care about other opcodes, just return 32.
        return 32;
    }
    }
}
  8509. void CompilerGLSL::emit_instruction(const Instruction &instruction)
  8510. {
  8511. auto ops = stream(instruction);
  8512. auto opcode = static_cast<Op>(instruction.op);
  8513. uint32_t length = instruction.length;
  8514. #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
  8515. #define GLSL_BOP_CAST(op, type) \
  8516. emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
  8517. #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
  8518. #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
  8519. #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
  8520. #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
  8521. #define GLSL_BFOP_CAST(op, type) \
  8522. emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
  8523. #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
  8524. #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
  8525. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  8526. uint32_t integer_width = get_integer_width_for_instruction(instruction);
  8527. auto int_type = to_signed_basetype(integer_width);
  8528. auto uint_type = to_unsigned_basetype(integer_width);
  8529. switch (opcode)
  8530. {
  8531. // Dealing with memory
  8532. case OpLoad:
  8533. {
  8534. uint32_t result_type = ops[0];
  8535. uint32_t id = ops[1];
  8536. uint32_t ptr = ops[2];
  8537. flush_variable_declaration(ptr);
  8538. // If we're loading from memory that cannot be changed by the shader,
  8539. // just forward the expression directly to avoid needless temporaries.
  8540. // If an expression is mutable and forwardable, we speculate that it is immutable.
  8541. bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
  8542. // If loading a non-native row-major matrix, mark the expression as need_transpose.
  8543. bool need_transpose = false;
  8544. bool old_need_transpose = false;
  8545. auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
  8546. if (forward)
  8547. {
  8548. // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
  8549. // taking the expression.
  8550. if (ptr_expression && ptr_expression->need_transpose)
  8551. {
  8552. old_need_transpose = true;
  8553. ptr_expression->need_transpose = false;
  8554. need_transpose = true;
  8555. }
  8556. else if (is_non_native_row_major_matrix(ptr))
  8557. need_transpose = true;
  8558. }
  8559. // If we are forwarding this load,
  8560. // don't register the read to access chain here, defer that to when we actually use the expression,
  8561. // using the add_implied_read_expression mechanism.
  8562. string expr;
  8563. bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
  8564. bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
  8565. if (forward || (!is_packed && !is_remapped))
  8566. {
  8567. // For the simple case, we do not need to deal with repacking.
  8568. expr = to_dereferenced_expression(ptr, false);
  8569. }
  8570. else
  8571. {
  8572. // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
  8573. // storing the expression to a temporary.
  8574. expr = to_unpacked_expression(ptr);
  8575. }
  8576. auto &type = get<SPIRType>(result_type);
  8577. auto &expr_type = expression_type(ptr);
  8578. // If the expression has more vector components than the result type, insert
  8579. // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
  8580. // happen with e.g. the MSL backend replacing the type of an input variable.
  8581. if (expr_type.vecsize > type.vecsize)
  8582. expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
  8583. // We might need to cast in order to load from a builtin.
  8584. cast_from_builtin_load(ptr, expr, type);
  8585. // We might be trying to load a gl_Position[N], where we should be
  8586. // doing float4[](gl_in[i].gl_Position, ...) instead.
  8587. // Similar workarounds are required for input arrays in tessellation.
  8588. unroll_array_from_complex_load(id, ptr, expr);
  8589. // Shouldn't need to check for ID, but current glslang codegen requires it in some cases
  8590. // when loading Image/Sampler descriptors. It does not hurt to check ID as well.
  8591. if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
  8592. {
  8593. propagate_nonuniform_qualifier(ptr);
  8594. convert_non_uniform_expression(type, expr);
  8595. }
  8596. if (forward && ptr_expression)
  8597. ptr_expression->need_transpose = old_need_transpose;
  8598. bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
  8599. if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
  8600. rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
  8601. // By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
  8602. // However, if we try to load a complex, composite object from a flattened buffer,
  8603. // we should avoid emitting the same code over and over and lower the result to a temporary.
  8604. bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
  8605. SPIRExpression *e = nullptr;
  8606. if (!forward && expression_is_non_value_type_array(ptr))
  8607. {
  8608. // Complicated load case where we need to make a copy of ptr, but we cannot, because
  8609. // it is an array, and our backend does not support arrays as value types.
  8610. // Emit the temporary, and copy it explicitly.
  8611. e = &emit_uninitialized_temporary_expression(result_type, id);
  8612. emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
  8613. }
  8614. else
  8615. e = &emit_op(result_type, id, expr, forward, !usage_tracking);
  8616. e->need_transpose = need_transpose;
  8617. register_read(id, ptr, forward);
  8618. if (forward)
  8619. {
  8620. // Pass through whether the result is of a packed type and the physical type ID.
  8621. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
  8622. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  8623. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
  8624. {
  8625. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
  8626. get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
  8627. }
  8628. }
  8629. else
  8630. {
  8631. // This might have been set on an earlier compilation iteration, force it to be unset.
  8632. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  8633. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  8634. }
  8635. inherit_expression_dependencies(id, ptr);
  8636. if (forward)
  8637. add_implied_read_expression(*e, ptr);
  8638. break;
  8639. }
  8640. case OpInBoundsAccessChain:
  8641. case OpAccessChain:
  8642. case OpPtrAccessChain:
  8643. {
  8644. auto *var = maybe_get<SPIRVariable>(ops[2]);
  8645. if (var)
  8646. flush_variable_declaration(var->self);
  8647. // If the base is immutable, the access chain pointer must also be.
  8648. // If an expression is mutable and forwardable, we speculate that it is immutable.
  8649. AccessChainMeta meta;
  8650. bool ptr_chain = opcode == OpPtrAccessChain;
  8651. auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain);
  8652. auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
  8653. auto *backing_variable = maybe_get_backing_variable(ops[2]);
  8654. expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
  8655. expr.need_transpose = meta.need_transpose;
  8656. expr.access_chain = true;
  8657. // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed.
  8658. if (meta.storage_is_packed)
  8659. set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
  8660. if (meta.storage_physical_type != 0)
  8661. set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
  8662. if (meta.storage_is_invariant)
  8663. set_decoration(ops[1], DecorationInvariant);
  8664. if (meta.flattened_struct)
  8665. flattened_structs[ops[1]] = true;
  8666. // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
  8667. // temporary which could be subject to invalidation.
  8668. // Need to assume we're forwarded while calling inherit_expression_depdendencies.
  8669. forwarded_temporaries.insert(ops[1]);
  8670. // The access chain itself is never forced to a temporary, but its dependencies might.
  8671. suppressed_usage_tracking.insert(ops[1]);
  8672. for (uint32_t i = 2; i < length; i++)
  8673. {
  8674. inherit_expression_dependencies(ops[1], ops[i]);
  8675. add_implied_read_expression(expr, ops[i]);
  8676. }
  8677. // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
  8678. // we're not forwarded after all.
  8679. if (expr.expression_dependencies.empty())
  8680. forwarded_temporaries.erase(ops[1]);
  8681. if (has_decoration(ops[1], DecorationNonUniformEXT))
  8682. propagate_nonuniform_qualifier(ops[1]);
  8683. break;
  8684. }
  8685. case OpStore:
  8686. {
  8687. auto *var = maybe_get<SPIRVariable>(ops[0]);
  8688. if (var && var->statically_assigned)
  8689. var->static_expression = ops[1];
  8690. else if (var && var->loop_variable && !var->loop_variable_enable)
  8691. var->static_expression = ops[1];
  8692. else if (var && var->remapped_variable && var->static_expression)
  8693. {
  8694. // Skip the write.
  8695. }
  8696. else if (flattened_structs.count(ops[0]))
  8697. {
  8698. store_flattened_struct(ops[0], ops[1]);
  8699. register_write(ops[0]);
  8700. }
  8701. else
  8702. {
  8703. emit_store_statement(ops[0], ops[1]);
  8704. }
  8705. // Storing a pointer results in a variable pointer, so we must conservatively assume
  8706. // we can write through it.
  8707. if (expression_type(ops[1]).pointer)
  8708. register_write(ops[1]);
  8709. break;
  8710. }
  8711. case OpArrayLength:
  8712. {
  8713. uint32_t result_type = ops[0];
  8714. uint32_t id = ops[1];
  8715. auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  8716. set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
  8717. true);
  8718. break;
  8719. }
  8720. // Function calls
  8721. case OpFunctionCall:
  8722. {
  8723. uint32_t result_type = ops[0];
  8724. uint32_t id = ops[1];
  8725. uint32_t func = ops[2];
  8726. const auto *arg = &ops[3];
  8727. length -= 3;
  8728. auto &callee = get<SPIRFunction>(func);
  8729. auto &return_type = get<SPIRType>(callee.return_type);
  8730. bool pure = function_is_pure(callee);
  8731. bool callee_has_out_variables = false;
  8732. bool emit_return_value_as_argument = false;
  8733. // Invalidate out variables passed to functions since they can be OpStore'd to.
  8734. for (uint32_t i = 0; i < length; i++)
  8735. {
  8736. if (callee.arguments[i].write_count)
  8737. {
  8738. register_call_out_argument(arg[i]);
  8739. callee_has_out_variables = true;
  8740. }
  8741. flush_variable_declaration(arg[i]);
  8742. }
  8743. if (!return_type.array.empty() && !backend.can_return_array)
  8744. {
  8745. callee_has_out_variables = true;
  8746. emit_return_value_as_argument = true;
  8747. }
  8748. if (!pure)
  8749. register_impure_function_call();
  8750. string funexpr;
  8751. SmallVector<string> arglist;
  8752. funexpr += to_name(func) + "(";
  8753. if (emit_return_value_as_argument)
  8754. {
  8755. statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
  8756. arglist.push_back(to_name(id));
  8757. }
  8758. for (uint32_t i = 0; i < length; i++)
  8759. {
  8760. // Do not pass in separate images or samplers if we're remapping
  8761. // to combined image samplers.
  8762. if (skip_argument(arg[i]))
  8763. continue;
  8764. arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
  8765. }
  8766. for (auto &combined : callee.combined_parameters)
  8767. {
  8768. auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
  8769. auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
  8770. arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
  8771. }
  8772. append_global_func_args(callee, length, arglist);
  8773. funexpr += merge(arglist);
  8774. funexpr += ")";
  8775. // Check for function call constraints.
  8776. check_function_call_constraints(arg, length);
  8777. if (return_type.basetype != SPIRType::Void)
  8778. {
  8779. // If the function actually writes to an out variable,
  8780. // take the conservative route and do not forward.
  8781. // The problem is that we might not read the function
  8782. // result (and emit the function) before an out variable
  8783. // is read (common case when return value is ignored!
  8784. // In order to avoid start tracking invalid variables,
  8785. // just avoid the forwarding problem altogether.
  8786. bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
  8787. (forced_temporaries.find(id) == end(forced_temporaries));
  8788. if (emit_return_value_as_argument)
  8789. {
  8790. statement(funexpr, ";");
  8791. set<SPIRExpression>(id, to_name(id), result_type, true);
  8792. }
  8793. else
  8794. emit_op(result_type, id, funexpr, forward);
  8795. // Function calls are implicit loads from all variables in question.
  8796. // Set dependencies for them.
  8797. for (uint32_t i = 0; i < length; i++)
  8798. register_read(id, arg[i], forward);
  8799. // If we're going to forward the temporary result,
  8800. // put dependencies on every variable that must not change.
  8801. if (forward)
  8802. register_global_read_dependencies(callee, id);
  8803. }
  8804. else
  8805. statement(funexpr, ";");
  8806. break;
  8807. }
  8808. // Composite munging
  8809. case OpCompositeConstruct:
  8810. {
  8811. uint32_t result_type = ops[0];
  8812. uint32_t id = ops[1];
  8813. const auto *const elems = &ops[2];
  8814. length -= 2;
  8815. bool forward = true;
  8816. for (uint32_t i = 0; i < length; i++)
  8817. forward = forward && should_forward(elems[i]);
  8818. auto &out_type = get<SPIRType>(result_type);
  8819. auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
  8820. // Only splat if we have vector constructors.
  8821. // Arrays and structs must be initialized properly in full.
  8822. bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
  8823. bool splat = false;
  8824. bool swizzle_splat = false;
  8825. if (in_type)
  8826. {
  8827. splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
  8828. swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
  8829. if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
  8830. {
  8831. // Cannot swizzle literal integers as a special case.
  8832. swizzle_splat = false;
  8833. }
  8834. }
  8835. if (splat || swizzle_splat)
  8836. {
  8837. uint32_t input = elems[0];
  8838. for (uint32_t i = 0; i < length; i++)
  8839. {
  8840. if (input != elems[i])
  8841. {
  8842. splat = false;
  8843. swizzle_splat = false;
  8844. }
  8845. }
  8846. }
  8847. if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
  8848. forward = false;
  8849. if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
  8850. forward = false;
  8851. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  8852. forward = false;
  8853. string constructor_op;
  8854. if (backend.use_initializer_list && composite)
  8855. {
  8856. bool needs_trailing_tracket = false;
  8857. // Only use this path if we are building composites.
  8858. // This path cannot be used for arithmetic.
  8859. if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
  8860. constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
  8861. else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
  8862. {
  8863. // MSL path. Array constructor is baked into type here, do not use _constructor variant.
  8864. constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
  8865. needs_trailing_tracket = true;
  8866. }
  8867. constructor_op += "{ ";
  8868. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  8869. constructor_op += "0";
  8870. else if (splat)
  8871. constructor_op += to_unpacked_expression(elems[0]);
  8872. else
  8873. constructor_op += build_composite_combiner(result_type, elems, length);
  8874. constructor_op += " }";
  8875. if (needs_trailing_tracket)
  8876. constructor_op += ")";
  8877. }
  8878. else if (swizzle_splat && !composite)
  8879. {
  8880. constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
  8881. }
  8882. else
  8883. {
  8884. constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
  8885. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  8886. constructor_op += "0";
  8887. else if (splat)
  8888. constructor_op += to_unpacked_expression(elems[0]);
  8889. else
  8890. constructor_op += build_composite_combiner(result_type, elems, length);
  8891. constructor_op += ")";
  8892. }
  8893. if (!constructor_op.empty())
  8894. {
  8895. emit_op(result_type, id, constructor_op, forward);
  8896. for (uint32_t i = 0; i < length; i++)
  8897. inherit_expression_dependencies(id, elems[i]);
  8898. }
  8899. break;
  8900. }
  8901. case OpVectorInsertDynamic:
  8902. {
  8903. uint32_t result_type = ops[0];
  8904. uint32_t id = ops[1];
  8905. uint32_t vec = ops[2];
  8906. uint32_t comp = ops[3];
  8907. uint32_t index = ops[4];
  8908. flush_variable_declaration(vec);
  8909. // Make a copy, then use access chain to store the variable.
  8910. statement(declare_temporary(result_type, id), to_expression(vec), ";");
  8911. set<SPIRExpression>(id, to_name(id), result_type, true);
  8912. auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
  8913. statement(chain, " = ", to_unpacked_expression(comp), ";");
  8914. break;
  8915. }
  8916. case OpVectorExtractDynamic:
  8917. {
  8918. uint32_t result_type = ops[0];
  8919. uint32_t id = ops[1];
  8920. auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
  8921. emit_op(result_type, id, expr, should_forward(ops[2]));
  8922. inherit_expression_dependencies(id, ops[2]);
  8923. inherit_expression_dependencies(id, ops[3]);
  8924. break;
  8925. }
  8926. case OpCompositeExtract:
  8927. {
  8928. uint32_t result_type = ops[0];
  8929. uint32_t id = ops[1];
  8930. length -= 3;
  8931. auto &type = get<SPIRType>(result_type);
  8932. // We can only split the expression here if our expression is forwarded as a temporary.
  8933. bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
  8934. // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
  8935. auto &composite_type = expression_type(ops[2]);
  8936. bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
  8937. if (composite_type_is_complex)
  8938. allow_base_expression = false;
  8939. // Packed expressions or physical ID mapped expressions cannot be split up.
  8940. if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
  8941. has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
  8942. allow_base_expression = false;
  8943. // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
  8944. // into the base expression.
  8945. if (is_non_native_row_major_matrix(ops[2]))
  8946. allow_base_expression = false;
  8947. AccessChainMeta meta;
  8948. SPIRExpression *e = nullptr;
  8949. auto *c = maybe_get<SPIRConstant>(ops[2]);
  8950. if (c && !c->specialization && !composite_type_is_complex)
  8951. {
  8952. auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length);
  8953. e = &emit_op(result_type, id, expr, true, true);
  8954. }
  8955. else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
  8956. {
  8957. // Only apply this optimization if result is scalar.
  8958. // We want to split the access chain from the base.
  8959. // This is so we can later combine different CompositeExtract results
  8960. // with CompositeConstruct without emitting code like
  8961. //
  8962. // vec3 temp = texture(...).xyz
  8963. // vec4(temp.x, temp.y, temp.z, 1.0).
  8964. //
  8965. // when we actually wanted to emit this
  8966. // vec4(texture(...).xyz, 1.0).
  8967. //
  8968. // Including the base will prevent this and would trigger multiple reads
  8969. // from expression causing it to be forced to an actual temporary in GLSL.
  8970. auto expr = access_chain_internal(ops[2], &ops[3], length,
  8971. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta);
  8972. e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
  8973. inherit_expression_dependencies(id, ops[2]);
  8974. e->base_expression = ops[2];
  8975. }
  8976. else
  8977. {
  8978. auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  8979. e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
  8980. inherit_expression_dependencies(id, ops[2]);
  8981. }
  8982. // Pass through some meta information to the loaded expression.
  8983. // We can still end up loading a buffer type to a variable, then CompositeExtract from it
  8984. // instead of loading everything through an access chain.
  8985. e->need_transpose = meta.need_transpose;
  8986. if (meta.storage_is_packed)
  8987. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  8988. if (meta.storage_physical_type != 0)
  8989. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
  8990. if (meta.storage_is_invariant)
  8991. set_decoration(id, DecorationInvariant);
  8992. break;
  8993. }
  8994. case OpCompositeInsert:
  8995. {
  8996. uint32_t result_type = ops[0];
  8997. uint32_t id = ops[1];
  8998. uint32_t obj = ops[2];
  8999. uint32_t composite = ops[3];
  9000. const auto *elems = &ops[4];
  9001. length -= 4;
  9002. flush_variable_declaration(composite);
  9003. // Make a copy, then use access chain to store the variable.
  9004. statement(declare_temporary(result_type, id), to_expression(composite), ";");
  9005. set<SPIRExpression>(id, to_name(id), result_type, true);
  9006. auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  9007. statement(chain, " = ", to_unpacked_expression(obj), ";");
  9008. break;
  9009. }
  9010. case OpCopyMemory:
  9011. {
  9012. uint32_t lhs = ops[0];
  9013. uint32_t rhs = ops[1];
  9014. if (lhs != rhs)
  9015. {
  9016. flush_variable_declaration(lhs);
  9017. flush_variable_declaration(rhs);
  9018. statement(to_expression(lhs), " = ", to_unpacked_expression(rhs), ";");
  9019. register_write(lhs);
  9020. }
  9021. break;
  9022. }
  9023. case OpCopyLogical:
  9024. {
  9025. // This is used for copying object of different types, arrays and structs.
  9026. // We need to unroll the copy, element-by-element.
  9027. uint32_t result_type = ops[0];
  9028. uint32_t id = ops[1];
  9029. uint32_t rhs = ops[2];
  9030. emit_uninitialized_temporary_expression(result_type, id);
  9031. emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
  9032. break;
  9033. }
  9034. case OpCopyObject:
  9035. {
  9036. uint32_t result_type = ops[0];
  9037. uint32_t id = ops[1];
  9038. uint32_t rhs = ops[2];
  9039. bool pointer = get<SPIRType>(result_type).pointer;
  9040. auto *chain = maybe_get<SPIRAccessChain>(rhs);
  9041. auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
  9042. if (chain)
  9043. {
  9044. // Cannot lower to a SPIRExpression, just copy the object.
  9045. auto &e = set<SPIRAccessChain>(id, *chain);
  9046. e.self = id;
  9047. }
  9048. else if (imgsamp)
  9049. {
  9050. // Cannot lower to a SPIRExpression, just copy the object.
  9051. // GLSL does not currently use this type and will never get here, but MSL does.
  9052. // Handled here instead of CompilerMSL for better integration and general handling,
  9053. // and in case GLSL or other subclasses require it in the future.
  9054. auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
  9055. e.self = id;
  9056. }
  9057. else if (expression_is_lvalue(rhs) && !pointer)
  9058. {
  9059. // Need a copy.
  9060. // For pointer types, we copy the pointer itself.
  9061. statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
  9062. set<SPIRExpression>(id, to_name(id), result_type, true);
  9063. }
  9064. else
  9065. {
  9066. // RHS expression is immutable, so just forward it.
  9067. // Copying these things really make no sense, but
  9068. // seems to be allowed anyways.
  9069. auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
  9070. if (pointer)
  9071. {
  9072. auto *var = maybe_get_backing_variable(rhs);
  9073. e.loaded_from = var ? var->self : ID(0);
  9074. }
  9075. // If we're copying an access chain, need to inherit the read expressions.
  9076. auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
  9077. if (rhs_expr)
  9078. {
  9079. e.implied_read_expressions = rhs_expr->implied_read_expressions;
  9080. e.expression_dependencies = rhs_expr->expression_dependencies;
  9081. }
  9082. }
  9083. break;
  9084. }
  9085. case OpVectorShuffle:
  9086. {
  9087. uint32_t result_type = ops[0];
  9088. uint32_t id = ops[1];
  9089. uint32_t vec0 = ops[2];
  9090. uint32_t vec1 = ops[3];
  9091. const auto *elems = &ops[4];
  9092. length -= 4;
  9093. auto &type0 = expression_type(vec0);
  9094. // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
  9095. // or in our case, T(0).
  9096. bool shuffle = false;
  9097. for (uint32_t i = 0; i < length; i++)
  9098. if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
  9099. shuffle = true;
  9100. // Cannot use swizzles with packed expressions, force shuffle path.
  9101. if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
  9102. shuffle = true;
  9103. string expr;
  9104. bool should_fwd, trivial_forward;
  9105. if (shuffle)
  9106. {
  9107. should_fwd = should_forward(vec0) && should_forward(vec1);
  9108. trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
  9109. // Constructor style and shuffling from two different vectors.
  9110. SmallVector<string> args;
  9111. for (uint32_t i = 0; i < length; i++)
  9112. {
  9113. if (elems[i] == 0xffffffffu)
  9114. {
  9115. // Use a constant 0 here.
  9116. // We could use the first component or similar, but then we risk propagating
  9117. // a value we might not need, and bog down codegen.
  9118. SPIRConstant c;
  9119. c.constant_type = type0.parent_type;
  9120. assert(type0.parent_type != ID(0));
  9121. args.push_back(constant_expression(c));
  9122. }
  9123. else if (elems[i] >= type0.vecsize)
  9124. args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
  9125. else
  9126. args.push_back(to_extract_component_expression(vec0, elems[i]));
  9127. }
  9128. expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
  9129. }
  9130. else
  9131. {
  9132. should_fwd = should_forward(vec0);
  9133. trivial_forward = should_suppress_usage_tracking(vec0);
  9134. // We only source from first vector, so can use swizzle.
  9135. // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
  9136. expr += to_enclosed_unpacked_expression(vec0);
  9137. expr += ".";
  9138. for (uint32_t i = 0; i < length; i++)
  9139. {
  9140. assert(elems[i] != 0xffffffffu);
  9141. expr += index_to_swizzle(elems[i]);
  9142. }
  9143. if (backend.swizzle_is_function && length > 1)
  9144. expr += "()";
  9145. }
  9146. // A shuffle is trivial in that it doesn't actually *do* anything.
  9147. // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
  9148. emit_op(result_type, id, expr, should_fwd, trivial_forward);
  9149. inherit_expression_dependencies(id, vec0);
  9150. if (vec0 != vec1)
  9151. inherit_expression_dependencies(id, vec1);
  9152. break;
  9153. }
  9154. // ALU
  9155. case OpIsNan:
  9156. GLSL_UFOP(isnan);
  9157. break;
  9158. case OpIsInf:
  9159. GLSL_UFOP(isinf);
  9160. break;
  9161. case OpSNegate:
  9162. case OpFNegate:
  9163. GLSL_UOP(-);
  9164. break;
  9165. case OpIAdd:
  9166. {
  9167. // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
  9168. auto type = get<SPIRType>(ops[0]).basetype;
  9169. GLSL_BOP_CAST(+, type);
  9170. break;
  9171. }
  9172. case OpFAdd:
  9173. GLSL_BOP(+);
  9174. break;
  9175. case OpISub:
  9176. {
  9177. auto type = get<SPIRType>(ops[0]).basetype;
  9178. GLSL_BOP_CAST(-, type);
  9179. break;
  9180. }
  9181. case OpFSub:
  9182. GLSL_BOP(-);
  9183. break;
  9184. case OpIMul:
  9185. {
  9186. auto type = get<SPIRType>(ops[0]).basetype;
  9187. GLSL_BOP_CAST(*, type);
  9188. break;
  9189. }
  9190. case OpVectorTimesMatrix:
  9191. case OpMatrixTimesVector:
  9192. {
  9193. // If the matrix needs transpose, just flip the multiply order.
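// In GLSL, v * M == transpose(M) * v (and vice versa), so the transpose flag can be consumed simply by swapping operands.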
  9194. auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
  9195. if (e && e->need_transpose)
  9196. {
  9197. e->need_transpose = false;
  9198. string expr;
  9199. if (opcode == OpMatrixTimesVector)
  9200. expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
  9201. enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
  9202. else
  9203. expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
  9204. to_enclosed_unpacked_expression(ops[2]));
  9205. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  9206. emit_op(ops[0], ops[1], expr, forward);
  9207. e->need_transpose = true;
  9208. inherit_expression_dependencies(ops[1], ops[2]);
  9209. inherit_expression_dependencies(ops[1], ops[3]);
  9210. }
  9211. else
  9212. GLSL_BOP(*);
  9213. break;
  9214. }
  9215. case OpMatrixTimesMatrix:
  9216. {
  9217. auto *a = maybe_get<SPIRExpression>(ops[2]);
  9218. auto *b = maybe_get<SPIRExpression>(ops[3]);
  9219. // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
  9220. // a^T * b^T = (b * a)^T.
  9221. if (a && b && a->need_transpose && b->need_transpose)
  9222. {
  9223. a->need_transpose = false;
  9224. b->need_transpose = false;
  9225. auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
  9226. enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
  9227. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  9228. auto &e = emit_op(ops[0], ops[1], expr, forward);
  9229. e.need_transpose = true;
  9230. a->need_transpose = true;
  9231. b->need_transpose = true;
  9232. inherit_expression_dependencies(ops[1], ops[2]);
  9233. inherit_expression_dependencies(ops[1], ops[3]);
  9234. }
  9235. else
  9236. GLSL_BOP(*);
  9237. break;
  9238. }
  9239. case OpFMul:
  9240. case OpMatrixTimesScalar:
  9241. case OpVectorTimesScalar:
  9242. GLSL_BOP(*);
  9243. break;
  9244. case OpOuterProduct:
  9245. GLSL_BFOP(outerProduct);
  9246. break;
  9247. case OpDot:
  9248. GLSL_BFOP(dot);
  9249. break;
  9250. case OpTranspose:
9251. if (options.version < 120) // GLSL 1.10 / ESSL 1.00 do not have transpose()
  9252. {
  9253. // transpose() is not available, so instead, flip need_transpose,
  9254. // which can later be turned into an emulated transpose op by
  9255. // convert_row_major_matrix(), if necessary.
  9256. uint32_t result_type = ops[0];
  9257. uint32_t result_id = ops[1];
  9258. uint32_t input = ops[2];
  9259. // Force need_transpose to false temporarily to prevent
  9260. // to_expression() from doing the transpose.
  9261. bool need_transpose = false;
  9262. auto *input_e = maybe_get<SPIRExpression>(input);
  9263. if (input_e)
  9264. swap(need_transpose, input_e->need_transpose);
  9265. bool forward = should_forward(input);
  9266. auto &e = emit_op(result_type, result_id, to_expression(input), forward);
  9267. e.need_transpose = !need_transpose;
  9268. // Restore the old need_transpose flag.
  9269. if (input_e)
  9270. input_e->need_transpose = need_transpose;
  9271. }
  9272. else
  9273. GLSL_UFOP(transpose);
  9274. break;
  9275. case OpSRem:
  9276. {
  9277. uint32_t result_type = ops[0];
  9278. uint32_t result_id = ops[1];
  9279. uint32_t op0 = ops[2];
  9280. uint32_t op1 = ops[3];
  9281. // Needs special handling.
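// OpSRem takes the sign of the dividend (op0). GLSL integer division truncates toward zero,
// so op0 - op1 * (op0 / op1) reproduces that behavior.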
  9282. bool forward = should_forward(op0) && should_forward(op1);
  9283. auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
  9284. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
  9285. emit_op(result_type, result_id, expr, forward);
  9286. inherit_expression_dependencies(result_id, op0);
  9287. inherit_expression_dependencies(result_id, op1);
  9288. break;
  9289. }
  9290. case OpSDiv:
  9291. GLSL_BOP_CAST(/, int_type);
  9292. break;
  9293. case OpUDiv:
  9294. GLSL_BOP_CAST(/, uint_type);
  9295. break;
  9296. case OpIAddCarry:
  9297. case OpISubBorrow:
  9298. {
  9299. if (options.es && options.version < 310)
  9300. SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
  9301. else if (!options.es && options.version < 400)
  9302. SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
  9303. uint32_t result_type = ops[0];
  9304. uint32_t result_id = ops[1];
  9305. uint32_t op0 = ops[2];
  9306. uint32_t op1 = ops[3];
  9307. auto &type = get<SPIRType>(result_type);
  9308. emit_uninitialized_temporary_expression(result_type, result_id);
  9309. const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
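// uaddCarry()/usubBorrow() return the low result and write the carry/borrow through an out parameter,
// so assign struct member 0 and pass member 1 by reference.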
  9310. statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
  9311. to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
  9312. break;
  9313. }
  9314. case OpUMulExtended:
  9315. case OpSMulExtended:
  9316. {
  9317. if (options.es && options.version < 310)
  9318. SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
  9319. else if (!options.es && options.version < 400)
9320. SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
  9321. uint32_t result_type = ops[0];
  9322. uint32_t result_id = ops[1];
  9323. uint32_t op0 = ops[2];
  9324. uint32_t op1 = ops[3];
  9325. auto &type = get<SPIRType>(result_type);
  9326. emit_uninitialized_temporary_expression(result_type, result_id);
  9327. const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
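// umulExtended()/imulExtended() write the high (msb) and low (lsb) halves through out parameters,
// while the SPIR-V result struct stores the low bits in member 0 and the high bits in member 1.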
  9328. statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
  9329. to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
  9330. break;
  9331. }
  9332. case OpFDiv:
  9333. GLSL_BOP(/);
  9334. break;
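// SPIR-V distinguishes logical and arithmetic right shifts as separate opcodes;
// GLSL's >> follows the operand type, so bitcast operands to uint or int accordingly.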
  9335. case OpShiftRightLogical:
  9336. GLSL_BOP_CAST(>>, uint_type);
  9337. break;
  9338. case OpShiftRightArithmetic:
  9339. GLSL_BOP_CAST(>>, int_type);
  9340. break;
  9341. case OpShiftLeftLogical:
  9342. {
  9343. auto type = get<SPIRType>(ops[0]).basetype;
  9344. GLSL_BOP_CAST(<<, type);
  9345. break;
  9346. }
  9347. case OpBitwiseOr:
  9348. {
  9349. auto type = get<SPIRType>(ops[0]).basetype;
  9350. GLSL_BOP_CAST(|, type);
  9351. break;
  9352. }
  9353. case OpBitwiseXor:
  9354. {
  9355. auto type = get<SPIRType>(ops[0]).basetype;
  9356. GLSL_BOP_CAST(^, type);
  9357. break;
  9358. }
  9359. case OpBitwiseAnd:
  9360. {
  9361. auto type = get<SPIRType>(ops[0]).basetype;
  9362. GLSL_BOP_CAST(&, type);
  9363. break;
  9364. }
  9365. case OpNot:
  9366. GLSL_UOP(~);
  9367. break;
  9368. case OpUMod:
  9369. GLSL_BOP_CAST(%, uint_type);
  9370. break;
  9371. case OpSMod:
  9372. GLSL_BOP_CAST(%, int_type);
  9373. break;
  9374. case OpFMod:
  9375. GLSL_BFOP(mod);
  9376. break;
  9377. case OpFRem:
  9378. {
  9379. if (is_legacy())
  9380. SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
  9381. "needed for legacy.");
  9382. uint32_t result_type = ops[0];
  9383. uint32_t result_id = ops[1];
  9384. uint32_t op0 = ops[2];
  9385. uint32_t op1 = ops[3];
  9386. // Needs special handling.
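// OpFRem follows the sign of op0, while GLSL's mod() (used for OpFMod) follows the divisor,
// so emit op0 - op1 * trunc(op0 / op1) instead.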
  9387. bool forward = should_forward(op0) && should_forward(op1);
  9388. auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
  9389. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
  9390. emit_op(result_type, result_id, expr, forward);
  9391. inherit_expression_dependencies(result_id, op0);
  9392. inherit_expression_dependencies(result_id, op1);
  9393. break;
  9394. }
  9395. // Relational
  9396. case OpAny:
  9397. GLSL_UFOP(any);
  9398. break;
  9399. case OpAll:
  9400. GLSL_UFOP(all);
  9401. break;
  9402. case OpSelect:
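// OpSelect operands are (condition, value-if-true, value-if-false), while emit_mix_op() takes
// (false value, true value, selector), hence the reversed operand order.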
  9403. emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
  9404. break;
  9405. case OpLogicalOr:
  9406. {
  9407. // No vector variant in GLSL for logical OR.
  9408. auto result_type = ops[0];
  9409. auto id = ops[1];
  9410. auto &type = get<SPIRType>(result_type);
  9411. if (type.vecsize > 1)
  9412. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
  9413. else
  9414. GLSL_BOP(||);
  9415. break;
  9416. }
  9417. case OpLogicalAnd:
  9418. {
  9419. // No vector variant in GLSL for logical AND.
  9420. auto result_type = ops[0];
  9421. auto id = ops[1];
  9422. auto &type = get<SPIRType>(result_type);
  9423. if (type.vecsize > 1)
  9424. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
  9425. else
  9426. GLSL_BOP(&&);
  9427. break;
  9428. }
  9429. case OpLogicalNot:
  9430. {
  9431. auto &type = get<SPIRType>(ops[0]);
  9432. if (type.vecsize > 1)
  9433. GLSL_UFOP(not );
  9434. else
  9435. GLSL_UOP(!);
  9436. break;
  9437. }
  9438. case OpIEqual:
  9439. {
  9440. if (expression_type(ops[2]).vecsize > 1)
  9441. GLSL_BFOP_CAST(equal, int_type);
  9442. else
  9443. GLSL_BOP_CAST(==, int_type);
  9444. break;
  9445. }
  9446. case OpLogicalEqual:
  9447. case OpFOrdEqual:
  9448. {
  9449. if (expression_type(ops[2]).vecsize > 1)
  9450. GLSL_BFOP(equal);
  9451. else
  9452. GLSL_BOP(==);
  9453. break;
  9454. }
  9455. case OpINotEqual:
  9456. {
  9457. if (expression_type(ops[2]).vecsize > 1)
  9458. GLSL_BFOP_CAST(notEqual, int_type);
  9459. else
  9460. GLSL_BOP_CAST(!=, int_type);
  9461. break;
  9462. }
  9463. case OpLogicalNotEqual:
  9464. case OpFOrdNotEqual:
  9465. {
  9466. if (expression_type(ops[2]).vecsize > 1)
  9467. GLSL_BFOP(notEqual);
  9468. else
  9469. GLSL_BOP(!=);
  9470. break;
  9471. }
  9472. case OpUGreaterThan:
  9473. case OpSGreaterThan:
  9474. {
  9475. auto type = opcode == OpUGreaterThan ? uint_type : int_type;
  9476. if (expression_type(ops[2]).vecsize > 1)
  9477. GLSL_BFOP_CAST(greaterThan, type);
  9478. else
  9479. GLSL_BOP_CAST(>, type);
  9480. break;
  9481. }
  9482. case OpFOrdGreaterThan:
  9483. {
  9484. if (expression_type(ops[2]).vecsize > 1)
  9485. GLSL_BFOP(greaterThan);
  9486. else
  9487. GLSL_BOP(>);
  9488. break;
  9489. }
  9490. case OpUGreaterThanEqual:
  9491. case OpSGreaterThanEqual:
  9492. {
  9493. auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
  9494. if (expression_type(ops[2]).vecsize > 1)
  9495. GLSL_BFOP_CAST(greaterThanEqual, type);
  9496. else
  9497. GLSL_BOP_CAST(>=, type);
  9498. break;
  9499. }
  9500. case OpFOrdGreaterThanEqual:
  9501. {
  9502. if (expression_type(ops[2]).vecsize > 1)
  9503. GLSL_BFOP(greaterThanEqual);
  9504. else
  9505. GLSL_BOP(>=);
  9506. break;
  9507. }
  9508. case OpULessThan:
  9509. case OpSLessThan:
  9510. {
  9511. auto type = opcode == OpULessThan ? uint_type : int_type;
  9512. if (expression_type(ops[2]).vecsize > 1)
  9513. GLSL_BFOP_CAST(lessThan, type);
  9514. else
  9515. GLSL_BOP_CAST(<, type);
  9516. break;
  9517. }
  9518. case OpFOrdLessThan:
  9519. {
  9520. if (expression_type(ops[2]).vecsize > 1)
  9521. GLSL_BFOP(lessThan);
  9522. else
  9523. GLSL_BOP(<);
  9524. break;
  9525. }
  9526. case OpULessThanEqual:
  9527. case OpSLessThanEqual:
  9528. {
  9529. auto type = opcode == OpULessThanEqual ? uint_type : int_type;
  9530. if (expression_type(ops[2]).vecsize > 1)
  9531. GLSL_BFOP_CAST(lessThanEqual, type);
  9532. else
  9533. GLSL_BOP_CAST(<=, type);
  9534. break;
  9535. }
  9536. case OpFOrdLessThanEqual:
  9537. {
  9538. if (expression_type(ops[2]).vecsize > 1)
  9539. GLSL_BFOP(lessThanEqual);
  9540. else
  9541. GLSL_BOP(<=);
  9542. break;
  9543. }
  9544. // Conversion
  9545. case OpSConvert:
  9546. case OpConvertSToF:
  9547. case OpUConvert:
  9548. case OpConvertUToF:
  9549. {
  9550. auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
  9551. uint32_t result_type = ops[0];
  9552. uint32_t id = ops[1];
  9553. auto &type = get<SPIRType>(result_type);
  9554. auto &arg_type = expression_type(ops[2]);
  9555. auto func = type_to_glsl_constructor(type);
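// Widening integer conversions and int-to-float conversions depend on the source signedness,
// so bitcast the argument to the expected signed/unsigned type first; otherwise the constructor alone is enough.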
  9556. if (arg_type.width < type.width || type_is_floating_point(type))
  9557. emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
  9558. else
  9559. emit_unary_func_op(result_type, id, ops[2], func.c_str());
  9560. break;
  9561. }
  9562. case OpConvertFToU:
  9563. case OpConvertFToS:
  9564. {
  9565. // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
  9566. uint32_t result_type = ops[0];
  9567. uint32_t id = ops[1];
  9568. auto &type = get<SPIRType>(result_type);
  9569. auto expected_type = type;
  9570. auto &float_type = expression_type(ops[2]);
  9571. expected_type.basetype =
  9572. opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
  9573. auto func = type_to_glsl_constructor(expected_type);
  9574. emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
  9575. break;
  9576. }
  9577. case OpFConvert:
  9578. {
  9579. uint32_t result_type = ops[0];
  9580. uint32_t id = ops[1];
  9581. auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
  9582. emit_unary_func_op(result_type, id, ops[2], func.c_str());
  9583. break;
  9584. }
  9585. case OpBitcast:
  9586. {
  9587. uint32_t result_type = ops[0];
  9588. uint32_t id = ops[1];
  9589. uint32_t arg = ops[2];
  9590. if (!emit_complex_bitcast(result_type, id, arg))
  9591. {
  9592. auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
  9593. emit_unary_func_op(result_type, id, arg, op.c_str());
  9594. }
  9595. break;
  9596. }
  9597. case OpQuantizeToF16:
  9598. {
  9599. uint32_t result_type = ops[0];
  9600. uint32_t id = ops[1];
  9601. uint32_t arg = ops[2];
  9602. string op;
  9603. auto &type = get<SPIRType>(result_type);
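// Emulate the quantization by round-tripping through packHalf2x16()/unpackHalf2x16(), two components at a time.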
  9604. switch (type.vecsize)
  9605. {
  9606. case 1:
  9607. op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
  9608. break;
  9609. case 2:
  9610. op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
  9611. break;
  9612. case 3:
  9613. {
  9614. auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
  9615. auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
  9616. op = join("vec3(", op0, ", ", op1, ")");
  9617. break;
  9618. }
  9619. case 4:
  9620. {
  9621. auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
  9622. auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
  9623. op = join("vec4(", op0, ", ", op1, ")");
  9624. break;
  9625. }
  9626. default:
  9627. SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
  9628. }
  9629. emit_op(result_type, id, op, should_forward(arg));
  9630. inherit_expression_dependencies(id, arg);
  9631. break;
  9632. }
  9633. // Derivatives
  9634. case OpDPdx:
  9635. GLSL_UFOP(dFdx);
  9636. if (is_legacy_es())
  9637. require_extension_internal("GL_OES_standard_derivatives");
  9638. register_control_dependent_expression(ops[1]);
  9639. break;
  9640. case OpDPdy:
  9641. GLSL_UFOP(dFdy);
  9642. if (is_legacy_es())
  9643. require_extension_internal("GL_OES_standard_derivatives");
  9644. register_control_dependent_expression(ops[1]);
  9645. break;
  9646. case OpDPdxFine:
  9647. GLSL_UFOP(dFdxFine);
  9648. if (options.es)
  9649. {
  9650. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9651. }
  9652. if (options.version < 450)
  9653. require_extension_internal("GL_ARB_derivative_control");
  9654. register_control_dependent_expression(ops[1]);
  9655. break;
  9656. case OpDPdyFine:
  9657. GLSL_UFOP(dFdyFine);
  9658. if (options.es)
  9659. {
  9660. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9661. }
  9662. if (options.version < 450)
  9663. require_extension_internal("GL_ARB_derivative_control");
  9664. register_control_dependent_expression(ops[1]);
  9665. break;
  9666. case OpDPdxCoarse:
  9667. if (options.es)
  9668. {
  9669. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9670. }
  9671. GLSL_UFOP(dFdxCoarse);
  9672. if (options.version < 450)
  9673. require_extension_internal("GL_ARB_derivative_control");
  9674. register_control_dependent_expression(ops[1]);
  9675. break;
  9676. case OpDPdyCoarse:
  9677. GLSL_UFOP(dFdyCoarse);
  9678. if (options.es)
  9679. {
  9680. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9681. }
  9682. if (options.version < 450)
  9683. require_extension_internal("GL_ARB_derivative_control");
  9684. register_control_dependent_expression(ops[1]);
  9685. break;
  9686. case OpFwidth:
  9687. GLSL_UFOP(fwidth);
  9688. if (is_legacy_es())
  9689. require_extension_internal("GL_OES_standard_derivatives");
  9690. register_control_dependent_expression(ops[1]);
  9691. break;
  9692. case OpFwidthCoarse:
  9693. GLSL_UFOP(fwidthCoarse);
  9694. if (options.es)
  9695. {
  9696. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9697. }
  9698. if (options.version < 450)
  9699. require_extension_internal("GL_ARB_derivative_control");
  9700. register_control_dependent_expression(ops[1]);
  9701. break;
  9702. case OpFwidthFine:
  9703. GLSL_UFOP(fwidthFine);
  9704. if (options.es)
  9705. {
  9706. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  9707. }
  9708. if (options.version < 450)
  9709. require_extension_internal("GL_ARB_derivative_control");
  9710. register_control_dependent_expression(ops[1]);
  9711. break;
  9712. // Bitfield
  9713. case OpBitFieldInsert:
  9714. {
  9715. emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
  9716. break;
  9717. }
  9718. case OpBitFieldSExtract:
  9719. {
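// bitfieldExtract() takes signed int offset/count operands, so cast those to int;
// the value operand keeps the signedness of the result type.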
  9720. emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
  9721. SPIRType::Int, SPIRType::Int);
  9722. break;
  9723. }
  9724. case OpBitFieldUExtract:
  9725. {
  9726. emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
  9727. SPIRType::Int, SPIRType::Int);
  9728. break;
  9729. }
  9730. case OpBitReverse:
  9731. // BitReverse does not have issues with sign since result type must match input type.
  9732. GLSL_UFOP(bitfieldReverse);
  9733. break;
  9734. case OpBitCount:
  9735. {
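// bitCount() always returns a signed int result; bitcast if the SPIR-V result type is unsigned.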
  9736. auto basetype = expression_type(ops[2]).basetype;
  9737. emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
  9738. break;
  9739. }
  9740. // Atomics
  9741. case OpAtomicExchange:
  9742. {
  9743. uint32_t result_type = ops[0];
  9744. uint32_t id = ops[1];
  9745. uint32_t ptr = ops[2];
  9746. // Ignore semantics for now, probably only relevant to CL.
  9747. uint32_t val = ops[5];
  9748. const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
  9749. forced_temporaries.insert(id);
  9750. emit_binary_func_op(result_type, id, ptr, val, op);
  9751. flush_all_atomic_capable_variables();
  9752. break;
  9753. }
  9754. case OpAtomicCompareExchange:
  9755. {
  9756. uint32_t result_type = ops[0];
  9757. uint32_t id = ops[1];
  9758. uint32_t ptr = ops[2];
  9759. uint32_t val = ops[6];
  9760. uint32_t comp = ops[7];
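// atomicCompSwap()/imageAtomicCompSwap() take (mem, compare, data), so the comparator goes before the value.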
  9761. const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
  9762. forced_temporaries.insert(id);
  9763. emit_trinary_func_op(result_type, id, ptr, comp, val, op);
  9764. flush_all_atomic_capable_variables();
  9765. break;
  9766. }
  9767. case OpAtomicLoad:
  9768. {
9769. // In plain GLSL, we have no atomic loads, so emulate this by fetch-adding 0 and hoping the compiler figures it out.
  9770. // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
  9771. auto &type = expression_type(ops[2]);
  9772. forced_temporaries.insert(ops[1]);
  9773. bool atomic_image = check_atomic_image(ops[2]);
  9774. bool unsigned_type = (type.basetype == SPIRType::UInt) ||
  9775. (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
  9776. const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
  9777. const char *increment = unsigned_type ? "0u" : "0";
  9778. emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
  9779. flush_all_atomic_capable_variables();
  9780. break;
  9781. }
  9782. case OpAtomicStore:
  9783. {
  9784. // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
  9785. // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
  9786. uint32_t ptr = ops[0];
  9787. // Ignore semantics for now, probably only relevant to CL.
  9788. uint32_t val = ops[3];
  9789. const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
  9790. statement(op, "(", to_expression(ptr), ", ", to_expression(val), ");");
  9791. flush_all_atomic_capable_variables();
  9792. break;
  9793. }
  9794. case OpAtomicIIncrement:
  9795. case OpAtomicIDecrement:
  9796. {
  9797. forced_temporaries.insert(ops[1]);
  9798. auto &type = expression_type(ops[2]);
  9799. if (type.storage == StorageClassAtomicCounter)
  9800. {
  9801. // Legacy GLSL stuff, not sure if this is relevant to support.
  9802. if (opcode == OpAtomicIIncrement)
  9803. GLSL_UFOP(atomicCounterIncrement);
  9804. else
  9805. GLSL_UFOP(atomicCounterDecrement);
  9806. }
  9807. else
  9808. {
  9809. bool atomic_image = check_atomic_image(ops[2]);
  9810. bool unsigned_type = (type.basetype == SPIRType::UInt) ||
  9811. (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
  9812. const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
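// No atomic increment/decrement exists outside atomic counters, so emulate with atomicAdd();
// use 1u / uint(-1) for unsigned types to keep the literal well-typed.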
  9813. const char *increment = nullptr;
  9814. if (opcode == OpAtomicIIncrement && unsigned_type)
  9815. increment = "1u";
  9816. else if (opcode == OpAtomicIIncrement)
  9817. increment = "1";
  9818. else if (unsigned_type)
  9819. increment = "uint(-1)";
  9820. else
  9821. increment = "-1";
  9822. emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false);
  9823. }
  9824. flush_all_atomic_capable_variables();
  9825. break;
  9826. }
  9827. case OpAtomicIAdd:
  9828. {
  9829. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
  9830. forced_temporaries.insert(ops[1]);
  9831. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9832. flush_all_atomic_capable_variables();
  9833. break;
  9834. }
  9835. case OpAtomicISub:
  9836. {
  9837. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
  9838. forced_temporaries.insert(ops[1]);
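// GLSL has no atomicSub(), so add the negated value instead.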
  9839. auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
  9840. emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
  9841. flush_all_atomic_capable_variables();
  9842. break;
  9843. }
  9844. case OpAtomicSMin:
  9845. case OpAtomicUMin:
  9846. {
  9847. const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
  9848. forced_temporaries.insert(ops[1]);
  9849. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9850. flush_all_atomic_capable_variables();
  9851. break;
  9852. }
  9853. case OpAtomicSMax:
  9854. case OpAtomicUMax:
  9855. {
  9856. const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
  9857. forced_temporaries.insert(ops[1]);
  9858. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9859. flush_all_atomic_capable_variables();
  9860. break;
  9861. }
  9862. case OpAtomicAnd:
  9863. {
  9864. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
  9865. forced_temporaries.insert(ops[1]);
  9866. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9867. flush_all_atomic_capable_variables();
  9868. break;
  9869. }
  9870. case OpAtomicOr:
  9871. {
  9872. const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
  9873. forced_temporaries.insert(ops[1]);
  9874. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9875. flush_all_atomic_capable_variables();
  9876. break;
  9877. }
  9878. case OpAtomicXor:
  9879. {
  9880. const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
  9881. forced_temporaries.insert(ops[1]);
  9882. emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
  9883. flush_all_atomic_capable_variables();
  9884. break;
  9885. }
  9886. // Geometry shaders
  9887. case OpEmitVertex:
  9888. statement("EmitVertex();");
  9889. break;
  9890. case OpEndPrimitive:
  9891. statement("EndPrimitive();");
  9892. break;
  9893. case OpEmitStreamVertex:
  9894. {
  9895. if (options.es)
  9896. SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
  9897. else if (!options.es && options.version < 400)
  9898. SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
  9899. auto stream_expr = to_expression(ops[0]);
  9900. if (expression_type(ops[0]).basetype != SPIRType::Int)
  9901. stream_expr = join("int(", stream_expr, ")");
  9902. statement("EmitStreamVertex(", stream_expr, ");");
  9903. break;
  9904. }
  9905. case OpEndStreamPrimitive:
  9906. {
  9907. if (options.es)
  9908. SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
  9909. else if (!options.es && options.version < 400)
  9910. SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
  9911. auto stream_expr = to_expression(ops[0]);
  9912. if (expression_type(ops[0]).basetype != SPIRType::Int)
  9913. stream_expr = join("int(", stream_expr, ")");
  9914. statement("EndStreamPrimitive(", stream_expr, ");");
  9915. break;
  9916. }
  9917. // Textures
  9918. case OpImageSampleExplicitLod:
  9919. case OpImageSampleProjExplicitLod:
  9920. case OpImageSampleDrefExplicitLod:
  9921. case OpImageSampleProjDrefExplicitLod:
  9922. case OpImageSampleImplicitLod:
  9923. case OpImageSampleProjImplicitLod:
  9924. case OpImageSampleDrefImplicitLod:
  9925. case OpImageSampleProjDrefImplicitLod:
  9926. case OpImageFetch:
  9927. case OpImageGather:
  9928. case OpImageDrefGather:
9929. // Gets a bit hairy, so move this to a separate function.
  9930. emit_texture_op(instruction, false);
  9931. break;
  9932. case OpImageSparseSampleExplicitLod:
  9933. case OpImageSparseSampleProjExplicitLod:
  9934. case OpImageSparseSampleDrefExplicitLod:
  9935. case OpImageSparseSampleProjDrefExplicitLod:
  9936. case OpImageSparseSampleImplicitLod:
  9937. case OpImageSparseSampleProjImplicitLod:
  9938. case OpImageSparseSampleDrefImplicitLod:
  9939. case OpImageSparseSampleProjDrefImplicitLod:
  9940. case OpImageSparseFetch:
  9941. case OpImageSparseGather:
  9942. case OpImageSparseDrefGather:
9943. // Gets a bit hairy, so move this to a separate function.
  9944. emit_texture_op(instruction, true);
  9945. break;
  9946. case OpImageSparseTexelsResident:
  9947. if (options.es)
9948. SPIRV_CROSS_THROW("Sparse feedback is not supported in OpenGL ES.");
  9949. require_extension_internal("GL_ARB_sparse_texture2");
  9950. emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
  9951. break;
  9952. case OpImage:
  9953. {
  9954. uint32_t result_type = ops[0];
  9955. uint32_t id = ops[1];
  9956. // Suppress usage tracking.
  9957. auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
  9958. // When using the image, we need to know which variable it is actually loaded from.
  9959. auto *var = maybe_get_backing_variable(ops[2]);
  9960. e.loaded_from = var ? var->self : ID(0);
  9961. break;
  9962. }
  9963. case OpImageQueryLod:
  9964. {
  9965. if (!options.es && options.version < 400)
  9966. {
  9967. require_extension_internal("GL_ARB_texture_query_lod");
  9968. // For some reason, the ARB spec is all-caps.
  9969. GLSL_BFOP(textureQueryLOD);
  9970. }
  9971. else if (options.es)
  9972. SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
  9973. else
  9974. GLSL_BFOP(textureQueryLod);
  9975. register_control_dependent_expression(ops[1]);
  9976. break;
  9977. }
  9978. case OpImageQueryLevels:
  9979. {
  9980. uint32_t result_type = ops[0];
  9981. uint32_t id = ops[1];
  9982. if (!options.es && options.version < 430)
  9983. require_extension_internal("GL_ARB_texture_query_levels");
  9984. if (options.es)
  9985. SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
  9986. auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
  9987. auto &restype = get<SPIRType>(ops[0]);
  9988. expr = bitcast_expression(restype, SPIRType::Int, expr);
  9989. emit_op(result_type, id, expr, true);
  9990. break;
  9991. }
  9992. case OpImageQuerySamples:
  9993. {
  9994. auto &type = expression_type(ops[2]);
  9995. uint32_t result_type = ops[0];
  9996. uint32_t id = ops[1];
  9997. string expr;
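// image.sampled == 2 means a storage image, so use imageSamples(); otherwise this is a sampled texture.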
  9998. if (type.image.sampled == 2)
  9999. expr = join("imageSamples(", to_expression(ops[2]), ")");
  10000. else
  10001. expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
  10002. auto &restype = get<SPIRType>(ops[0]);
  10003. expr = bitcast_expression(restype, SPIRType::Int, expr);
  10004. emit_op(result_type, id, expr, true);
  10005. break;
  10006. }
  10007. case OpSampledImage:
  10008. {
  10009. uint32_t result_type = ops[0];
  10010. uint32_t id = ops[1];
  10011. emit_sampled_image_op(result_type, id, ops[2], ops[3]);
  10012. inherit_expression_dependencies(id, ops[2]);
  10013. inherit_expression_dependencies(id, ops[3]);
  10014. break;
  10015. }
  10016. case OpImageQuerySizeLod:
  10017. {
  10018. uint32_t result_type = ops[0];
  10019. uint32_t id = ops[1];
  10020. uint32_t img = ops[2];
  10021. std::string fname = "textureSize";
  10022. if (is_legacy_desktop())
  10023. {
  10024. auto &type = expression_type(img);
  10025. auto &imgtype = get<SPIRType>(type.self);
  10026. fname = legacy_tex_op(fname, imgtype, img);
  10027. }
  10028. else if (is_legacy_es())
  10029. SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
  10030. auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
  10031. bitcast_expression(SPIRType::Int, ops[3]), ")");
  10032. auto &restype = get<SPIRType>(ops[0]);
  10033. expr = bitcast_expression(restype, SPIRType::Int, expr);
  10034. emit_op(result_type, id, expr, true);
  10035. break;
  10036. }
  10037. // Image load/store
  10038. case OpImageRead:
  10039. case OpImageSparseRead:
  10040. {
10041. // We added NonReadable speculatively to the OpImage variable due to glslangValidator
  10042. // not adding the proper qualifiers.
  10043. // If it turns out we need to read the image after all, remove the qualifier and recompile.
  10044. auto *var = maybe_get_backing_variable(ops[2]);
  10045. if (var)
  10046. {
  10047. auto &flags = ir.meta[var->self].decoration.decoration_flags;
  10048. if (flags.get(DecorationNonReadable))
  10049. {
  10050. flags.clear(DecorationNonReadable);
  10051. force_recompile();
  10052. }
  10053. }
  10054. uint32_t result_type = ops[0];
  10055. uint32_t id = ops[1];
  10056. bool pure;
  10057. string imgexpr;
  10058. auto &type = expression_type(ops[2]);
  10059. if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
  10060. {
  10061. if (type.image.ms)
  10062. SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
  10063. auto itr =
  10064. find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
  10065. if (itr == end(pls_inputs))
  10066. {
  10067. // For non-PLS inputs, we rely on subpass type remapping information to get it right
  10068. // since ImageRead always returns 4-component vectors and the backing type is opaque.
  10069. if (!var->remapped_components)
  10070. SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
  10071. imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
  10072. }
  10073. else
  10074. {
10075. // PLS input could have a different number of components than what SPIR-V expects, so swizzle to
10076. // the appropriate vector size.
  10077. uint32_t components = pls_format_to_components(itr->format);
  10078. imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
  10079. }
  10080. pure = true;
  10081. }
  10082. else if (type.image.dim == DimSubpassData)
  10083. {
  10084. if (var && subpass_input_is_framebuffer_fetch(var->self))
  10085. {
  10086. imgexpr = to_expression(var->self);
  10087. }
  10088. else if (options.vulkan_semantics)
  10089. {
  10090. // With Vulkan semantics, use the proper Vulkan GLSL construct.
  10091. if (type.image.ms)
  10092. {
  10093. uint32_t operands = ops[4];
  10094. if (operands != ImageOperandsSampleMask || length != 6)
  10095. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  10096. "operand mask was used.");
  10097. uint32_t samples = ops[5];
  10098. imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")");
  10099. }
  10100. else
  10101. imgexpr = join("subpassLoad(", to_expression(ops[2]), ")");
  10102. }
  10103. else
  10104. {
  10105. if (type.image.ms)
  10106. {
  10107. uint32_t operands = ops[4];
  10108. if (operands != ImageOperandsSampleMask || length != 6)
  10109. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  10110. "operand mask was used.");
  10111. uint32_t samples = ops[5];
  10112. imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
  10113. to_expression(samples), ")");
  10114. }
  10115. else
  10116. {
  10117. // Implement subpass loads via texture barrier style sampling.
  10118. imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
  10119. }
  10120. }
  10121. imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
  10122. pure = true;
  10123. }
  10124. else
  10125. {
  10126. bool sparse = opcode == OpImageSparseRead;
  10127. uint32_t sparse_code_id = 0;
  10128. uint32_t sparse_texel_id = 0;
  10129. if (sparse)
  10130. emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
  10131. // imageLoad only accepts int coords, not uint.
  10132. auto coord_expr = to_expression(ops[3]);
  10133. auto target_coord_type = expression_type(ops[3]);
  10134. target_coord_type.basetype = SPIRType::Int;
  10135. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
  10136. // Plain image load/store.
  10137. if (sparse)
  10138. {
  10139. if (type.image.ms)
  10140. {
  10141. uint32_t operands = ops[4];
  10142. if (operands != ImageOperandsSampleMask || length != 6)
  10143. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  10144. "operand mask was used.");
  10145. uint32_t samples = ops[5];
  10146. statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ",
  10147. coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
  10148. }
  10149. else
  10150. {
  10151. statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ",
  10152. coord_expr, ", ", to_expression(sparse_texel_id), ");");
  10153. }
  10154. imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
  10155. to_expression(sparse_texel_id), ")");
  10156. }
  10157. else
  10158. {
  10159. if (type.image.ms)
  10160. {
  10161. uint32_t operands = ops[4];
  10162. if (operands != ImageOperandsSampleMask || length != 6)
  10163. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  10164. "operand mask was used.");
  10165. uint32_t samples = ops[5];
  10166. imgexpr =
  10167. join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
  10168. }
  10169. else
  10170. imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")");
  10171. }
  10172. if (!sparse)
  10173. imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
  10174. pure = false;
  10175. }
  10176. if (var && var->forwardable)
  10177. {
  10178. bool forward = forced_temporaries.find(id) == end(forced_temporaries);
  10179. auto &e = emit_op(result_type, id, imgexpr, forward);
  10180. // We only need to track dependencies if we're reading from image load/store.
  10181. if (!pure)
  10182. {
  10183. e.loaded_from = var->self;
  10184. if (forward)
  10185. var->dependees.push_back(id);
  10186. }
  10187. }
  10188. else
  10189. emit_op(result_type, id, imgexpr, false);
  10190. inherit_expression_dependencies(id, ops[2]);
  10191. if (type.image.ms)
  10192. inherit_expression_dependencies(id, ops[5]);
  10193. break;
  10194. }
  10195. case OpImageTexelPointer:
  10196. {
  10197. uint32_t result_type = ops[0];
  10198. uint32_t id = ops[1];
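// GLSL has no pointers, so record the image expression plus the int-cast coordinate;
// the atomic op consuming this pointer splices them into an imageAtomic*() call.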
  10199. auto coord_expr = to_expression(ops[3]);
  10200. auto target_coord_type = expression_type(ops[3]);
  10201. target_coord_type.basetype = SPIRType::Int;
  10202. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
  10203. auto expr = join(to_expression(ops[2]), ", ", coord_expr);
  10204. if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ops[2], DecorationNonUniformEXT))
  10205. convert_non_uniform_expression(expression_type(ops[2]), expr);
  10206. auto &e = set<SPIRExpression>(id, expr, result_type, true);
  10207. // When using the pointer, we need to know which variable it is actually loaded from.
  10208. auto *var = maybe_get_backing_variable(ops[2]);
  10209. e.loaded_from = var ? var->self : ID(0);
  10210. inherit_expression_dependencies(id, ops[3]);
  10211. break;
  10212. }
  10213. case OpImageWrite:
  10214. {
10215. // We added NonWritable speculatively to the OpImage variable due to glslangValidator
  10216. // not adding the proper qualifiers.
  10217. // If it turns out we need to write to the image after all, remove the qualifier and recompile.
  10218. auto *var = maybe_get_backing_variable(ops[0]);
  10219. if (var)
  10220. {
  10221. auto &flags = ir.meta[var->self].decoration.decoration_flags;
  10222. if (flags.get(DecorationNonWritable))
  10223. {
  10224. flags.clear(DecorationNonWritable);
  10225. force_recompile();
  10226. }
  10227. }
  10228. auto &type = expression_type(ops[0]);
  10229. auto &value_type = expression_type(ops[2]);
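// imageStore() always consumes a 4-component value, so widen the stored value via remap_swizzle().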
  10230. auto store_type = value_type;
  10231. store_type.vecsize = 4;
  10232. // imageStore only accepts int coords, not uint.
  10233. auto coord_expr = to_expression(ops[1]);
  10234. auto target_coord_type = expression_type(ops[1]);
  10235. target_coord_type.basetype = SPIRType::Int;
  10236. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
  10237. if (type.image.ms)
  10238. {
  10239. uint32_t operands = ops[3];
  10240. if (operands != ImageOperandsSampleMask || length != 5)
  10241. SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
  10242. uint32_t samples = ops[4];
  10243. statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
  10244. remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
  10245. }
  10246. else
  10247. statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ",
  10248. remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
  10249. if (var && variable_storage_is_aliased(*var))
  10250. flush_all_aliased_variables();
  10251. break;
  10252. }
  10253. case OpImageQuerySize:
  10254. {
  10255. auto &type = expression_type(ops[2]);
  10256. uint32_t result_type = ops[0];
  10257. uint32_t id = ops[1];
  10258. if (type.basetype == SPIRType::Image)
  10259. {
  10260. string expr;
  10261. if (type.image.sampled == 2)
  10262. {
  10263. if (!options.es && options.version < 430)
  10264. require_extension_internal("GL_ARB_shader_image_size");
  10265. else if (options.es && options.version < 310)
  10266. SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
  10267. // The size of an image is always constant.
  10268. expr = join("imageSize(", to_expression(ops[2]), ")");
  10269. }
  10270. else
  10271. {
  10272. // This path is hit for samplerBuffers and multisampled images which do not have LOD.
  10273. std::string fname = "textureSize";
  10274. if (is_legacy())
  10275. {
  10276. auto &imgtype = get<SPIRType>(type.self);
  10277. fname = legacy_tex_op(fname, imgtype, ops[2]);
  10278. }
  10279. expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
  10280. }
  10281. auto &restype = get<SPIRType>(ops[0]);
  10282. expr = bitcast_expression(restype, SPIRType::Int, expr);
  10283. emit_op(result_type, id, expr, true);
  10284. }
  10285. else
  10286. SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
  10287. break;
  10288. }
  10289. // Compute
  10290. case OpControlBarrier:
  10291. case OpMemoryBarrier:
  10292. {
  10293. uint32_t execution_scope = 0;
  10294. uint32_t memory;
  10295. uint32_t semantics;
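// OpMemoryBarrier operands are (Memory Scope, Semantics); OpControlBarrier has an extra leading Execution Scope.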
  10296. if (opcode == OpMemoryBarrier)
  10297. {
  10298. memory = evaluate_constant_u32(ops[0]);
  10299. semantics = evaluate_constant_u32(ops[1]);
  10300. }
  10301. else
  10302. {
  10303. execution_scope = evaluate_constant_u32(ops[0]);
  10304. memory = evaluate_constant_u32(ops[1]);
  10305. semantics = evaluate_constant_u32(ops[2]);
  10306. }
  10307. if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
  10308. {
  10309. // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
  10310. if (opcode != OpControlBarrier)
  10311. {
  10312. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
  10313. }
  10314. else
  10315. {
  10316. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
  10317. }
  10318. }
  10319. if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
  10320. {
10321. // Tessellation control shaders only have barrier(), and it implies memory barriers.
  10322. if (opcode == OpControlBarrier)
  10323. statement("barrier();");
  10324. break;
  10325. }
  10326. // We only care about these flags, acquire/release and friends are not relevant to GLSL.
  10327. semantics = mask_relevant_memory_semantics(semantics);
  10328. if (opcode == OpMemoryBarrier)
  10329. {
10330. // If we are a memory barrier, and the next instruction is a control barrier, check if the control barrier's
10331. // memory semantics already cover what we need, so we can avoid a redundant barrier.
  10332. const Instruction *next = get_next_instruction_in_block(instruction);
  10333. if (next && next->op == OpControlBarrier)
  10334. {
  10335. auto *next_ops = stream(*next);
  10336. uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
  10337. uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
  10338. next_semantics = mask_relevant_memory_semantics(next_semantics);
  10339. bool memory_scope_covered = false;
  10340. if (next_memory == memory)
  10341. memory_scope_covered = true;
  10342. else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
  10343. {
  10344. // If we only care about workgroup memory, either Device or Workgroup scope is fine,
  10345. // scope does not have to match.
  10346. if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
  10347. (memory == ScopeDevice || memory == ScopeWorkgroup))
  10348. {
  10349. memory_scope_covered = true;
  10350. }
  10351. }
  10352. else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
  10353. {
  10354. // The control barrier has device scope, but the memory barrier just has workgroup scope.
  10355. memory_scope_covered = true;
  10356. }
  10357. // If we have the same memory scope, and all memory types are covered, we're good.
  10358. if (memory_scope_covered && (semantics & next_semantics) == semantics)
  10359. break;
  10360. }
  10361. }
  10362. // We are synchronizing some memory or syncing execution,
  10363. // so we cannot forward any loads beyond the memory barrier.
  10364. if (semantics || opcode == OpControlBarrier)
  10365. {
  10366. assert(current_emitting_block);
  10367. flush_control_dependent_expressions(current_emitting_block->self);
  10368. flush_all_active_variables();
  10369. }
  10370. if (memory == ScopeWorkgroup) // Only need to consider memory within a group
  10371. {
  10372. if (semantics == MemorySemanticsWorkgroupMemoryMask)
  10373. {
  10374. // OpControlBarrier implies a memory barrier for shared memory as well.
  10375. bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
  10376. if (!implies_shared_barrier)
  10377. statement("memoryBarrierShared();");
  10378. }
  10379. else if (semantics != 0)
  10380. statement("groupMemoryBarrier();");
  10381. }
  10382. else if (memory == ScopeSubgroup)
  10383. {
  10384. const uint32_t all_barriers =
  10385. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
  10386. if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
  10387. {
10388. // These are not relevant for GLSL, but assume they mean subgroupMemoryBarrier().
10389. // subgroupMemoryBarrier() does everything, so no need to test anything else.
  10390. statement("subgroupMemoryBarrier();");
  10391. }
  10392. else if ((semantics & all_barriers) == all_barriers)
  10393. {
  10394. // Short-hand instead of emitting 3 barriers.
  10395. statement("subgroupMemoryBarrier();");
  10396. }
  10397. else
  10398. {
  10399. // Pick out individual barriers.
  10400. if (semantics & MemorySemanticsWorkgroupMemoryMask)
  10401. statement("subgroupMemoryBarrierShared();");
  10402. if (semantics & MemorySemanticsUniformMemoryMask)
  10403. statement("subgroupMemoryBarrierBuffer();");
  10404. if (semantics & MemorySemanticsImageMemoryMask)
  10405. statement("subgroupMemoryBarrierImage();");
  10406. }
  10407. }
  10408. else
  10409. {
  10410. const uint32_t all_barriers =
  10411. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
  10412. if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
  10413. {
  10414. // These are not relevant for GLSL, but assume it means memoryBarrier().
  10415. // memoryBarrier() does everything, so no need to test anything else.
  10416. statement("memoryBarrier();");
  10417. }
  10418. else if ((semantics & all_barriers) == all_barriers)
  10419. {
10420. // Short-hand instead of emitting the individual barriers below.
  10421. statement("memoryBarrier();");
  10422. }
  10423. else
  10424. {
  10425. // Pick out individual barriers.
  10426. if (semantics & MemorySemanticsWorkgroupMemoryMask)
  10427. statement("memoryBarrierShared();");
  10428. if (semantics & MemorySemanticsUniformMemoryMask)
  10429. statement("memoryBarrierBuffer();");
  10430. if (semantics & MemorySemanticsImageMemoryMask)
  10431. statement("memoryBarrierImage();");
  10432. }
  10433. }
  10434. if (opcode == OpControlBarrier)
  10435. {
  10436. if (execution_scope == ScopeSubgroup)
  10437. statement("subgroupBarrier();");
  10438. else
  10439. statement("barrier();");
  10440. }
  10441. break;
  10442. }
  10443. case OpExtInst:
  10444. {
  10445. uint32_t extension_set = ops[2];
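// ops[3] is the opcode within the imported extension set and &ops[4] its operands; dispatch on the set.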
  10446. if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
  10447. {
  10448. emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  10449. }
  10450. else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
  10451. {
  10452. emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  10453. }
  10454. else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
  10455. {
  10456. emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  10457. }
  10458. else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
  10459. {
  10460. emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  10461. }
  10462. else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
  10463. {
  10464. emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  10465. }
  10466. else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
  10467. {
  10468. break; // Ignore SPIR-V debug information extended instructions.
  10469. }
  10470. else
  10471. {
  10472. statement("// unimplemented ext op ", instruction.op);
  10473. break;
  10474. }
  10475. break;
  10476. }
  10477. // Legacy sub-group stuff ...
  10478. case OpSubgroupBallotKHR:
  10479. {
  10480. uint32_t result_type = ops[0];
  10481. uint32_t id = ops[1];
  10482. string expr;
  10483. expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
  10484. emit_op(result_type, id, expr, should_forward(ops[2]));
  10485. require_extension_internal("GL_ARB_shader_ballot");
  10486. inherit_expression_dependencies(id, ops[2]);
  10487. register_control_dependent_expression(ops[1]);
  10488. break;
  10489. }
  10490. case OpSubgroupFirstInvocationKHR:
  10491. {
  10492. uint32_t result_type = ops[0];
  10493. uint32_t id = ops[1];
  10494. emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
  10495. require_extension_internal("GL_ARB_shader_ballot");
  10496. register_control_dependent_expression(ops[1]);
  10497. break;
  10498. }
  10499. case OpSubgroupReadInvocationKHR:
  10500. {
  10501. uint32_t result_type = ops[0];
  10502. uint32_t id = ops[1];
  10503. emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
  10504. require_extension_internal("GL_ARB_shader_ballot");
  10505. register_control_dependent_expression(ops[1]);
  10506. break;
  10507. }
  10508. case OpSubgroupAllKHR:
  10509. {
  10510. uint32_t result_type = ops[0];
  10511. uint32_t id = ops[1];
  10512. emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
  10513. require_extension_internal("GL_ARB_shader_group_vote");
  10514. register_control_dependent_expression(ops[1]);
  10515. break;
  10516. }
  10517. case OpSubgroupAnyKHR:
  10518. {
  10519. uint32_t result_type = ops[0];
  10520. uint32_t id = ops[1];
  10521. emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
  10522. require_extension_internal("GL_ARB_shader_group_vote");
  10523. register_control_dependent_expression(ops[1]);
  10524. break;
  10525. }
  10526. case OpSubgroupAllEqualKHR:
  10527. {
  10528. uint32_t result_type = ops[0];
  10529. uint32_t id = ops[1];
  10530. emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
  10531. require_extension_internal("GL_ARB_shader_group_vote");
  10532. register_control_dependent_expression(ops[1]);
  10533. break;
  10534. }
  10535. case OpGroupIAddNonUniformAMD:
  10536. case OpGroupFAddNonUniformAMD:
  10537. {
  10538. uint32_t result_type = ops[0];
  10539. uint32_t id = ops[1];
  10540. emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
  10541. require_extension_internal("GL_AMD_shader_ballot");
  10542. register_control_dependent_expression(ops[1]);
  10543. break;
  10544. }
  10545. case OpGroupFMinNonUniformAMD:
  10546. case OpGroupUMinNonUniformAMD:
  10547. case OpGroupSMinNonUniformAMD:
  10548. {
  10549. uint32_t result_type = ops[0];
  10550. uint32_t id = ops[1];
  10551. emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
  10552. require_extension_internal("GL_AMD_shader_ballot");
  10553. register_control_dependent_expression(ops[1]);
  10554. break;
  10555. }
  10556. case OpGroupFMaxNonUniformAMD:
  10557. case OpGroupUMaxNonUniformAMD:
  10558. case OpGroupSMaxNonUniformAMD:
  10559. {
  10560. uint32_t result_type = ops[0];
  10561. uint32_t id = ops[1];
  10562. emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
  10563. require_extension_internal("GL_AMD_shader_ballot");
  10564. register_control_dependent_expression(ops[1]);
  10565. break;
  10566. }
  10567. case OpFragmentMaskFetchAMD:
  10568. {
  10569. auto &type = expression_type(ops[2]);
  10570. uint32_t result_type = ops[0];
  10571. uint32_t id = ops[1];
  10572. if (type.image.dim == spv::DimSubpassData)
  10573. {
  10574. emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
  10575. }
  10576. else
  10577. {
  10578. emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
  10579. }
  10580. require_extension_internal("GL_AMD_shader_fragment_mask");
  10581. break;
  10582. }
  10583. case OpFragmentFetchAMD:
  10584. {
  10585. auto &type = expression_type(ops[2]);
  10586. uint32_t result_type = ops[0];
  10587. uint32_t id = ops[1];
  10588. if (type.image.dim == spv::DimSubpassData)
  10589. {
  10590. emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
  10591. }
  10592. else
  10593. {
  10594. emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
  10595. }
  10596. require_extension_internal("GL_AMD_shader_fragment_mask");
  10597. break;
  10598. }
  10599. // Vulkan 1.1 sub-group stuff ...
  10600. case OpGroupNonUniformElect:
  10601. case OpGroupNonUniformBroadcast:
  10602. case OpGroupNonUniformBroadcastFirst:
  10603. case OpGroupNonUniformBallot:
  10604. case OpGroupNonUniformInverseBallot:
  10605. case OpGroupNonUniformBallotBitExtract:
  10606. case OpGroupNonUniformBallotBitCount:
  10607. case OpGroupNonUniformBallotFindLSB:
  10608. case OpGroupNonUniformBallotFindMSB:
  10609. case OpGroupNonUniformShuffle:
  10610. case OpGroupNonUniformShuffleXor:
  10611. case OpGroupNonUniformShuffleUp:
  10612. case OpGroupNonUniformShuffleDown:
  10613. case OpGroupNonUniformAll:
  10614. case OpGroupNonUniformAny:
  10615. case OpGroupNonUniformAllEqual:
  10616. case OpGroupNonUniformFAdd:
  10617. case OpGroupNonUniformIAdd:
  10618. case OpGroupNonUniformFMul:
  10619. case OpGroupNonUniformIMul:
  10620. case OpGroupNonUniformFMin:
  10621. case OpGroupNonUniformFMax:
  10622. case OpGroupNonUniformSMin:
  10623. case OpGroupNonUniformSMax:
  10624. case OpGroupNonUniformUMin:
  10625. case OpGroupNonUniformUMax:
  10626. case OpGroupNonUniformBitwiseAnd:
  10627. case OpGroupNonUniformBitwiseOr:
  10628. case OpGroupNonUniformBitwiseXor:
  10629. case OpGroupNonUniformQuadSwap:
  10630. case OpGroupNonUniformQuadBroadcast:
  10631. emit_subgroup_op(instruction);
  10632. break;
  10633. case OpFUnordEqual:
  10634. case OpFUnordNotEqual:
  10635. case OpFUnordLessThan:
  10636. case OpFUnordGreaterThan:
  10637. case OpFUnordLessThanEqual:
  10638. case OpFUnordGreaterThanEqual:
  10639. {
  10640. // GLSL doesn't specify if floating point comparisons are ordered or unordered,
  10641. // but glslang always emits ordered floating point compares for GLSL.
  10642. // To get unordered compares, we can test the opposite thing and invert the result.
  10643. // This way, we force true when there is any NaN present.
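// For example, OpFUnordLessThan a, b becomes !(a >= b) (or not(greaterThanEqual(a, b)) for vectors),
// which also evaluates to true when either operand is NaN.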
  10644. uint32_t op0 = ops[2];
  10645. uint32_t op1 = ops[3];
  10646. string expr;
  10647. if (expression_type(op0).vecsize > 1)
  10648. {
  10649. const char *comp_op = nullptr;
  10650. switch (opcode)
  10651. {
  10652. case OpFUnordEqual:
  10653. comp_op = "notEqual";
  10654. break;
  10655. case OpFUnordNotEqual:
  10656. comp_op = "equal";
  10657. break;
  10658. case OpFUnordLessThan:
  10659. comp_op = "greaterThanEqual";
  10660. break;
  10661. case OpFUnordLessThanEqual:
  10662. comp_op = "greaterThan";
  10663. break;
  10664. case OpFUnordGreaterThan:
  10665. comp_op = "lessThanEqual";
  10666. break;
  10667. case OpFUnordGreaterThanEqual:
  10668. comp_op = "lessThan";
  10669. break;
  10670. default:
  10671. assert(0);
  10672. break;
  10673. }
  10674. expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
  10675. }
  10676. else
  10677. {
  10678. const char *comp_op = nullptr;
  10679. switch (opcode)
  10680. {
  10681. case OpFUnordEqual:
  10682. comp_op = " != ";
  10683. break;
  10684. case OpFUnordNotEqual:
  10685. comp_op = " == ";
  10686. break;
  10687. case OpFUnordLessThan:
  10688. comp_op = " >= ";
  10689. break;
  10690. case OpFUnordLessThanEqual:
  10691. comp_op = " > ";
  10692. break;
  10693. case OpFUnordGreaterThan:
  10694. comp_op = " <= ";
  10695. break;
  10696. case OpFUnordGreaterThanEqual:
  10697. comp_op = " < ";
  10698. break;
  10699. default:
  10700. assert(0);
  10701. break;
  10702. }
  10703. expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
  10704. }
  10705. emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
  10706. inherit_expression_dependencies(ops[1], op0);
  10707. inherit_expression_dependencies(ops[1], op1);
  10708. break;
  10709. }
  10710. case OpReportIntersectionKHR:
// NV is the same opcode.
  10712. forced_temporaries.insert(ops[1]);
  10713. if (ray_tracing_is_khr)
  10714. GLSL_BFOP(reportIntersectionEXT);
  10715. else
  10716. GLSL_BFOP(reportIntersectionNV);
  10717. flush_control_dependent_expressions(current_emitting_block->self);
  10718. break;
  10719. case OpIgnoreIntersectionNV:
  10720. // KHR variant is a terminator.
  10721. statement("ignoreIntersectionNV();");
  10722. flush_control_dependent_expressions(current_emitting_block->self);
  10723. break;
  10724. case OpTerminateRayNV:
  10725. // KHR variant is a terminator.
  10726. statement("terminateRayNV();");
  10727. flush_control_dependent_expressions(current_emitting_block->self);
  10728. break;
  10729. case OpTraceNV:
  10730. if (has_decoration(ops[0], DecorationNonUniformEXT))
  10731. propagate_nonuniform_qualifier(ops[0]);
  10732. statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
  10733. to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  10734. to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
  10735. to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
  10736. flush_control_dependent_expressions(current_emitting_block->self);
  10737. break;
  10738. case OpTraceRayKHR:
  10739. if (!has_decoration(ops[10], DecorationLocation))
  10740. SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
  10741. if (has_decoration(ops[0], DecorationNonUniformEXT))
  10742. propagate_nonuniform_qualifier(ops[0]);
  10743. statement("traceRayEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
  10744. to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  10745. to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
  10746. to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
  10747. flush_control_dependent_expressions(current_emitting_block->self);
  10748. break;
  10749. case OpExecuteCallableNV:
  10750. statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
  10751. flush_control_dependent_expressions(current_emitting_block->self);
  10752. break;
  10753. case OpExecuteCallableKHR:
  10754. if (!has_decoration(ops[1], DecorationLocation))
  10755. SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
  10756. statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
  10757. flush_control_dependent_expressions(current_emitting_block->self);
  10758. break;
  10759. case OpConvertUToAccelerationStructureKHR:
  10760. GLSL_UFOP(accelerationStructureEXT);
  10761. break;
  10762. case OpConvertUToPtr:
  10763. {
  10764. auto &type = get<SPIRType>(ops[0]);
  10765. if (type.storage != StorageClassPhysicalStorageBufferEXT)
  10766. SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
  10767. auto op = type_to_glsl(type);
  10768. emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
  10769. break;
  10770. }
  10771. case OpConvertPtrToU:
  10772. {
  10773. auto &type = get<SPIRType>(ops[0]);
  10774. auto &ptr_type = expression_type(ops[2]);
  10775. if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
  10776. SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
  10777. auto op = type_to_glsl(type);
  10778. emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
  10779. break;
  10780. }
  10781. case OpUndef:
  10782. // Undefined value has been declared.
  10783. break;
  10784. case OpLine:
  10785. {
  10786. emit_line_directive(ops[0], ops[1]);
  10787. break;
  10788. }
  10789. case OpNoLine:
  10790. break;
  10791. case OpDemoteToHelperInvocationEXT:
  10792. if (!options.vulkan_semantics)
  10793. SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
  10794. require_extension_internal("GL_EXT_demote_to_helper_invocation");
  10795. statement(backend.demote_literal, ";");
  10796. break;
  10797. case OpIsHelperInvocationEXT:
  10798. if (!options.vulkan_semantics)
  10799. SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
  10800. require_extension_internal("GL_EXT_demote_to_helper_invocation");
  10801. emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
  10802. break;
  10803. case OpBeginInvocationInterlockEXT:
  10804. // If the interlock is complex, we emit this elsewhere.
  10805. if (!interlocked_is_complex)
  10806. {
  10807. if (options.es)
  10808. statement("beginInvocationInterlockNV();");
  10809. else
  10810. statement("beginInvocationInterlockARB();");
  10811. flush_all_active_variables();
// Make sure forwarding doesn't propagate outside the interlock region.
  10813. }
  10814. break;
  10815. case OpEndInvocationInterlockEXT:
  10816. // If the interlock is complex, we emit this elsewhere.
  10817. if (!interlocked_is_complex)
  10818. {
  10819. if (options.es)
  10820. statement("endInvocationInterlockNV();");
  10821. else
  10822. statement("endInvocationInterlockARB();");
  10823. flush_all_active_variables();
// Make sure forwarding doesn't propagate outside the interlock region.
  10825. }
  10826. break;
  10827. default:
  10828. statement("// unimplemented op ", instruction.op);
  10829. break;
  10830. }
  10831. }
// Appends function arguments, mapped from global variables, beyond the specified arg index.
// This is used when a function call uses fewer arguments than the function defines.
// This situation may occur if the function signature has been dynamically modified to
// extract global variables referenced from within the function, and convert them to
// function arguments. This is necessary for shader languages that do not support global
// access to shader input content from within a function (e.g. Metal). Each additional
// function argument uses the name of the global variable. Function nesting will modify the
// functions and function calls all the way up the nesting chain.
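// For example, a call emitted as foo(x) in the original source may become foo(x, some_global)
// after this pass (illustrative names).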
  10840. void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
  10841. {
  10842. auto &args = func.arguments;
  10843. uint32_t arg_cnt = uint32_t(args.size());
  10844. for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
  10845. {
  10846. auto &arg = args[arg_idx];
  10847. assert(arg.alias_global_variable);
// If the underlying variable needs to be declared
// (i.e. a local variable with deferred declaration), do so now.
  10850. uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
  10851. if (var_id)
  10852. flush_variable_declaration(var_id);
  10853. arglist.push_back(to_func_call_arg(arg, arg.id));
  10854. }
  10855. }
  10856. string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
  10857. {
  10858. if (type.type_alias != TypeID(0) &&
  10859. !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
  10860. {
  10861. return to_member_name(get<SPIRType>(type.type_alias), index);
  10862. }
  10863. auto &memb = ir.meta[type.self].members;
  10864. if (index < memb.size() && !memb[index].alias.empty())
  10865. return memb[index].alias;
  10866. else
  10867. return join("_m", index);
  10868. }
  10869. string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
  10870. {
  10871. return join(".", to_member_name(type, index));
  10872. }
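// Chains member accesses for a sequence of member indices, e.g. ".outer.inner".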
  10873. string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
  10874. {
  10875. string ret;
  10876. auto *member_type = &type;
  10877. for (auto &index : indices)
  10878. {
  10879. ret += join(".", to_member_name(*member_type, index));
  10880. member_type = &get<SPIRType>(member_type->member_types[index]);
  10881. }
  10882. return ret;
  10883. }
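// Sanitizes a member name and registers it in the type's member name cache to avoid collisions with other members.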
  10884. void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
  10885. {
  10886. auto &memb = ir.meta[type.self].members;
  10887. if (index < memb.size() && !memb[index].alias.empty())
  10888. {
  10889. auto &name = memb[index].alias;
  10890. if (name.empty())
  10891. return;
  10892. ParsedIR::sanitize_identifier(name, true, true);
  10893. update_name_cache(type.member_name_cache, name);
  10894. }
  10895. }
  10896. // Checks whether the ID is a row_major matrix that requires conversion before use
  10897. bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
  10898. {
  10899. // Natively supported row-major matrices do not need to be converted.
  10900. // Legacy targets do not support row major.
  10901. if (backend.native_row_major_matrix && !is_legacy())
  10902. return false;
  10903. auto *e = maybe_get<SPIRExpression>(id);
  10904. if (e)
  10905. return e->need_transpose;
  10906. else
  10907. return has_decoration(id, DecorationRowMajor);
  10908. }
  10909. // Checks whether the member is a row_major matrix that requires conversion before use
  10910. bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
  10911. {
  10912. // Natively supported row-major matrices do not need to be converted.
  10913. if (backend.native_row_major_matrix && !is_legacy())
  10914. return false;
  10915. // Non-matrix or column-major matrix types do not need to be converted.
  10916. if (!has_member_decoration(type.self, index, DecorationRowMajor))
  10917. return false;
  10918. // Only square row-major matrices can be converted at this time.
// Converting non-square matrices will require defining a custom GLSL function that
// swaps matrix elements while retaining the original dimensional form of the matrix.
  10921. const auto mbr_type = get<SPIRType>(type.member_types[index]);
  10922. if (mbr_type.columns != mbr_type.vecsize)
  10923. SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
  10924. return true;
  10925. }
  10926. // Checks if we need to remap physical type IDs when declaring the type in a buffer.
  10927. bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
  10928. {
  10929. return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
  10930. }
// Checks whether the member is a packed data type that might need to be unpacked.
  10932. bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
  10933. {
  10934. return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
  10935. }
  10936. // Wraps the expression string in a function call that converts the
  10937. // row_major matrix result of the expression to a column_major matrix.
  10938. // Base implementation uses the standard library transpose() function.
  10939. // Subclasses may override to use a different function.
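// For example, a full matrix load "m" becomes "transpose(m)", while loading a single column "m[1]"
// of a row-major mat3 is unrolled into "vec3(m[0][1], m[1][1], m[2][1])" (illustrative).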
  10940. string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
  10941. bool /*is_packed*/)
  10942. {
  10943. strip_enclosed_expression(exp_str);
  10944. if (!is_matrix(exp_type))
  10945. {
  10946. auto column_index = exp_str.find_last_of('[');
  10947. if (column_index == string::npos)
  10948. return exp_str;
  10949. auto column_expr = exp_str.substr(column_index);
  10950. exp_str.resize(column_index);
  10951. auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
  10952. // Loading a column from a row-major matrix. Unroll the load.
  10953. for (uint32_t c = 0; c < exp_type.vecsize; c++)
  10954. {
  10955. transposed_expr += join(exp_str, '[', c, ']', column_expr);
  10956. if (c + 1 < exp_type.vecsize)
  10957. transposed_expr += ", ";
  10958. }
  10959. transposed_expr += ")";
  10960. return transposed_expr;
  10961. }
  10962. else if (options.version < 120)
  10963. {
  10964. // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
  10965. // these GLSL versions do not support non-square matrices.
  10966. if (exp_type.vecsize == 2 && exp_type.columns == 2)
  10967. {
  10968. if (!requires_transpose_2x2)
  10969. {
  10970. requires_transpose_2x2 = true;
  10971. force_recompile();
  10972. }
  10973. }
  10974. else if (exp_type.vecsize == 3 && exp_type.columns == 3)
  10975. {
  10976. if (!requires_transpose_3x3)
  10977. {
  10978. requires_transpose_3x3 = true;
  10979. force_recompile();
  10980. }
  10981. }
  10982. else if (exp_type.vecsize == 4 && exp_type.columns == 4)
  10983. {
  10984. if (!requires_transpose_4x4)
  10985. {
  10986. requires_transpose_4x4 = true;
  10987. force_recompile();
  10988. }
  10989. }
  10990. else
  10991. SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
  10992. return join("spvTranspose(", exp_str, ")");
  10993. }
  10994. else
  10995. return join("transpose(", exp_str, ")");
  10996. }
  10997. string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
  10998. {
  10999. string type_name = type_to_glsl(type, id);
  11000. remap_variable_type_name(type, name, type_name);
  11001. return join(type_name, " ", name, type_to_array_glsl(type));
  11002. }
  11003. // Emit a structure member. Subclasses may override to modify output,
  11004. // or to dynamically add a padding member if needed.
  11005. void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
  11006. const string &qualifier, uint32_t)
  11007. {
  11008. auto &membertype = get<SPIRType>(member_type_id);
  11009. Bitset memberflags;
  11010. auto &memb = ir.meta[type.self].members;
  11011. if (index < memb.size())
  11012. memberflags = memb[index].decoration_flags;
  11013. string qualifiers;
  11014. bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
  11015. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  11016. if (is_block)
  11017. qualifiers = to_interpolation_qualifiers(memberflags);
  11018. statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
  11019. variable_decl(membertype, to_member_name(type, index)), ";");
  11020. }
  11021. void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
  11022. {
  11023. }
  11024. const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
  11025. {
  11026. // GL_EXT_buffer_reference variables can be marked as restrict.
  11027. if (flags.get(DecorationRestrictPointerEXT))
  11028. return "restrict ";
// Structs do not have precision qualifiers, nor do doubles (desktop only anyway, so no mediump/highp).
  11030. if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt &&
  11031. type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage &&
  11032. type.basetype != SPIRType::Sampler)
  11033. return "";
  11034. if (options.es)
  11035. {
  11036. auto &execution = get_entry_point();
  11037. if (flags.get(DecorationRelaxedPrecision))
  11038. {
  11039. bool implied_fmediump = type.basetype == SPIRType::Float &&
  11040. options.fragment.default_float_precision == Options::Mediump &&
  11041. execution.model == ExecutionModelFragment;
  11042. bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
  11043. options.fragment.default_int_precision == Options::Mediump &&
  11044. execution.model == ExecutionModelFragment;
  11045. return implied_fmediump || implied_imediump ? "" : "mediump ";
  11046. }
  11047. else
  11048. {
  11049. bool implied_fhighp =
  11050. type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
  11051. execution.model == ExecutionModelFragment) ||
  11052. (execution.model != ExecutionModelFragment));
  11053. bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
  11054. ((options.fragment.default_int_precision == Options::Highp &&
  11055. execution.model == ExecutionModelFragment) ||
  11056. (execution.model != ExecutionModelFragment));
  11057. return implied_fhighp || implied_ihighp ? "" : "highp ";
  11058. }
  11059. }
  11060. else if (backend.allow_precision_qualifiers)
  11061. {
  11062. // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
  11063. // The default is highp however, so only emit mediump in the rare case that a shader has these.
  11064. if (flags.get(DecorationRelaxedPrecision))
  11065. return "mediump ";
  11066. else
  11067. return "";
  11068. }
  11069. else
  11070. return "";
  11071. }
  11072. const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
  11073. {
  11074. auto &type = expression_type(id);
  11075. bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
  11076. if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
  11077. {
  11078. // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
  11079. auto &result_type = get<SPIRType>(type.image.type);
  11080. if (result_type.width < 32)
  11081. return "mediump ";
  11082. }
  11083. return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
  11084. }
  11085. void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
  11086. {
// Works around weird behavior in glslangValidator where
// a patch out block is translated such that only the block members get the decoration.
// To make glslang not complain when we compile again, we have to transform this back to a case where
// the variable itself has the Patch decoration, and not the members.
  11091. auto &type = get<SPIRType>(var.basetype);
  11092. if (has_decoration(type.self, DecorationBlock))
  11093. {
  11094. uint32_t member_count = uint32_t(type.member_types.size());
  11095. for (uint32_t i = 0; i < member_count; i++)
  11096. {
  11097. if (has_member_decoration(type.self, i, DecorationPatch))
  11098. {
  11099. set_decoration(var.self, DecorationPatch);
  11100. break;
  11101. }
  11102. }
  11103. if (has_decoration(var.self, DecorationPatch))
  11104. for (uint32_t i = 0; i < member_count; i++)
  11105. unset_member_decoration(type.self, i, DecorationPatch);
  11106. }
  11107. }
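// Builds the full qualifier string for an expression: shared, interpolation and storage qualifiers,
// image memory qualifiers (coherent/restrict/readonly/writeonly), and the precision qualifier.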
  11108. string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
  11109. {
  11110. auto &flags = ir.meta[id].decoration.decoration_flags;
  11111. string res;
  11112. auto *var = maybe_get<SPIRVariable>(id);
  11113. if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
  11114. res += "shared ";
  11115. res += to_interpolation_qualifiers(flags);
  11116. if (var)
  11117. res += to_storage_qualifiers_glsl(*var);
  11118. auto &type = expression_type(id);
  11119. if (type.image.dim != DimSubpassData && type.image.sampled == 2)
  11120. {
  11121. if (flags.get(DecorationCoherent))
  11122. res += "coherent ";
  11123. if (flags.get(DecorationRestrict))
  11124. res += "restrict ";
  11125. if (flags.get(DecorationNonWritable))
  11126. res += "readonly ";
  11127. if (flags.get(DecorationNonReadable))
  11128. res += "writeonly ";
  11129. }
  11130. res += to_precision_qualifiers_glsl(id);
  11131. return res;
  11132. }
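// Emits a single function parameter declaration. The out/inout direction is deduced from the
// parameter's read/write counts; plain input parameters get no direction qualifier.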
  11133. string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
  11134. {
// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
  11136. auto &type = expression_type(arg.id);
  11137. const char *direction = "";
  11138. if (type.pointer)
  11139. {
  11140. if (arg.write_count && arg.read_count)
  11141. direction = "inout ";
  11142. else if (arg.write_count)
  11143. direction = "out ";
  11144. }
  11145. return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
  11146. }
  11147. string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
  11148. {
  11149. return to_expression(var.initializer);
  11150. }
  11151. string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
  11152. {
  11153. #ifndef NDEBUG
  11154. auto &type = get<SPIRType>(type_id);
  11155. assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
  11156. type.storage == StorageClassGeneric);
  11157. #endif
  11158. uint32_t id = ir.increase_bound_by(1);
  11159. ir.make_constant_null(id, type_id, false);
  11160. return constant_expression(get<SPIRConstant>(id));
  11161. }
  11162. bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
  11163. {
  11164. if (type.pointer)
  11165. return false;
  11166. if (!type.array.empty() && options.flatten_multidimensional_arrays)
  11167. return false;
  11168. for (auto &literal : type.array_size_literal)
  11169. if (!literal)
  11170. return false;
  11171. for (auto &memb : type.member_types)
  11172. if (!type_can_zero_initialize(get<SPIRType>(memb)))
  11173. return false;
  11174. return true;
  11175. }
  11176. string CompilerGLSL::variable_decl(const SPIRVariable &variable)
  11177. {
  11178. // Ignore the pointer type since GLSL doesn't have pointers.
  11179. auto &type = get_variable_data_type(variable);
  11180. if (type.pointer_depth > 1)
  11181. SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
  11182. auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
  11183. if (variable.loop_variable && variable.static_expression)
  11184. {
  11185. uint32_t expr = variable.static_expression;
  11186. if (ir.ids[expr].get_type() != TypeUndef)
  11187. res += join(" = ", to_expression(variable.static_expression));
  11188. else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  11189. res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
  11190. }
  11191. else if (variable.initializer)
  11192. {
  11193. uint32_t expr = variable.initializer;
  11194. if (ir.ids[expr].get_type() != TypeUndef)
  11195. res += join(" = ", to_initializer_expression(variable));
  11196. else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  11197. res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
  11198. }
  11199. return res;
  11200. }
  11201. const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
  11202. {
  11203. auto &flags = ir.meta[variable.self].decoration.decoration_flags;
  11204. if (flags.get(DecorationRelaxedPrecision))
  11205. return "mediump ";
  11206. else
  11207. return "highp ";
  11208. }
  11209. string CompilerGLSL::pls_decl(const PlsRemap &var)
  11210. {
  11211. auto &variable = get<SPIRVariable>(var.id);
  11212. SPIRType type;
  11213. type.vecsize = pls_format_to_components(var.format);
  11214. type.basetype = pls_format_to_basetype(var.format);
  11215. return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
  11216. to_name(variable.self));
  11217. }
  11218. uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
  11219. {
  11220. return to_array_size_literal(type, uint32_t(type.array.size() - 1));
  11221. }
  11222. uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
  11223. {
  11224. assert(type.array.size() == type.array_size_literal.size());
  11225. if (type.array_size_literal[index])
  11226. {
  11227. return type.array[index];
  11228. }
  11229. else
  11230. {
  11231. // Use the default spec constant value.
  11232. // This is the best we can do.
  11233. return evaluate_constant_u32(type.array[index]);
  11234. }
  11235. }
  11236. string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
  11237. {
  11238. assert(type.array.size() == type.array_size_literal.size());
  11239. auto &size = type.array[index];
  11240. if (!type.array_size_literal[index])
  11241. return to_expression(size);
  11242. else if (size)
  11243. return convert_to_string(size);
  11244. else if (!backend.unsized_array_supported)
  11245. {
// For runtime-sized arrays, we can work around
// the lack of standard support for this by simply having
// a single-element array.
//
// Runtime-length arrays must always be the last element
// in an interface block.
  11252. return "1";
  11253. }
  11254. else
  11255. return "";
  11256. }
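// Emits the array suffix for a type declaration, e.g. "[4][2]", or a single flattened size such as
// "[4 * 2]" when options.flatten_multidimensional_arrays is enabled (illustrative sizes).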
  11257. string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
  11258. {
  11259. if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
  11260. {
  11261. // We are using a wrapped pointer type, and we should not emit any array declarations here.
  11262. return "";
  11263. }
  11264. if (type.array.empty())
  11265. return "";
  11266. if (options.flatten_multidimensional_arrays)
  11267. {
  11268. string res;
  11269. res += "[";
  11270. for (auto i = uint32_t(type.array.size()); i; i--)
  11271. {
  11272. res += enclose_expression(to_array_size(type, i - 1));
  11273. if (i > 1)
  11274. res += " * ";
  11275. }
  11276. res += "]";
  11277. return res;
  11278. }
  11279. else
  11280. {
  11281. if (type.array.size() > 1)
  11282. {
  11283. if (!options.es && options.version < 430)
  11284. require_extension_internal("GL_ARB_arrays_of_arrays");
  11285. else if (options.es && options.version < 310)
  11286. SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
  11287. "Try using --flatten-multidimensional-arrays or set "
  11288. "options.flatten_multidimensional_arrays to true.");
  11289. }
  11290. string res;
  11291. for (auto i = uint32_t(type.array.size()); i; i--)
  11292. {
  11293. res += "[";
  11294. res += to_array_size(type, i - 1);
  11295. res += "]";
  11296. }
  11297. return res;
  11298. }
  11299. }
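// Builds the GLSL type name for an image, texture or sampler type, e.g. "sampler2DArrayShadow",
// "usamplerBuffer" or "image3D", requiring extensions where the target version needs them.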
  11300. string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
  11301. {
  11302. auto &imagetype = get<SPIRType>(type.image.type);
  11303. string res;
  11304. switch (imagetype.basetype)
  11305. {
  11306. case SPIRType::Int:
  11307. case SPIRType::Short:
  11308. case SPIRType::SByte:
  11309. res = "i";
  11310. break;
  11311. case SPIRType::UInt:
  11312. case SPIRType::UShort:
  11313. case SPIRType::UByte:
  11314. res = "u";
  11315. break;
  11316. default:
  11317. break;
  11318. }
  11319. // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
// We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
  11321. if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
  11322. return res + "subpassInput" + (type.image.ms ? "MS" : "");
  11323. else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
  11324. subpass_input_is_framebuffer_fetch(id))
  11325. {
  11326. SPIRType sampled_type = get<SPIRType>(type.image.type);
  11327. sampled_type.vecsize = 4;
  11328. return type_to_glsl(sampled_type);
  11329. }
  11330. // If we're emulating subpassInput with samplers, force sampler2D
  11331. // so we don't have to specify format.
  11332. if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
  11333. {
  11334. // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
  11335. if (type.image.dim == DimBuffer && type.image.sampled == 1)
  11336. res += "sampler";
  11337. else
  11338. res += type.image.sampled == 2 ? "image" : "texture";
  11339. }
  11340. else
  11341. res += "sampler";
  11342. switch (type.image.dim)
  11343. {
  11344. case Dim1D:
  11345. res += "1D";
  11346. break;
  11347. case Dim2D:
  11348. res += "2D";
  11349. break;
  11350. case Dim3D:
  11351. res += "3D";
  11352. break;
  11353. case DimCube:
  11354. res += "Cube";
  11355. break;
  11356. case DimRect:
  11357. if (options.es)
  11358. SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
  11359. if (is_legacy_desktop())
  11360. require_extension_internal("GL_ARB_texture_rectangle");
  11361. res += "2DRect";
  11362. break;
  11363. case DimBuffer:
  11364. if (options.es && options.version < 320)
  11365. require_extension_internal("GL_OES_texture_buffer");
  11366. else if (!options.es && options.version < 300)
  11367. require_extension_internal("GL_EXT_texture_buffer_object");
  11368. res += "Buffer";
  11369. break;
  11370. case DimSubpassData:
  11371. res += "2D";
  11372. break;
  11373. default:
  11374. SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
  11375. }
  11376. if (type.image.ms)
  11377. res += "MS";
  11378. if (type.image.arrayed)
  11379. {
  11380. if (is_legacy_desktop())
  11381. require_extension_internal("GL_EXT_texture_array");
  11382. res += "Array";
  11383. }
  11384. // "Shadow" state in GLSL only exists for samplers and combined image samplers.
  11385. if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
  11386. image_is_comparison(type, id))
  11387. {
  11388. res += "Shadow";
  11389. }
  11390. return res;
  11391. }
  11392. string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
  11393. {
  11394. if (backend.use_array_constructor && type.array.size() > 1)
  11395. {
  11396. if (options.flatten_multidimensional_arrays)
  11397. SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
  11398. "e.g. float[][]().");
  11399. else if (!options.es && options.version < 430)
  11400. require_extension_internal("GL_ARB_arrays_of_arrays");
  11401. else if (options.es && options.version < 310)
  11402. SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
  11403. }
  11404. auto e = type_to_glsl(type);
  11405. if (backend.use_array_constructor)
  11406. {
  11407. for (uint32_t i = 0; i < type.array.size(); i++)
  11408. e += "[]";
  11409. }
  11410. return e;
  11411. }
// The optional id parameter indicates the object whose type we are trying
// to find the description for. Most type descriptions do not
// depend on a specific object's use of that type.
  11415. string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
  11416. {
  11417. if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
  11418. {
  11419. // Need to create a magic type name which compacts the entire type information.
  11420. string name = type_to_glsl(get_pointee_type(type));
  11421. for (size_t i = 0; i < type.array.size(); i++)
  11422. {
  11423. if (type.array_size_literal[i])
  11424. name += join(type.array[i], "_");
  11425. else
  11426. name += join("id", type.array[i], "_");
  11427. }
  11428. name += "Pointer";
  11429. return name;
  11430. }
  11431. switch (type.basetype)
  11432. {
  11433. case SPIRType::Struct:
  11434. // Need OpName lookup here to get a "sensible" name for a struct.
  11435. if (backend.explicit_struct_type)
  11436. return join("struct ", to_name(type.self));
  11437. else
  11438. return to_name(type.self);
  11439. case SPIRType::Image:
  11440. case SPIRType::SampledImage:
  11441. return image_type_glsl(type, id);
  11442. case SPIRType::Sampler:
  11443. // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
  11444. // this distinction into the type system.
  11445. return comparison_ids.count(id) ? "samplerShadow" : "sampler";
  11446. case SPIRType::AccelerationStructure:
  11447. return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
  11448. case SPIRType::Void:
  11449. return "void";
  11450. default:
  11451. break;
  11452. }
  11453. if (type.basetype == SPIRType::UInt && is_legacy())
  11454. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
  11455. if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
  11456. {
  11457. switch (type.basetype)
  11458. {
  11459. case SPIRType::Boolean:
  11460. return "bool";
  11461. case SPIRType::SByte:
  11462. return backend.basic_int8_type;
  11463. case SPIRType::UByte:
  11464. return backend.basic_uint8_type;
  11465. case SPIRType::Short:
  11466. return backend.basic_int16_type;
  11467. case SPIRType::UShort:
  11468. return backend.basic_uint16_type;
  11469. case SPIRType::Int:
  11470. return backend.basic_int_type;
  11471. case SPIRType::UInt:
  11472. return backend.basic_uint_type;
  11473. case SPIRType::AtomicCounter:
  11474. return "atomic_uint";
  11475. case SPIRType::Half:
  11476. return "float16_t";
  11477. case SPIRType::Float:
  11478. return "float";
  11479. case SPIRType::Double:
  11480. return "double";
  11481. case SPIRType::Int64:
  11482. return "int64_t";
  11483. case SPIRType::UInt64:
  11484. return "uint64_t";
  11485. default:
  11486. return "???";
  11487. }
  11488. }
  11489. else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
  11490. {
  11491. switch (type.basetype)
  11492. {
  11493. case SPIRType::Boolean:
  11494. return join("bvec", type.vecsize);
  11495. case SPIRType::SByte:
  11496. return join("i8vec", type.vecsize);
  11497. case SPIRType::UByte:
  11498. return join("u8vec", type.vecsize);
  11499. case SPIRType::Short:
  11500. return join("i16vec", type.vecsize);
  11501. case SPIRType::UShort:
  11502. return join("u16vec", type.vecsize);
  11503. case SPIRType::Int:
  11504. return join("ivec", type.vecsize);
  11505. case SPIRType::UInt:
  11506. return join("uvec", type.vecsize);
  11507. case SPIRType::Half:
  11508. return join("f16vec", type.vecsize);
  11509. case SPIRType::Float:
  11510. return join("vec", type.vecsize);
  11511. case SPIRType::Double:
  11512. return join("dvec", type.vecsize);
  11513. case SPIRType::Int64:
  11514. return join("i64vec", type.vecsize);
  11515. case SPIRType::UInt64:
  11516. return join("u64vec", type.vecsize);
  11517. default:
  11518. return "???";
  11519. }
  11520. }
  11521. else if (type.vecsize == type.columns) // Simple Matrix builtin
  11522. {
  11523. switch (type.basetype)
  11524. {
  11525. case SPIRType::Boolean:
  11526. return join("bmat", type.vecsize);
  11527. case SPIRType::Int:
  11528. return join("imat", type.vecsize);
  11529. case SPIRType::UInt:
  11530. return join("umat", type.vecsize);
  11531. case SPIRType::Half:
  11532. return join("f16mat", type.vecsize);
  11533. case SPIRType::Float:
  11534. return join("mat", type.vecsize);
  11535. case SPIRType::Double:
  11536. return join("dmat", type.vecsize);
  11537. // Matrix types not supported for int64/uint64.
  11538. default:
  11539. return "???";
  11540. }
  11541. }
  11542. else
  11543. {
  11544. switch (type.basetype)
  11545. {
  11546. case SPIRType::Boolean:
  11547. return join("bmat", type.columns, "x", type.vecsize);
  11548. case SPIRType::Int:
  11549. return join("imat", type.columns, "x", type.vecsize);
  11550. case SPIRType::UInt:
  11551. return join("umat", type.columns, "x", type.vecsize);
  11552. case SPIRType::Half:
  11553. return join("f16mat", type.columns, "x", type.vecsize);
  11554. case SPIRType::Float:
  11555. return join("mat", type.columns, "x", type.vecsize);
  11556. case SPIRType::Double:
  11557. return join("dmat", type.columns, "x", type.vecsize);
  11558. // Matrix types not supported for int64/uint64.
  11559. default:
  11560. return "???";
  11561. }
  11562. }
  11563. }
  11564. void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
  11565. const unordered_set<string> &variables_secondary, string &name)
  11566. {
  11567. if (name.empty())
  11568. return;
  11569. ParsedIR::sanitize_underscores(name);
  11570. if (ParsedIR::is_globally_reserved_identifier(name, true))
  11571. {
  11572. name.clear();
  11573. return;
  11574. }
  11575. update_name_cache(variables_primary, variables_secondary, name);
  11576. }
  11577. void CompilerGLSL::add_local_variable_name(uint32_t id)
  11578. {
  11579. add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
  11580. }
  11581. void CompilerGLSL::add_resource_name(uint32_t id)
  11582. {
  11583. add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
  11584. }
  11585. void CompilerGLSL::add_header_line(const std::string &line)
  11586. {
  11587. header_lines.push_back(line);
  11588. }
  11589. bool CompilerGLSL::has_extension(const std::string &ext) const
  11590. {
  11591. auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
  11592. return itr != end(forced_extensions);
  11593. }
  11594. void CompilerGLSL::require_extension(const std::string &ext)
  11595. {
  11596. if (!has_extension(ext))
  11597. forced_extensions.push_back(ext);
  11598. }
  11599. void CompilerGLSL::require_extension_internal(const string &ext)
  11600. {
  11601. if (backend.supports_extensions && !has_extension(ext))
  11602. {
  11603. forced_extensions.push_back(ext);
  11604. force_recompile();
  11605. }
  11606. }
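// Validates that the variable is a non-arrayed, non-empty uniform block struct and records it in
// flattened_buffer_blocks for flattened emission.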
  11607. void CompilerGLSL::flatten_buffer_block(VariableID id)
  11608. {
  11609. auto &var = get<SPIRVariable>(id);
  11610. auto &type = get<SPIRType>(var.basetype);
  11611. auto name = to_name(type.self, false);
  11612. auto &flags = ir.meta[type.self].decoration.decoration_flags;
  11613. if (!type.array.empty())
  11614. SPIRV_CROSS_THROW(name + " is an array of UBOs.");
  11615. if (type.basetype != SPIRType::Struct)
  11616. SPIRV_CROSS_THROW(name + " is not a struct.");
  11617. if (!flags.get(DecorationBlock))
  11618. SPIRV_CROSS_THROW(name + " is not a block.");
  11619. if (type.member_types.empty())
  11620. SPIRV_CROSS_THROW(name + " is an empty struct.");
  11621. flattened_buffer_blocks.insert(id);
  11622. }
  11623. bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
  11624. {
  11625. return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
  11626. }
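// Returns true if the expression refers to a storage image. As a side effect this enables
// GL_OES_shader_image_atomic on ES before 320 and strips readonly/writeonly from the backing
// variable, since atomic access requires both reads and writes.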
  11627. bool CompilerGLSL::check_atomic_image(uint32_t id)
  11628. {
  11629. auto &type = expression_type(id);
  11630. if (type.storage == StorageClassImage)
  11631. {
  11632. if (options.es && options.version < 320)
  11633. require_extension_internal("GL_OES_shader_image_atomic");
  11634. auto *var = maybe_get_backing_variable(id);
  11635. if (var)
  11636. {
  11637. auto &flags = ir.meta[var->self].decoration.decoration_flags;
  11638. if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
  11639. {
  11640. flags.clear(DecorationNonWritable);
  11641. flags.clear(DecorationNonReadable);
  11642. force_recompile();
  11643. }
  11644. }
  11645. return true;
  11646. }
  11647. else
  11648. return false;
  11649. }
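// Hashes the parameter types of a function (ignoring pointers and, with combined image samplers,
// image/sampler arguments) so that two SPIR-V functions which would collapse to the same GLSL
// signature get distinct names.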
  11650. void CompilerGLSL::add_function_overload(const SPIRFunction &func)
  11651. {
  11652. Hasher hasher;
  11653. for (auto &arg : func.arguments)
  11654. {
  11655. // Parameters can vary with pointer type or not,
  11656. // but that will not change the signature in GLSL/HLSL,
  11657. // so strip the pointer type before hashing.
  11658. uint32_t type_id = get_pointee_type_id(arg.type);
  11659. auto &type = get<SPIRType>(type_id);
  11660. if (!combined_image_samplers.empty())
  11661. {
  11662. // If we have combined image samplers, we cannot really trust the image and sampler arguments
  11663. // we pass down to callees, because they may be shuffled around.
  11664. // Ignore these arguments, to make sure that functions need to differ in some other way
  11665. // to be considered different overloads.
  11666. if (type.basetype == SPIRType::SampledImage ||
  11667. (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
  11668. {
  11669. continue;
  11670. }
  11671. }
  11672. hasher.u32(type_id);
  11673. }
  11674. uint64_t types_hash = hasher.get();
  11675. auto function_name = to_name(func.self);
  11676. auto itr = function_overloads.find(function_name);
  11677. if (itr != end(function_overloads))
  11678. {
  11679. // There exists a function with this name already.
  11680. auto &overloads = itr->second;
  11681. if (overloads.count(types_hash) != 0)
  11682. {
  11683. // Overload conflict, assign a new name.
  11684. add_resource_name(func.self);
  11685. function_overloads[to_name(func.self)].insert(types_hash);
  11686. }
  11687. else
  11688. {
  11689. // Can reuse the name.
  11690. overloads.insert(types_hash);
  11691. }
  11692. }
  11693. else
  11694. {
  11695. // First time we see this function name.
  11696. add_resource_name(func.self);
  11697. function_overloads[to_name(func.self)].insert(types_hash);
  11698. }
  11699. }
  11700. void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
  11701. {
  11702. if (func.self != ir.default_entry_point)
  11703. add_function_overload(func);
  11704. // Avoid shadow declarations.
  11705. local_variable_names = resource_names;
  11706. string decl;
  11707. auto &type = get<SPIRType>(func.return_type);
  11708. decl += flags_to_qualifiers_glsl(type, return_flags);
  11709. decl += type_to_glsl(type);
  11710. decl += type_to_array_glsl(type);
  11711. decl += " ";
  11712. if (func.self == ir.default_entry_point)
  11713. {
  11714. // If we need complex fallback in GLSL, we just wrap main() in a function
  11715. // and interlock the entire shader ...
  11716. if (interlocked_is_complex)
  11717. decl += "spvMainInterlockedBody";
  11718. else
  11719. decl += "main";
  11720. processing_entry_point = true;
  11721. }
  11722. else
  11723. decl += to_name(func.self);
  11724. decl += "(";
  11725. SmallVector<string> arglist;
  11726. for (auto &arg : func.arguments)
  11727. {
  11728. // Do not pass in separate images or samplers if we're remapping
  11729. // to combined image samplers.
  11730. if (skip_argument(arg.id))
  11731. continue;
  11732. // Might change the variable name if it already exists in this function.
  11733. // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
// to use the same name for variables.
  11735. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
  11736. add_local_variable_name(arg.id);
  11737. arglist.push_back(argument_decl(arg));
  11738. // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
  11739. auto *var = maybe_get<SPIRVariable>(arg.id);
  11740. if (var)
  11741. var->parameter = &arg;
  11742. }
  11743. for (auto &arg : func.shadow_arguments)
  11744. {
  11745. // Might change the variable name if it already exists in this function.
  11746. // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
// to use the same name for variables.
  11748. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
  11749. add_local_variable_name(arg.id);
  11750. arglist.push_back(argument_decl(arg));
  11751. // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
  11752. auto *var = maybe_get<SPIRVariable>(arg.id);
  11753. if (var)
  11754. var->parameter = &arg;
  11755. }
  11756. decl += merge(arglist);
  11757. decl += ")";
  11758. statement(decl);
  11759. }
  11760. void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
  11761. {
  11762. // Avoid potential cycles.
  11763. if (func.active)
  11764. return;
  11765. func.active = true;
  11766. // If we depend on a function, emit that function before we emit our own function.
  11767. for (auto block : func.blocks)
  11768. {
  11769. auto &b = get<SPIRBlock>(block);
  11770. for (auto &i : b.ops)
  11771. {
  11772. auto ops = stream(i);
  11773. auto op = static_cast<Op>(i.op);
  11774. if (op == OpFunctionCall)
  11775. {
  11776. // Recursively emit functions which are called.
  11777. uint32_t id = ops[2];
  11778. emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
  11779. }
  11780. }
  11781. }
  11782. if (func.entry_line.file_id != 0)
  11783. emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
  11784. emit_function_prototype(func, return_flags);
  11785. begin_scope();
  11786. if (func.self == ir.default_entry_point)
  11787. emit_entry_point_declarations();
  11788. current_function = &func;
  11789. auto &entry_block = get<SPIRBlock>(func.entry_block);
  11790. sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
  11791. for (auto &array : func.constant_arrays_needed_on_stack)
  11792. {
  11793. auto &c = get<SPIRConstant>(array);
  11794. auto &type = get<SPIRType>(c.constant_type);
  11795. statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
  11796. }
  11797. for (auto &v : func.local_variables)
  11798. {
  11799. auto &var = get<SPIRVariable>(v);
  11800. var.deferred_declaration = false;
  11801. if (var.storage == StorageClassWorkgroup)
  11802. {
// Special variable type which cannot have an initializer;
// these need to be declared as standalone variables.
  11805. // Comes from MSL which can push global variables as local variables in main function.
  11806. add_local_variable_name(var.self);
  11807. statement(variable_decl(var), ";");
  11808. var.deferred_declaration = false;
  11809. }
  11810. else if (var.storage == StorageClassPrivate)
  11811. {
  11812. // These variables will not have had their CFG usage analyzed, so move it to the entry block.
  11813. // Comes from MSL which can push global variables as local variables in main function.
  11814. // We could just declare them right now, but we would miss out on an important initialization case which is
  11815. // LUT declaration in MSL.
  11816. // If we don't declare the variable when it is assigned we're forced to go through a helper function
  11817. // which copies elements one by one.
  11818. add_local_variable_name(var.self);
  11819. if (var.initializer)
  11820. {
  11821. statement(variable_decl(var), ";");
  11822. var.deferred_declaration = false;
  11823. }
  11824. else
  11825. {
  11826. auto &dominated = entry_block.dominated_variables;
  11827. if (find(begin(dominated), end(dominated), var.self) == end(dominated))
  11828. entry_block.dominated_variables.push_back(var.self);
  11829. var.deferred_declaration = true;
  11830. }
  11831. }
  11832. else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
  11833. {
  11834. // No need to declare this variable, it has a static expression.
  11835. var.deferred_declaration = false;
  11836. }
  11837. else if (expression_is_lvalue(v))
  11838. {
  11839. add_local_variable_name(var.self);
  11840. // Loop variables should never be declared early, they are explicitly emitted in a loop.
  11841. if (var.initializer && !var.loop_variable)
  11842. statement(variable_decl_function_local(var), ";");
  11843. else
  11844. {
  11845. // Don't declare variable until first use to declutter the GLSL output quite a lot.
  11846. // If we don't touch the variable before first branch,
  11847. // declare it then since we need variable declaration to be in top scope.
  11848. var.deferred_declaration = true;
  11849. }
  11850. }
  11851. else
  11852. {
  11853. // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
  11854. // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
  11855. // This means that when we OpStore to these variables, we just write in the expression ID directly.
  11856. // This breaks any kind of branching, since the variable must be statically assigned.
  11857. // Branching on samplers and images would be pretty much impossible to fake in GLSL.
  11858. var.statically_assigned = true;
  11859. }
  11860. var.loop_variable_enable = false;
  11861. // Loop variables are never declared outside their for-loop, so block any implicit declaration.
  11862. if (var.loop_variable)
  11863. var.deferred_declaration = false;
  11864. }
  11865. // Enforce declaration order for regression testing purposes.
  11866. for (auto &block_id : func.blocks)
  11867. {
  11868. auto &block = get<SPIRBlock>(block_id);
  11869. sort(begin(block.dominated_variables), end(block.dominated_variables));
  11870. }
  11871. for (auto &line : current_function->fixup_hooks_in)
  11872. line();
  11873. emit_block_chain(entry_block);
  11874. end_scope();
  11875. processing_entry_point = false;
  11876. statement("");
  11877. // Make sure deferred declaration state for local variables is cleared when we are done with function.
  11878. // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
  11879. for (auto &v : func.local_variables)
  11880. {
  11881. auto &var = get<SPIRVariable>(v);
  11882. var.deferred_declaration = false;
  11883. }
  11884. }
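// Applies vertex-stage position fix-ups (clip-space Z remap and Y flip) according to options.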
  11885. void CompilerGLSL::emit_fixup()
  11886. {
  11887. if (is_vertex_like_shader())
  11888. {
  11889. if (options.vertex.fixup_clipspace)
  11890. {
  11891. const char *suffix = backend.float_literal_suffix ? "f" : "";
  11892. statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
  11893. }
  11894. if (options.vertex.flip_vert_y)
  11895. statement("gl_Position.y = -gl_Position.y;");
  11896. }
  11897. }
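// Copies PHI source values into their backing function variables when control transfers from block
// 'from' to block 'to', snapshotting values that later PHI copies in the same target block still read.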
  11898. void CompilerGLSL::flush_phi(BlockID from, BlockID to)
  11899. {
  11900. auto &child = get<SPIRBlock>(to);
  11901. if (child.ignore_phi_from_block == from)
  11902. return;
  11903. unordered_set<uint32_t> temporary_phi_variables;
  11904. for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
  11905. {
  11906. auto &phi = *itr;
  11907. if (phi.parent == from)
  11908. {
  11909. auto &var = get<SPIRVariable>(phi.function_variable);
  11910. // A Phi variable might be a loop variable, so flush to static expression.
  11911. if (var.loop_variable && !var.loop_variable_enable)
  11912. var.static_expression = phi.local_variable;
  11913. else
  11914. {
  11915. flush_variable_declaration(phi.function_variable);
  11916. // Check if we are going to write to a Phi variable that another statement will read from
  11917. // as part of another Phi node in our target block.
  11918. // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
  11919. // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
  11920. bool need_saved_temporary =
  11921. find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
  11922. return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
  11923. }) != end(child.phi_variables);
  11924. if (need_saved_temporary)
  11925. {
  11926. // Need to make sure we declare the phi variable with a copy at the right scope.
  11927. // We cannot safely declare a temporary here since we might be inside a continue block.
  11928. if (!var.allocate_temporary_copy)
  11929. {
  11930. var.allocate_temporary_copy = true;
  11931. force_recompile();
  11932. }
  11933. statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
  11934. temporary_phi_variables.insert(phi.function_variable);
  11935. }
// This might be called in a continue block, so make sure we
// use this to emit ESSL 1.0 compliant increments/decrements.
  11938. auto lhs = to_expression(phi.function_variable);
  11939. string rhs;
  11940. if (temporary_phi_variables.count(phi.local_variable))
  11941. rhs = join("_", phi.local_variable, "_copy");
  11942. else
  11943. rhs = to_pointer_expression(phi.local_variable);
  11944. if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
  11945. statement(lhs, " = ", rhs, ";");
  11946. }
  11947. register_write(phi.function_variable);
  11948. }
  11949. }
  11950. }
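// Emits the branch from 'from' into the continue block 'to', either by inlining a complex continue
// block in place or by emitting a plain continue; when one is actually needed.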
  11951. void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
  11952. {
  11953. auto &to_block = get<SPIRBlock>(to);
  11954. if (from == to)
  11955. return;
  11956. assert(is_continue(to));
  11957. if (to_block.complex_continue)
  11958. {
  11959. // Just emit the whole block chain as is.
  11960. auto usage_counts = expression_usage_counts;
  11961. emit_block_chain(to_block);
  11962. // Expression usage counts are moot after returning from the continue block.
  11963. expression_usage_counts = usage_counts;
  11964. }
  11965. else
  11966. {
  11967. auto &from_block = get<SPIRBlock>(from);
  11968. bool outside_control_flow = false;
  11969. uint32_t loop_dominator = 0;
  11970. // FIXME: Refactor this to not use the old loop_dominator tracking.
  11971. if (from_block.merge_block)
  11972. {
  11973. // If we are a loop header, we don't set the loop dominator,
  11974. // so just use "self" here.
  11975. loop_dominator = from;
  11976. }
  11977. else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  11978. {
  11979. loop_dominator = from_block.loop_dominator;
  11980. }
  11981. if (loop_dominator != 0)
  11982. {
  11983. auto &cfg = get_cfg_for_current_function();
  11984. // For non-complex continue blocks, we implicitly branch to the continue block
  11985. // by having the continue block be part of the loop header in for (; ; continue-block).
  11986. outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
  11987. }
// Some simplification for for-loops: we always end up with a useless "continue;"
// statement, since we implicitly branch to the loop block anyway.
// Walk the CFG: if the block calling continue executes unconditionally once we are inside
// the loop body, we can avoid writing out an explicit continue statement.
// A similar optimization applies to return statements when we know we're outside flow control.
  11993. if (!outside_control_flow)
  11994. statement("continue;");
  11995. }
  11996. }
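// Note on the "ladder break" pattern used by branch() below (sketch with a hypothetical
// switch block ID 25): GLSL cannot break out of a loop from inside a switch in one go,
// so emit_block_chain declares and tests a ladder variable around the switch while the
// case body merely sets it:
//   bool _25_ladder_break = false;
//   switch (...) { case 0: { _25_ladder_break = true; break; } ... }
//   if (_25_ladder_break) { break; }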
  11997. void CompilerGLSL::branch(BlockID from, BlockID to)
  11998. {
  11999. flush_phi(from, to);
  12000. flush_control_dependent_expressions(from);
  12001. bool to_is_continue = is_continue(to);
  12002. // This is only a continue if we branch to our loop dominator.
  12003. if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
  12004. {
  12005. // This can happen if we had a complex continue block which was emitted.
  12006. // Once the continue block tries to branch to the loop header, just emit continue;
  12007. // and end the chain here.
  12008. statement("continue;");
  12009. }
  12010. else if (from != to && is_break(to))
  12011. {
  12012. // We cannot break to ourselves, so check explicitly for from != to.
  12013. // This case can trigger if a loop header is all three of these things:
  12014. // - Continue block
  12015. // - Loop header
  12016. // - Break merge target all at once ...
  12017. // Very dirty workaround.
  12018. // Switch constructs are able to break, but they cannot break out of a loop at the same time.
  12019. // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
  12020. // write to the ladder here, and defer the break.
  12021. // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
  12022. if (current_emitting_switch && is_loop_break(to) &&
  12023. current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
  12024. get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
  12025. {
  12026. if (!current_emitting_switch->need_ladder_break)
  12027. {
  12028. force_recompile();
  12029. current_emitting_switch->need_ladder_break = true;
  12030. }
  12031. statement("_", current_emitting_switch->self, "_ladder_break = true;");
  12032. }
  12033. statement("break;");
  12034. }
  12035. else if (to_is_continue || from == to)
  12036. {
// The from == to case can happen for a do-while loop which branches into itself.
// We don't mark these cases as continue blocks, but the only possible way to branch into
// ourselves is by means of a continue block.
  12040. // If we are merging to a continue block, there is no need to emit the block chain for continue here.
  12041. // We can branch to the continue block after we merge execution.
  12042. // Here we make use of structured control flow rules from spec:
  12043. // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
  12044. // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
  12045. // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
  12046. auto &block_meta = ir.block_meta[to];
  12047. bool branching_to_merge =
  12048. (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
  12049. ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
  12050. if (!to_is_continue || !branching_to_merge)
  12051. branch_to_continue(from, to);
  12052. }
  12053. else if (!is_conditional(to))
  12054. emit_block_chain(get<SPIRBlock>(to));
  12055. // It is important that we check for break before continue.
  12056. // A block might serve two purposes, a break block for the inner scope, and
  12057. // a continue block in the outer scope.
  12058. // Inner scope always takes precedence.
  12059. }
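// The conditional variant below collapses trivial paths (sketch, hypothetical condition _20):
// if only the false target needs code, the condition is simply negated so one if suffices,
//   if (!_20) { ...false path... }
// and if both targets are just the selection merge block with no phi to flush, nothing is emitted.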
  12060. void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
  12061. {
  12062. auto &from_block = get<SPIRBlock>(from);
  12063. BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
  12064. // If we branch directly to our selection merge target, we don't need a code path.
  12065. bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
  12066. bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);
  12067. if (!true_block_needs_code && !false_block_needs_code)
  12068. return;
  12069. emit_block_hints(get<SPIRBlock>(from));
  12070. if (true_block_needs_code)
  12071. {
  12072. statement("if (", to_expression(cond), ")");
  12073. begin_scope();
  12074. branch(from, true_block);
  12075. end_scope();
  12076. if (false_block_needs_code)
  12077. {
  12078. statement("else");
  12079. begin_scope();
  12080. branch(from, false_block);
  12081. end_scope();
  12082. }
  12083. }
  12084. else if (false_block_needs_code)
  12085. {
  12086. // Only need false path, use negative conditional.
  12087. statement("if (!", to_enclosed_expression(cond), ")");
  12088. begin_scope();
  12089. branch(from, false_block);
  12090. end_scope();
  12091. }
  12092. }
  12093. // FIXME: This currently cannot handle complex continue blocks
  12094. // as in do-while.
  12095. // This should be seen as a "trivial" continue block.
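// Sketch of the result (hypothetical loop counters): the captured statements get their trailing
// ';' stripped and are later joined with ',' so they can sit in the increment clause of a for loop:
//   for (int i = 0; i < n; i++, j += 2) { ... }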
  12096. string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
  12097. {
  12098. auto *block = &get<SPIRBlock>(continue_block);
  12099. // While emitting the continue block, declare_temporary will check this
  12100. // if we have to emit temporaries.
  12101. current_continue_block = block;
  12102. SmallVector<string> statements;
  12103. // Capture all statements into our list.
  12104. auto *old = redirect_statement;
  12105. redirect_statement = &statements;
  12106. // Stamp out all blocks one after each other.
  12107. while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
  12108. {
  12109. // Write out all instructions we have in this block.
  12110. emit_block_instructions(*block);
  12111. // For plain branchless for/while continue blocks.
  12112. if (block->next_block)
  12113. {
  12114. flush_phi(continue_block, block->next_block);
  12115. block = &get<SPIRBlock>(block->next_block);
  12116. }
  12117. // For do while blocks. The last block will be a select block.
  12118. else if (block->true_block && follow_true_block)
  12119. {
  12120. flush_phi(continue_block, block->true_block);
  12121. block = &get<SPIRBlock>(block->true_block);
  12122. }
  12123. else if (block->false_block && follow_false_block)
  12124. {
  12125. flush_phi(continue_block, block->false_block);
  12126. block = &get<SPIRBlock>(block->false_block);
  12127. }
  12128. else
  12129. {
  12130. SPIRV_CROSS_THROW("Invalid continue block detected!");
  12131. }
  12132. }
  12133. // Restore old pointer.
  12134. redirect_statement = old;
  12135. // Somewhat ugly, strip off the last ';' since we use ',' instead.
  12136. // Ideally, we should select this behavior in statement().
  12137. for (auto &s : statements)
  12138. {
  12139. if (!s.empty() && s.back() == ';')
  12140. s.erase(s.size() - 1, 1);
  12141. }
  12142. current_continue_block = nullptr;
  12143. return merge(statements);
  12144. }
  12145. void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
  12146. {
  12147. // While loops do not take initializers, so declare all of them outside.
  12148. for (auto &loop_var : block.loop_variables)
  12149. {
  12150. auto &var = get<SPIRVariable>(loop_var);
  12151. statement(variable_decl(var), ";");
  12152. }
  12153. }
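// Sketch of initializer merging in emit_for_loop_initializers below (hypothetical variables):
// if every loop variable shares the same type and decorations they merge into one clause,
//   for (int i = 0, j = 10; ...)
// otherwise variables without a usable initializer, or with mismatched types, are declared
// on their own lines before the loop header and left out of the clause.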
  12154. string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
  12155. {
  12156. if (block.loop_variables.empty())
  12157. return "";
  12158. bool same_types = for_loop_initializers_are_same_type(block);
  12159. // We can only declare for loop initializers if all variables are of same type.
  12160. // If we cannot do this, declare individual variables before the loop header.
  12161. // We might have a loop variable candidate which was not assigned to for some reason.
  12162. uint32_t missing_initializers = 0;
  12163. for (auto &variable : block.loop_variables)
  12164. {
  12165. uint32_t expr = get<SPIRVariable>(variable).static_expression;
  12166. // Sometimes loop variables are initialized with OpUndef, but we can just declare
  12167. // a plain variable without initializer in this case.
  12168. if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
  12169. missing_initializers++;
  12170. }
  12171. if (block.loop_variables.size() == 1 && missing_initializers == 0)
  12172. {
  12173. return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
  12174. }
  12175. else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
  12176. {
  12177. for (auto &loop_var : block.loop_variables)
  12178. statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
  12179. return "";
  12180. }
  12181. else
  12182. {
  12183. // We have a mix of loop variables, either ones with a clear initializer, or ones without.
  12184. // Separate the two streams.
  12185. string expr;
  12186. for (auto &loop_var : block.loop_variables)
  12187. {
  12188. uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
  12189. if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
  12190. {
  12191. statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
  12192. }
  12193. else
  12194. {
  12195. auto &var = get<SPIRVariable>(loop_var);
  12196. auto &type = get_variable_data_type(var);
  12197. if (expr.empty())
  12198. {
  12199. // For loop initializers are of the form <type id = value, id = value, id = value, etc ...
  12200. expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
  12201. }
  12202. else
  12203. {
  12204. expr += ", ";
  12205. // In MSL, being based on C++, the asterisk marking a pointer
  12206. // binds to the identifier, not the type.
  12207. if (type.pointer)
  12208. expr += "* ";
  12209. }
  12210. expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
  12211. }
  12212. }
  12213. return expr;
  12214. }
  12215. }
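// The same-type requirement checked below exists because a single for-init clause can only
// declare one type; e.g. "for (int i = 0, uint j = 0u; ...)" is not valid GLSL. Mismatched
// decorations such as precision likewise force the separate-declaration fallback.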
  12216. bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
  12217. {
  12218. if (block.loop_variables.size() <= 1)
  12219. return true;
  12220. uint32_t expected = 0;
  12221. Bitset expected_flags;
  12222. for (auto &var : block.loop_variables)
  12223. {
  12224. // Don't care about uninitialized variables as they will not be part of the initializers.
  12225. uint32_t expr = get<SPIRVariable>(var).static_expression;
  12226. if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
  12227. continue;
  12228. if (expected == 0)
  12229. {
  12230. expected = get<SPIRVariable>(var).basetype;
  12231. expected_flags = get_decoration_bitset(var);
  12232. }
  12233. else if (expected != get<SPIRVariable>(var).basetype)
  12234. return false;
  12235. // Precision flags and things like that must also match.
  12236. if (expected_flags != get_decoration_bitset(var))
  12237. return false;
  12238. }
  12239. return true;
  12240. }
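// attempt_emit_loop_header below only succeeds when emitting the header block's instructions
// produced no new statements, so the branch condition can be folded into loop syntax
// (sketch, hypothetical names):
//   for (int i = 0; i < count; i++) { ... }    or    while (cond) { ... }
// If anything was emitted, the block is flagged with disable_block_optimization and a
// recompile is forced so the next pass falls back to the generic for (;;) pattern.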
  12241. bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
  12242. {
  12243. SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
  12244. if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
  12245. {
  12246. uint32_t current_count = statement_count;
  12247. // If we're trying to create a true for loop,
  12248. // we need to make sure that all opcodes before branch statement do not actually emit any code.
  12249. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
  12250. emit_block_instructions(block);
  12251. bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
  12252. // This can work! We only did trivial things which could be forwarded in block body!
  12253. if (current_count == statement_count && condition_is_temporary)
  12254. {
  12255. switch (continue_type)
  12256. {
  12257. case SPIRBlock::ForLoop:
  12258. {
  12259. // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
  12260. flush_undeclared_variables(block);
  12261. // Important that we do this in this order because
  12262. // emitting the continue block can invalidate the condition expression.
  12263. auto initializer = emit_for_loop_initializers(block);
  12264. auto condition = to_expression(block.condition);
  12265. // Condition might have to be inverted.
  12266. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  12267. condition = join("!", enclose_expression(condition));
  12268. emit_block_hints(block);
  12269. if (method != SPIRBlock::MergeToSelectContinueForLoop)
  12270. {
  12271. auto continue_block = emit_continue_block(block.continue_block, false, false);
  12272. statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
  12273. }
  12274. else
  12275. statement("for (", initializer, "; ", condition, "; )");
  12276. break;
  12277. }
  12278. case SPIRBlock::WhileLoop:
  12279. {
  12280. // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
  12281. flush_undeclared_variables(block);
  12282. emit_while_loop_initializers(block);
  12283. emit_block_hints(block);
  12284. auto condition = to_expression(block.condition);
  12285. // Condition might have to be inverted.
  12286. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  12287. condition = join("!", enclose_expression(condition));
  12288. statement("while (", condition, ")");
  12289. break;
  12290. }
  12291. default:
  12292. block.disable_block_optimization = true;
  12293. force_recompile();
  12294. begin_scope(); // We'll see an end_scope() later.
  12295. return false;
  12296. }
  12297. begin_scope();
  12298. return true;
  12299. }
  12300. else
  12301. {
  12302. block.disable_block_optimization = true;
  12303. force_recompile();
  12304. begin_scope(); // We'll see an end_scope() later.
  12305. return false;
  12306. }
  12307. }
  12308. else if (method == SPIRBlock::MergeToDirectForLoop)
  12309. {
  12310. auto &child = get<SPIRBlock>(block.next_block);
  12311. // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
  12312. flush_undeclared_variables(child);
  12313. uint32_t current_count = statement_count;
  12314. // If we're trying to create a true for loop,
  12315. // we need to make sure that all opcodes before branch statement do not actually emit any code.
  12316. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
  12317. emit_block_instructions(child);
  12318. bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
  12319. if (current_count == statement_count && condition_is_temporary)
  12320. {
  12321. uint32_t target_block = child.true_block;
  12322. switch (continue_type)
  12323. {
  12324. case SPIRBlock::ForLoop:
  12325. {
  12326. // Important that we do this in this order because
  12327. // emitting the continue block can invalidate the condition expression.
  12328. auto initializer = emit_for_loop_initializers(block);
  12329. auto condition = to_expression(child.condition);
  12330. // Condition might have to be inverted.
  12331. if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
  12332. {
  12333. condition = join("!", enclose_expression(condition));
  12334. target_block = child.false_block;
  12335. }
  12336. auto continue_block = emit_continue_block(block.continue_block, false, false);
  12337. emit_block_hints(block);
  12338. statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
  12339. break;
  12340. }
  12341. case SPIRBlock::WhileLoop:
  12342. {
  12343. emit_while_loop_initializers(block);
  12344. emit_block_hints(block);
  12345. auto condition = to_expression(child.condition);
  12346. // Condition might have to be inverted.
  12347. if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
  12348. {
  12349. condition = join("!", enclose_expression(condition));
  12350. target_block = child.false_block;
  12351. }
  12352. statement("while (", condition, ")");
  12353. break;
  12354. }
  12355. default:
  12356. block.disable_block_optimization = true;
  12357. force_recompile();
  12358. begin_scope(); // We'll see an end_scope() later.
  12359. return false;
  12360. }
  12361. begin_scope();
  12362. branch(child.self, target_block);
  12363. return true;
  12364. }
  12365. else
  12366. {
  12367. block.disable_block_optimization = true;
  12368. force_recompile();
  12369. begin_scope(); // We'll see an end_scope() later.
  12370. return false;
  12371. }
  12372. }
  12373. else
  12374. return false;
  12375. }
  12376. void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
  12377. {
  12378. for (auto &v : block.dominated_variables)
  12379. flush_variable_declaration(v);
  12380. }
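// Hoisted temporaries are declared ahead of the construct that dominates them so reads after
// a break still see the value (sketch, hypothetical temporary _37 of float type):
//   float _37;
//   for (;;) { _37 = ...; break; }
//   ... use of _37 after the loop ...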
  12381. void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
  12382. {
  12383. // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
  12384. // Need to sort these to ensure that reference output is stable.
  12385. sort(begin(temporaries), end(temporaries),
  12386. [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
  12387. for (auto &tmp : temporaries)
  12388. {
  12389. add_local_variable_name(tmp.second);
  12390. auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
  12391. auto &type = get<SPIRType>(tmp.first);
  12392. // Not all targets support pointer literals, so don't bother with that case.
  12393. string initializer;
  12394. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  12395. initializer = join(" = ", to_zero_initialized_expression(tmp.first));
  12396. statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");
  12397. hoisted_temporaries.insert(tmp.second);
  12398. forced_temporaries.insert(tmp.second);
  12399. // The temporary might be read from before it's assigned, set up the expression now.
  12400. set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
  12401. }
  12402. }
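// For reference, the do-while path in emit_block_chain below brackets the body with "do"
// and closes the scope with the (possibly inverted) continue-block condition, roughly:
//   do { ...body... } while (!_15);   // hypothetical condition ID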
  12403. void CompilerGLSL::emit_block_chain(SPIRBlock &block)
  12404. {
  12405. bool select_branch_to_true_block = false;
  12406. bool select_branch_to_false_block = false;
  12407. bool skip_direct_branch = false;
  12408. bool emitted_loop_header_variables = false;
  12409. bool force_complex_continue_block = false;
  12410. ValueSaver<uint32_t> loop_level_saver(current_loop_level);
  12411. if (block.merge == SPIRBlock::MergeLoop)
  12412. add_loop_level();
  12413. emit_hoisted_temporaries(block.declare_temporary);
  12414. SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
  12415. if (block.continue_block)
  12416. {
  12417. continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
  12418. // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
  12419. if (continue_type == SPIRBlock::ComplexLoop)
  12420. block.complex_continue = true;
  12421. }
  12422. // If we have loop variables, stop masking out access to the variable now.
  12423. for (auto var_id : block.loop_variables)
  12424. {
  12425. auto &var = get<SPIRVariable>(var_id);
  12426. var.loop_variable_enable = true;
  12427. // We're not going to declare the variable directly, so emit a copy here.
  12428. emit_variable_temporary_copies(var);
  12429. }
  12430. // Remember deferred declaration state. We will restore it before returning.
  12431. SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
  12432. for (size_t i = 0; i < block.dominated_variables.size(); i++)
  12433. {
  12434. uint32_t var_id = block.dominated_variables[i];
  12435. auto &var = get<SPIRVariable>(var_id);
  12436. rearm_dominated_variables[i] = var.deferred_declaration;
  12437. }
  12438. // This is the method often used by spirv-opt to implement loops.
  12439. // The loop header goes straight into the continue block.
  12440. // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
  12441. // it *MUST* be used in the continue block. This loop method will not work.
  12442. if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
  12443. {
  12444. flush_undeclared_variables(block);
  12445. if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
  12446. {
  12447. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  12448. select_branch_to_false_block = true;
  12449. else
  12450. select_branch_to_true_block = true;
  12451. emitted_loop_header_variables = true;
  12452. force_complex_continue_block = true;
  12453. }
  12454. }
  12455. // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
  12456. else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
  12457. {
  12458. flush_undeclared_variables(block);
  12459. if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
  12460. {
// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
  12462. if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
  12463. select_branch_to_false_block = true;
  12464. else
  12465. select_branch_to_true_block = true;
  12466. emitted_loop_header_variables = true;
  12467. }
  12468. }
  12469. // This is the newer loop behavior in glslang which branches from Loop header directly to
  12470. // a new block, which in turn has a OpBranchSelection without a selection merge.
  12471. else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
  12472. {
  12473. flush_undeclared_variables(block);
  12474. if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
  12475. {
  12476. skip_direct_branch = true;
  12477. emitted_loop_header_variables = true;
  12478. }
  12479. }
  12480. else if (continue_type == SPIRBlock::DoWhileLoop)
  12481. {
  12482. flush_undeclared_variables(block);
  12483. emit_while_loop_initializers(block);
  12484. emitted_loop_header_variables = true;
  12485. // We have some temporaries where the loop header is the dominator.
  12486. // We risk a case where we have code like:
  12487. // for (;;) { create-temporary; break; } consume-temporary;
  12488. // so force-declare temporaries here.
  12489. emit_hoisted_temporaries(block.potential_declare_temporary);
  12490. statement("do");
  12491. begin_scope();
  12492. emit_block_instructions(block);
  12493. }
  12494. else if (block.merge == SPIRBlock::MergeLoop)
  12495. {
  12496. flush_undeclared_variables(block);
  12497. emit_while_loop_initializers(block);
  12498. emitted_loop_header_variables = true;
  12499. // We have a generic loop without any distinguishable pattern like for, while or do while.
  12500. get<SPIRBlock>(block.continue_block).complex_continue = true;
  12501. continue_type = SPIRBlock::ComplexLoop;
  12502. // We have some temporaries where the loop header is the dominator.
  12503. // We risk a case where we have code like:
  12504. // for (;;) { create-temporary; break; } consume-temporary;
  12505. // so force-declare temporaries here.
  12506. emit_hoisted_temporaries(block.potential_declare_temporary);
  12507. statement("for (;;)");
  12508. begin_scope();
  12509. emit_block_instructions(block);
  12510. }
  12511. else
  12512. {
  12513. emit_block_instructions(block);
  12514. }
  12515. // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
  12516. // as writes to said loop variables might have been masked out, we need a recompile.
  12517. if (!emitted_loop_header_variables && !block.loop_variables.empty())
  12518. {
  12519. force_recompile();
  12520. for (auto var : block.loop_variables)
  12521. get<SPIRVariable>(var).loop_variable = false;
  12522. block.loop_variables.clear();
  12523. }
  12524. flush_undeclared_variables(block);
  12525. bool emit_next_block = true;
  12526. // Handle end of block.
  12527. switch (block.terminator)
  12528. {
  12529. case SPIRBlock::Direct:
  12530. // True when emitting complex continue block.
  12531. if (block.loop_dominator == block.next_block)
  12532. {
  12533. branch(block.self, block.next_block);
  12534. emit_next_block = false;
  12535. }
  12536. // True if MergeToDirectForLoop succeeded.
  12537. else if (skip_direct_branch)
  12538. emit_next_block = false;
  12539. else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
  12540. {
  12541. branch(block.self, block.next_block);
  12542. emit_next_block = false;
  12543. }
  12544. break;
  12545. case SPIRBlock::Select:
  12546. // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
  12547. if (select_branch_to_true_block)
  12548. {
  12549. if (force_complex_continue_block)
  12550. {
  12551. assert(block.true_block == block.continue_block);
  12552. // We're going to emit a continue block directly here, so make sure it's marked as complex.
  12553. auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
  12554. bool old_complex = complex_continue;
  12555. complex_continue = true;
  12556. branch(block.self, block.true_block);
  12557. complex_continue = old_complex;
  12558. }
  12559. else
  12560. branch(block.self, block.true_block);
  12561. }
  12562. else if (select_branch_to_false_block)
  12563. {
  12564. if (force_complex_continue_block)
  12565. {
  12566. assert(block.false_block == block.continue_block);
  12567. // We're going to emit a continue block directly here, so make sure it's marked as complex.
  12568. auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
  12569. bool old_complex = complex_continue;
  12570. complex_continue = true;
  12571. branch(block.self, block.false_block);
  12572. complex_continue = old_complex;
  12573. }
  12574. else
  12575. branch(block.self, block.false_block);
  12576. }
  12577. else
  12578. branch(block.self, block.condition, block.true_block, block.false_block);
  12579. break;
  12580. case SPIRBlock::MultiSelect:
  12581. {
  12582. auto &type = expression_type(block.condition);
  12583. bool unsigned_case =
  12584. type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte;
  12585. if (block.merge == SPIRBlock::MergeNone)
  12586. SPIRV_CROSS_THROW("Switch statement is not structured");
  12587. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  12588. {
  12589. // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
  12590. SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
  12591. }
  12592. const char *label_suffix = "";
  12593. if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
  12594. label_suffix = "u";
  12595. else if (type.basetype == SPIRType::UShort)
  12596. label_suffix = backend.uint16_t_literal_suffix;
  12597. else if (type.basetype == SPIRType::Short)
  12598. label_suffix = backend.int16_t_literal_suffix;
  12599. SPIRBlock *old_emitting_switch = current_emitting_switch;
  12600. current_emitting_switch = &block;
  12601. if (block.need_ladder_break)
  12602. statement("bool _", block.self, "_ladder_break = false;");
  12603. // Find all unique case constructs.
  12604. unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs;
  12605. SmallVector<uint32_t> block_declaration_order;
  12606. SmallVector<uint32_t> literals_to_merge;
  12607. // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
  12608. // and let the default: block handle it.
  12609. // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
  12610. // We only need to consider possible fallthrough if order[i] branches to order[i + 1].
  12611. for (auto &c : block.cases)
  12612. {
  12613. if (c.block != block.next_block && c.block != block.default_block)
  12614. {
  12615. if (!case_constructs.count(c.block))
  12616. block_declaration_order.push_back(c.block);
  12617. case_constructs[c.block].push_back(c.value);
  12618. }
  12619. else if (c.block == block.next_block && block.default_block != block.next_block)
  12620. {
  12621. // We might have to flush phi inside specific case labels.
  12622. // If we can piggyback on default:, do so instead.
  12623. literals_to_merge.push_back(c.value);
  12624. }
  12625. }
  12626. // Empty literal array -> default.
  12627. if (block.default_block != block.next_block)
  12628. {
  12629. auto &default_block = get<SPIRBlock>(block.default_block);
  12630. // We need to slide in the default block somewhere in this chain
  12631. // if there are fall-through scenarios since the default is declared separately in OpSwitch.
  12632. // Only consider trivial fall-through cases here.
  12633. size_t num_blocks = block_declaration_order.size();
  12634. bool injected_block = false;
  12635. for (size_t i = 0; i < num_blocks; i++)
  12636. {
  12637. auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
  12638. if (execution_is_direct_branch(case_block, default_block))
  12639. {
  12640. // Fallthrough to default block, we must inject the default block here.
  12641. block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
  12642. injected_block = true;
  12643. break;
  12644. }
  12645. else if (execution_is_direct_branch(default_block, case_block))
  12646. {
  12647. // Default case is falling through to another case label, we must inject the default block here.
  12648. block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
  12649. injected_block = true;
  12650. break;
  12651. }
  12652. }
  12653. // Order does not matter.
  12654. if (!injected_block)
  12655. block_declaration_order.push_back(block.default_block);
  12656. else if (is_legacy_es())
  12657. SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
  12658. case_constructs[block.default_block] = {};
  12659. }
  12660. size_t num_blocks = block_declaration_order.size();
  12661. const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string {
  12662. return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal));
  12663. };
  12664. const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint32_t> &labels,
  12665. const char *suffix) -> string {
  12666. string ret;
  12667. size_t count = labels.size();
  12668. for (size_t i = 0; i < count; i++)
  12669. {
  12670. if (i)
  12671. ret += " || ";
  12672. ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
  12673. count > 1 ? ")" : "");
  12674. }
  12675. return ret;
  12676. };
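// On legacy ESSL 1.0 targets the switch is lowered to an if/else-if chain built with the
// helper above, roughly (hypothetical selector c and labels):
//   if ((c == 0) || (c == 1)) { ... }
//   else if (c == 2) { ... }
//   else { ... }   // default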
  12677. // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
  12678. // we need to flush phi nodes outside the switch block in a branch,
  12679. // and skip any Phi handling inside the case label to make fall-through work as expected.
  12680. // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
  12681. // inside the case label if at all possible.
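// Sketch of the emitted guard (hypothetical selector c and labels): for a case label with
// literals the phi flush is wrapped in "if (c == 0 || c == 1)", while for the default block
// a negative test over every other literal is used instead, e.g. "if (c != 0 && c != 2)".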
  12682. for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
  12683. {
  12684. if (flush_phi_required(block.self, block_declaration_order[i]) &&
  12685. flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
  12686. {
  12687. uint32_t target_block = block_declaration_order[i];
  12688. // Make sure we flush Phi, it might have been marked to be ignored earlier.
  12689. get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
  12690. auto &literals = case_constructs[target_block];
  12691. if (literals.empty())
  12692. {
  12693. // Oh boy, gotta make a complete negative test instead! o.o
  12694. // Find all possible literals that would *not* make us enter the default block.
  12695. // If none of those literals match, we flush Phi ...
  12696. SmallVector<string> conditions;
  12697. for (size_t j = 0; j < num_blocks; j++)
  12698. {
  12699. auto &negative_literals = case_constructs[block_declaration_order[j]];
  12700. for (auto &case_label : negative_literals)
  12701. conditions.push_back(join(to_enclosed_expression(block.condition),
  12702. " != ", to_case_label(case_label, unsigned_case)));
  12703. }
  12704. statement("if (", merge(conditions, " && "), ")");
  12705. begin_scope();
  12706. flush_phi(block.self, target_block);
  12707. end_scope();
  12708. }
  12709. else
  12710. {
  12711. SmallVector<string> conditions;
  12712. conditions.reserve(literals.size());
  12713. for (auto &case_label : literals)
  12714. conditions.push_back(join(to_enclosed_expression(block.condition),
  12715. " == ", to_case_label(case_label, unsigned_case)));
  12716. statement("if (", merge(conditions, " || "), ")");
  12717. begin_scope();
  12718. flush_phi(block.self, target_block);
  12719. end_scope();
  12720. }
  12721. // Mark the block so that we don't flush Phi from header to case label.
  12722. get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
  12723. }
  12724. }
  12725. // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
  12726. // non-structured exits with the help of a switch block.
  12727. // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
  12728. bool degenerate_switch = block.default_block != block.merge_block && block.cases.empty();
  12729. if (degenerate_switch || is_legacy_es())
  12730. {
  12731. // ESSL 1.0 is not guaranteed to support do/while.
  12732. if (is_legacy_es())
  12733. {
  12734. uint32_t counter = statement_count;
  12735. statement("for (int spvDummy", counter, " = 0; spvDummy", counter,
  12736. " < 1; spvDummy", counter, "++)");
  12737. }
  12738. else
  12739. statement("do");
  12740. }
  12741. else
  12742. {
  12743. emit_block_hints(block);
  12744. statement("switch (", to_expression(block.condition), ")");
  12745. }
  12746. begin_scope();
  12747. for (size_t i = 0; i < num_blocks; i++)
  12748. {
  12749. uint32_t target_block = block_declaration_order[i];
  12750. auto &literals = case_constructs[target_block];
  12751. if (literals.empty())
  12752. {
  12753. // Default case.
  12754. if (!degenerate_switch)
  12755. {
  12756. if (is_legacy_es())
  12757. statement("else");
  12758. else
  12759. statement("default:");
  12760. }
  12761. }
  12762. else
  12763. {
  12764. if (is_legacy_es())
  12765. {
  12766. statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
  12767. ")");
  12768. }
  12769. else
  12770. {
  12771. for (auto &case_literal : literals)
  12772. {
  12773. // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
  12774. statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
  12775. }
  12776. }
  12777. }
  12778. auto &case_block = get<SPIRBlock>(target_block);
  12779. if (backend.support_case_fallthrough && i + 1 < num_blocks &&
  12780. execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
  12781. {
  12782. // We will fall through here, so just terminate the block chain early.
  12783. // We still need to deal with Phi potentially.
// No need for a stack-like thing here since we only do fall-through when there is a
// single trivial branch to the fall-through target.
  12786. current_emitting_switch_fallthrough = true;
  12787. }
  12788. else
  12789. current_emitting_switch_fallthrough = false;
  12790. if (!degenerate_switch)
  12791. begin_scope();
  12792. branch(block.self, target_block);
  12793. if (!degenerate_switch)
  12794. end_scope();
  12795. current_emitting_switch_fallthrough = false;
  12796. }
  12797. // Might still have to flush phi variables if we branch from loop header directly to merge target.
  12798. if (flush_phi_required(block.self, block.next_block))
  12799. {
  12800. if (block.default_block == block.next_block || !literals_to_merge.empty())
  12801. {
  12802. for (auto &case_literal : literals_to_merge)
  12803. statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":");
  12804. if (block.default_block == block.next_block)
  12805. {
  12806. if (is_legacy_es())
  12807. statement("else");
  12808. else
  12809. statement("default:");
  12810. }
  12811. begin_scope();
  12812. flush_phi(block.self, block.next_block);
  12813. statement("break;");
  12814. end_scope();
  12815. }
  12816. }
  12817. if (degenerate_switch && !is_legacy_es())
  12818. end_scope_decl("while(false)");
  12819. else
  12820. end_scope();
  12821. if (block.need_ladder_break)
  12822. {
  12823. statement("if (_", block.self, "_ladder_break)");
  12824. begin_scope();
  12825. statement("break;");
  12826. end_scope();
  12827. }
  12828. current_emitting_switch = old_emitting_switch;
  12829. break;
  12830. }
  12831. case SPIRBlock::Return:
  12832. {
  12833. for (auto &line : current_function->fixup_hooks_out)
  12834. line();
  12835. if (processing_entry_point)
  12836. emit_fixup();
  12837. auto &cfg = get_cfg_for_current_function();
  12838. if (block.return_value)
  12839. {
  12840. auto &type = expression_type(block.return_value);
  12841. if (!type.array.empty() && !backend.can_return_array)
  12842. {
// If we cannot return arrays, we will have a special out argument we can write to instead.
// The backend is responsible for setting this up, and for redirecting the return values as appropriate.
  12845. if (ir.ids[block.return_value].get_type() != TypeUndef)
  12846. {
  12847. emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction,
  12848. get_expression_effective_storage_class(block.return_value));
  12849. }
  12850. if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
  12851. block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  12852. {
  12853. statement("return;");
  12854. }
  12855. }
  12856. else
  12857. {
  12858. // OpReturnValue can return Undef, so don't emit anything for this case.
  12859. if (ir.ids[block.return_value].get_type() != TypeUndef)
  12860. statement("return ", to_expression(block.return_value), ";");
  12861. }
  12862. }
  12863. else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
  12864. block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  12865. {
  12866. // If this block is the very final block and not called from control flow,
  12867. // we do not need an explicit return which looks out of place. Just end the function here.
  12868. // In the very weird case of for(;;) { return; } executing return is unconditional,
  12869. // but we actually need a return here ...
  12870. statement("return;");
  12871. }
  12872. break;
  12873. }
  12874. case SPIRBlock::Kill:
  12875. statement(backend.discard_literal, ";");
  12876. break;
  12877. case SPIRBlock::Unreachable:
  12878. emit_next_block = false;
  12879. break;
  12880. case SPIRBlock::IgnoreIntersection:
  12881. statement("ignoreIntersectionEXT;");
  12882. break;
  12883. case SPIRBlock::TerminateRay:
  12884. statement("terminateRayEXT;");
  12885. break;
  12886. default:
  12887. SPIRV_CROSS_THROW("Unimplemented block terminator.");
  12888. }
  12889. if (block.next_block && emit_next_block)
  12890. {
  12891. // If we hit this case, we're dealing with an unconditional branch, which means we will output
  12892. // that block after this. If we had selection merge, we already flushed phi variables.
  12893. if (block.merge != SPIRBlock::MergeSelection)
  12894. {
  12895. flush_phi(block.self, block.next_block);
  12896. // For a direct branch, need to remember to invalidate expressions in the next linear block instead.
  12897. get<SPIRBlock>(block.next_block).invalidate_expressions = block.invalidate_expressions;
  12898. }
  12899. // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
  12900. if (!current_emitting_switch_fallthrough)
  12901. {
  12902. // For merge selects we might have ignored the fact that a merge target
  12903. // could have been a break; or continue;
  12904. // We will need to deal with it here.
  12905. if (is_loop_break(block.next_block))
  12906. {
  12907. // Cannot check for just break, because switch statements will also use break.
  12908. assert(block.merge == SPIRBlock::MergeSelection);
  12909. statement("break;");
  12910. }
  12911. else if (is_continue(block.next_block))
  12912. {
  12913. assert(block.merge == SPIRBlock::MergeSelection);
  12914. branch_to_continue(block.self, block.next_block);
  12915. }
  12916. else if (BlockID(block.self) != block.next_block)
  12917. emit_block_chain(get<SPIRBlock>(block.next_block));
  12918. }
  12919. }
  12920. if (block.merge == SPIRBlock::MergeLoop)
  12921. {
  12922. if (continue_type == SPIRBlock::DoWhileLoop)
  12923. {
  12924. // Make sure that we run the continue block to get the expressions set, but this
  12925. // should become an empty string.
  12926. // We have no fallbacks if we cannot forward everything to temporaries ...
  12927. const auto &continue_block = get<SPIRBlock>(block.continue_block);
  12928. bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
  12929. get<SPIRBlock>(continue_block.loop_dominator));
  12930. uint32_t current_count = statement_count;
  12931. auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
  12932. if (statement_count != current_count)
  12933. {
  12934. // The DoWhile block has side effects, force ComplexLoop pattern next pass.
  12935. get<SPIRBlock>(block.continue_block).complex_continue = true;
  12936. force_recompile();
  12937. }
  12938. // Might have to invert the do-while test here.
  12939. auto condition = to_expression(continue_block.condition);
  12940. if (!positive_test)
  12941. condition = join("!", enclose_expression(condition));
  12942. end_scope_decl(join("while (", condition, ")"));
  12943. }
  12944. else
  12945. end_scope();
  12946. loop_level_saver.release();
  12947. // We cannot break out of two loops at once, so don't check for break; here.
  12948. // Using block.self as the "from" block isn't quite right, but it has the same scope
  12949. // and dominance structure, so it's fine.
  12950. if (is_continue(block.merge_block))
  12951. branch_to_continue(block.self, block.merge_block);
  12952. else
  12953. emit_block_chain(get<SPIRBlock>(block.merge_block));
  12954. }
  12955. // Forget about control dependent expressions now.
  12956. block.invalidate_expressions.clear();
  12957. // After we return, we must be out of scope, so if we somehow have to re-emit this function,
  12958. // re-declare variables if necessary.
  12959. assert(rearm_dominated_variables.size() == block.dominated_variables.size());
  12960. for (size_t i = 0; i < block.dominated_variables.size(); i++)
  12961. {
  12962. uint32_t var = block.dominated_variables[i];
  12963. get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
  12964. }
  12965. // Just like for deferred declaration, we need to forget about loop variable enable
  12966. // if our block chain is reinstantiated later.
  12967. for (auto &var_id : block.loop_variables)
  12968. get<SPIRVariable>(var_id).loop_variable_enable = false;
  12969. }
  12970. void CompilerGLSL::begin_scope()
  12971. {
  12972. statement("{");
  12973. indent++;
  12974. }
  12975. void CompilerGLSL::end_scope()
  12976. {
  12977. if (!indent)
  12978. SPIRV_CROSS_THROW("Popping empty indent stack.");
  12979. indent--;
  12980. statement("}");
  12981. }
  12982. void CompilerGLSL::end_scope(const string &trailer)
  12983. {
  12984. if (!indent)
  12985. SPIRV_CROSS_THROW("Popping empty indent stack.");
  12986. indent--;
  12987. statement("}", trailer);
  12988. }
  12989. void CompilerGLSL::end_scope_decl()
  12990. {
  12991. if (!indent)
  12992. SPIRV_CROSS_THROW("Popping empty indent stack.");
  12993. indent--;
  12994. statement("};");
  12995. }
  12996. void CompilerGLSL::end_scope_decl(const string &decl)
  12997. {
  12998. if (!indent)
  12999. SPIRV_CROSS_THROW("Popping empty indent stack.");
  13000. indent--;
  13001. statement("} ", decl, ";");
  13002. }
  13003. void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
  13004. {
  13005. // If our variable is remapped, and we rely on type-remapping information as
  13006. // well, then we cannot pass the variable as a function parameter.
  13007. // Fixing this is non-trivial without stamping out variants of the same function,
  13008. // so for now warn about this and suggest workarounds instead.
  13009. for (uint32_t i = 0; i < length; i++)
  13010. {
  13011. auto *var = maybe_get<SPIRVariable>(args[i]);
  13012. if (!var || !var->remapped_variable)
  13013. continue;
  13014. auto &type = get<SPIRType>(var->basetype);
  13015. if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
  13016. {
  13017. SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
  13018. "This will not work correctly because type-remapping information is lost. "
  13019. "To workaround, please consider not passing the subpass input as a function parameter, "
  13020. "or use in/out variables instead which do not need type remapping information.");
  13021. }
  13022. }
  13023. }
  13024. const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
  13025. {
  13026. // FIXME: This is kind of hacky. There should be a cleaner way.
  13027. auto offset = uint32_t(&instr - current_emitting_block->ops.data());
  13028. if ((offset + 1) < current_emitting_block->ops.size())
  13029. return &current_emitting_block->ops[offset + 1];
  13030. else
  13031. return nullptr;
  13032. }
  13033. uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
  13034. {
  13035. return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
  13036. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
  13037. MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
  13038. }
  13039. void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass)
  13040. {
  13041. statement(lhs, " = ", to_expression(rhs_id), ";");
  13042. }
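// Sketch of the unrolled copy emitted below for tessellation/builtin input arrays
// (hypothetical target ID 42; the real bound N comes from the array type or a spec constant):
//   vec4 _42_unrolled[N];
//   for (int i = 0; i < int(N); i++)
//       _42_unrolled[i] = gl_in[i].gl_Position;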
  13043. void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
  13044. {
  13045. if (!backend.force_gl_in_out_block)
  13046. return;
  13047. // This path is only relevant for GL backends.
  13048. auto *var = maybe_get<SPIRVariable>(source_id);
  13049. if (!var)
  13050. return;
  13051. if (var->storage != StorageClassInput)
  13052. return;
  13053. auto &type = get_variable_data_type(*var);
  13054. if (type.array.empty())
  13055. return;
  13056. auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
  13057. bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition);
  13058. bool is_tess = is_tessellation_shader();
  13059. bool is_patch = has_decoration(var->self, DecorationPatch);
  13060. // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it.
  13061. // We must unroll the array load.
  13062. // For builtins, we couldn't catch this case normally,
  13063. // because this is resolved in the OpAccessChain in most cases.
  13064. // If we load the entire array, we have no choice but to unroll here.
  13065. if (!is_patch && (is_builtin || is_tess))
  13066. {
  13067. auto new_expr = join("_", target_id, "_unrolled");
  13068. statement(variable_decl(type, new_expr, target_id), ";");
  13069. string array_expr;
  13070. if (type.array_size_literal.back())
  13071. {
  13072. array_expr = convert_to_string(type.array.back());
  13073. if (type.array.back() == 0)
  13074. SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
  13075. }
  13076. else
  13077. array_expr = to_expression(type.array.back());
  13078. // The array size might be a specialization constant, so use a for-loop instead.
  13079. statement("for (int i = 0; i < int(", array_expr, "); i++)");
  13080. begin_scope();
  13081. if (is_builtin)
  13082. statement(new_expr, "[i] = gl_in[i].", expr, ";");
  13083. else
  13084. statement(new_expr, "[i] = ", expr, "[i];");
  13085. end_scope();
  13086. expr = move(new_expr);
  13087. }
  13088. }
  13089. void CompilerGLSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
  13090. {
  13091. auto *var = maybe_get_backing_variable(source_id);
  13092. if (var)
  13093. source_id = var->self;
  13094. // Only interested in standalone builtin variables.
  13095. if (!has_decoration(source_id, DecorationBuiltIn))
  13096. return;
  13097. auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
  13098. auto expected_type = expr_type.basetype;
  13099. // TODO: Fill in for more builtins.
  13100. switch (builtin)
  13101. {
  13102. case BuiltInLayer:
  13103. case BuiltInPrimitiveId:
  13104. case BuiltInViewportIndex:
  13105. case BuiltInInstanceId:
  13106. case BuiltInInstanceIndex:
  13107. case BuiltInVertexId:
  13108. case BuiltInVertexIndex:
  13109. case BuiltInSampleId:
  13110. case BuiltInBaseVertex:
  13111. case BuiltInBaseInstance:
  13112. case BuiltInDrawIndex:
  13113. case BuiltInFragStencilRefEXT:
  13114. case BuiltInInstanceCustomIndexNV:
  13115. expected_type = SPIRType::Int;
  13116. break;
  13117. case BuiltInGlobalInvocationId:
  13118. case BuiltInLocalInvocationId:
  13119. case BuiltInWorkgroupId:
  13120. case BuiltInLocalInvocationIndex:
  13121. case BuiltInWorkgroupSize:
  13122. case BuiltInNumWorkgroups:
  13123. case BuiltInIncomingRayFlagsNV:
  13124. case BuiltInLaunchIdNV:
  13125. case BuiltInLaunchSizeNV:
  13126. expected_type = SPIRType::UInt;
  13127. break;
  13128. default:
  13129. break;
  13130. }
  13131. if (expected_type != expr_type.basetype)
  13132. expr = bitcast_expression(expr_type, expected_type, expr);
  13133. }
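// cast_to_builtin_store below mirrors the load path: for example, storing a uint expression
// to gl_Layer (declared as int in GLSL) gets wrapped so the types line up, roughly
// "gl_Layer = int(x);" (hypothetical expression x).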
  13134. void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
  13135. {
  13136. // Only interested in standalone builtin variables.
  13137. if (!has_decoration(target_id, DecorationBuiltIn))
  13138. return;
  13139. auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
  13140. auto expected_type = expr_type.basetype;
  13141. // TODO: Fill in for more builtins.
  13142. switch (builtin)
  13143. {
  13144. case BuiltInLayer:
  13145. case BuiltInPrimitiveId:
  13146. case BuiltInViewportIndex:
  13147. case BuiltInFragStencilRefEXT:
  13148. expected_type = SPIRType::Int;
  13149. break;
  13150. default:
  13151. break;
  13152. }
  13153. if (expected_type != expr_type.basetype)
  13154. {
  13155. auto type = expr_type;
  13156. type.basetype = expected_type;
  13157. expr = bitcast_expression(type, expr_type.basetype, expr);
  13158. }
  13159. }
  13160. void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr)
  13161. {
  13162. if (*backend.nonuniform_qualifier == '\0')
  13163. return;
  13164. // Handle SPV_EXT_descriptor_indexing.
  13165. if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage ||
  13166. type.basetype == SPIRType::Image || type.basetype == SPIRType::AccelerationStructure)
  13167. {
  13168. // The image/sampler ID must be declared as non-uniform.
  13169. // However, it is not legal GLSL to have
  13170. // nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier
  13171. // to the array indexing, like
  13172. // samplers[nonuniformEXT(index)].
  13173. // While the access chain will generally be nonuniformEXT, it's not necessarily so,
  13174. // so we might have to fixup the OpLoad-ed expression late.
  13175. auto start_array_index = expr.find_first_of('[');
  13176. if (start_array_index == string::npos)
  13177. return;
  13178. // Check for the edge case that a non-arrayed resource was marked to be nonuniform,
  13179. // and the bracket we found is actually part of non-resource related data.
  13180. if (expr.find_first_of(',') < start_array_index)
  13181. return;
  13182. // We've opened a bracket, track expressions until we can close the bracket.
  13183. // This must be our image index.
  13184. size_t end_array_index = string::npos;
  13185. unsigned bracket_count = 1;
  13186. for (size_t index = start_array_index + 1; index < expr.size(); index++)
  13187. {
  13188. if (expr[index] == ']')
  13189. {
  13190. if (--bracket_count == 0)
  13191. {
  13192. end_array_index = index;
  13193. break;
  13194. }
  13195. }
  13196. else if (expr[index] == '[')
  13197. bracket_count++;
  13198. }
  13199. assert(bracket_count == 0);
  13200. // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
  13201. // nothing we can do here to express that.
  13202. if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
  13203. return;
  13204. start_array_index++;
  13205. expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
  13206. expr.substr(start_array_index, end_array_index - start_array_index), ")",
  13207. expr.substr(end_array_index, string::npos));
  13208. }
  13209. }
void CompilerGLSL::emit_block_hints(const SPIRBlock &)
{
}

void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
{
	preserved_aliases[id] = get_name(id);
}

void CompilerGLSL::reset_name_caches()
{
	for (auto &preserved : preserved_aliases)
		set_name(preserved.first, preserved.second);

	preserved_aliases.clear();
	resource_names.clear();
	block_input_names.clear();
	block_output_names.clear();
	block_ubo_names.clear();
	block_ssbo_names.clear();
	block_names.clear();
	function_overloads.clear();
}

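// Note (interpretation): type aliases typically appear when the same struct layout is used both as a
// buffer block member (block-like, with Offset decorations) and as a plain struct. The pass below
// promotes the block-like declaration to be the "master" of the alias group, which some backends rely on.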
void CompilerGLSL::fixup_type_alias()
{
	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
		if (!type.type_alias)
			return;

		if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
		{
			// Top-level block types should never alias anything else.
			type.type_alias = 0;
		}
		else if (type_is_block_like(type) && type.self == ID(self))
		{
			// A block-like type is any type which contains Offset decoration, but not top-level blocks,
			// i.e. blocks which are placed inside buffers.
			// Become the master.
			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
				if (other_id == self)
					return;

				if (other_type.type_alias == type.type_alias)
					other_type.type_alias = self;
			});

			this->get<SPIRType>(type.type_alias).type_alias = self;
			type.type_alias = 0;
		}
	});
}

void CompilerGLSL::reorder_type_alias()
{
	// Reorder declaration of types so that the master of the type alias is always emitted first.
	// We need this in case a type B depends on type A (A must come before B in the vector), but A is an alias
	// of a type ABuffer, which means declaration of A doesn't happen (yet), and the order would be
	// B, ABuffer and not ABuffer, B. Fix this up here.
	auto loop_lock = ir.create_loop_hard_lock();

	auto &type_ids = ir.ids_for_type[TypeType];
	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
	{
		auto &type = get<SPIRType>(*alias_itr);
		if (type.type_alias != TypeID(0) &&
		    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		{
			// We will skip declaring this type, so make sure the type_alias type comes before.
			auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
			assert(master_itr != end(type_ids));

			if (alias_itr < master_itr)
			{
				// Must also swap the type order for the constant-type joined array.
				auto &joined_types = ir.ids_for_constant_or_type;
				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
				assert(alt_alias_itr != end(joined_types));
				assert(alt_master_itr != end(joined_types));

				swap(*alias_itr, *master_itr);
				swap(*alt_alias_itr, *alt_master_itr);
			}
		}
	}
}

void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
{
	// If we are redirecting statements, ignore the line directive.
	// Common case here is continue blocks.
	if (redirect_statement)
		return;

	if (options.emit_line_directives)
	{
		require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
	}
}

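// Example (hypothetical access, illustrative only): if only the final OpLoad result of something like
// "materials[dynamicIndex]" carries NonUniformEXT, the expressions it was built from (the access chain
// and its index) still need the decoration for correct codegen. The recursion below back-propagates it
// and forces a recompile whenever a new ID gets tagged.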
void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
{
	// SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
	// we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
	// Back-propagate the qualifier based on the expression dependency chain.

	if (!has_decoration(id, DecorationNonUniformEXT))
	{
		set_decoration(id, DecorationNonUniformEXT);
		force_recompile();
	}

	auto *e = maybe_get<SPIRExpression>(id);
	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
	auto *chain = maybe_get<SPIRAccessChain>(id);
	if (e)
	{
		for (auto &expr : e->expression_dependencies)
			propagate_nonuniform_qualifier(expr);
		for (auto &expr : e->implied_read_expressions)
			propagate_nonuniform_qualifier(expr);
	}
	else if (combined)
	{
		propagate_nonuniform_qualifier(combined->image);
		propagate_nonuniform_qualifier(combined->sampler);
	}
	else if (chain)
	{
		for (auto &expr : chain->implied_read_expressions)
			propagate_nonuniform_qualifier(expr);
	}
}

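// Example (hypothetical types, illustrative only): copying a value of type
//   struct { float a; vec2 b[3]; }
// between two logical types unrolls below into per-leaf stores, roughly
//   dst.a = src.a; dst.b[0] = src.b[0]; dst.b[1] = src.b[1]; dst.b[2] = src.b[2];
// with each leaf store going through emit_store_statement so packing differences between the two
// logical layouts (notably in MSL) are handled per member.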
void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
                                          SmallVector<uint32_t> chain)
{
	// Fully unroll all member/array indices one by one.

	auto &lhs_type = get<SPIRType>(lhs_type_id);
	auto &rhs_type = get<SPIRType>(rhs_type_id);

	if (!lhs_type.array.empty())
	{
		// Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
		// and this is a rather obscure opcode anyway, so keep it simple unless we are forced to.
		uint32_t array_size = to_array_size_literal(lhs_type);
		chain.push_back(0);

		for (uint32_t i = 0; i < array_size; i++)
		{
			chain.back() = i;
			emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
		}
	}
	else if (lhs_type.basetype == SPIRType::Struct)
	{
		chain.push_back(0);
		uint32_t member_count = uint32_t(lhs_type.member_types.size());
		for (uint32_t i = 0; i < member_count; i++)
		{
			chain.back() = i;
			emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
		}
	}
	else
	{
		// Need to handle unpack/packing fixups since this can differ wildly between the logical types,
		// particularly in MSL.
		// To deal with this, we emit access chains and go through emit_store_statement
		// to deal with all the special cases we can encounter.

		AccessChainMeta lhs_meta, rhs_meta;
		auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
		auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);

		uint32_t id = ir.increase_bound_by(2);
		lhs_id = id;
		rhs_id = id + 1;

		{
			auto &lhs_expr = set<SPIRExpression>(lhs_id, move(lhs), lhs_type_id, true);
			lhs_expr.need_transpose = lhs_meta.need_transpose;
			if (lhs_meta.storage_is_packed)
				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
			if (lhs_meta.storage_physical_type != 0)
				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);

			forwarded_temporaries.insert(lhs_id);
			suppressed_usage_tracking.insert(lhs_id);
		}

		{
			auto &rhs_expr = set<SPIRExpression>(rhs_id, move(rhs), rhs_type_id, true);
			rhs_expr.need_transpose = rhs_meta.need_transpose;
			if (rhs_meta.storage_is_packed)
				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
			if (rhs_meta.storage_physical_type != 0)
				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);

			forwarded_temporaries.insert(rhs_id);
			suppressed_usage_tracking.insert(rhs_id);
		}

		emit_store_statement(lhs_id, rhs_id);
	}
}

bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
{
	if (!has_decoration(id, DecorationInputAttachmentIndex))
		return false;

	uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
		if (remap.first == input_attachment_index)
			return true;

	return false;
}

const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
		    get_decoration(var.self, DecorationInputAttachmentIndex) == index)
		{
			ret = &var;
		}
	});
	return ret;
}

const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
			ret = &var;
	});
	return ret;
}

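// Example (hypothetical remap, illustrative only): with input attachment 0 remapped to color output 0,
// the fixup hook below emits at the start of the entry point either
//   subpassInput0 = gl_LastFragData[0];   // legacy targets
// or
//   subpassInput0.xyz = colorOutput0;     // modern targets, swizzled to the render target's component count
// so reads from the subpass input go through the declared fragment output via framebuffer fetch.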
void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
{
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
	{
		auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
		auto *output_var = find_color_output_by_location(remap.second);
		if (!subpass_var)
			continue;
		if (!output_var)
			SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
			                  "to read from it.");
		if (is_array(get<SPIRType>(output_var->basetype)))
			SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");

		auto &func = get<SPIRFunction>(get_entry_point().self);
		func.fixup_hooks_in.push_back([=]() {
			if (is_legacy())
			{
				statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
				          get_decoration(output_var->self, DecorationLocation), "];");
			}
			else
			{
				uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
				statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
				          to_expression(output_var->self), ";");
			}
		});
	}
}

bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
{
	return image_is_comparison(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
{
	static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
		                                                "GL_KHR_shader_subgroup_basic",
		                                                "GL_KHR_shader_subgroup_vote",
		                                                "GL_NV_gpu_shader5",
		                                                "GL_NV_shader_thread_group",
		                                                "GL_NV_shader_thread_shuffle",
		                                                "GL_ARB_shader_ballot",
		                                                "GL_ARB_shader_group_vote",
		                                                "GL_AMD_gcn_shader" };
	return retval[c];
}

SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return { "GL_ARB_shader_int64" };
	case AMD_gcn_shader:
		return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
	default:
		return {};
	}
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return "defined(GL_ARB_shader_int64)";
	case AMD_gcn_shader:
		return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
	default:
		return "";
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
get_feature_dependencies(Feature feature)
{
	switch (feature)
	{
	case SubgroupAllEqualT:
		return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
	case SubgroupElect:
		return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return { SubgroupMask };
	case SubgroupBallotBitCount:
		return { SubgroupBallot };
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
get_feature_dependency_mask(Feature feature)
{
	return build_mask(get_feature_dependencies(feature));
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
{
	static const bool retval[FeatureCount] = { false, false, false, false, false, false,
		                                       true, // SubgroupBallotFindLSB_MSB
		                                       false, false, false, false,
		                                       true, // SubgroupMemBarrier - replaced with workgroup memory barriers
		                                       false, false, true, false };
	return retval[feature];
}

CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
get_KHR_extension_for_feature(Feature feature)
{
	static const Candidate extensions[FeatureCount] = {
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
		KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
	};

	return extensions[feature];
}

void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
{
	feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
{
	return (feature_mask & (1u << feature)) != 0;
}

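// Note (illustrative): resolve() gives each extension candidate a weight equal to the number of requested
// features (including their dependencies) that it can help implement. If, hypothetically, SubgroupMask and
// SubgroupBallot were both requested, KHR_shader_subgroup_ballot and NV_shader_thread_group would each be
// counted twice; the weights are later used to sort candidates so broadly useful extensions are tried first.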
CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
{
	Result res;

	for (uint32_t i = 0u; i < FeatureCount; ++i)
	{
		if (feature_mask & (1u << i))
		{
			auto feature = static_cast<Feature>(i);
			std::unordered_set<uint32_t> unique_candidates;

			auto candidates = get_candidates_for_feature(feature);
			unique_candidates.insert(candidates.begin(), candidates.end());

			auto deps = get_feature_dependencies(feature);
			for (Feature d : deps)
			{
				candidates = get_candidates_for_feature(d);
				if (!candidates.empty())
					unique_candidates.insert(candidates.begin(), candidates.end());
			}

			for (uint32_t c : unique_candidates)
				++res.weights[static_cast<Candidate>(c)];
		}
	}

	return res;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
get_candidates_for_feature(Feature ft, const Result &r)
{
	auto c = get_candidates_for_feature(ft);
	auto cmp = [&r](Candidate a, Candidate b) {
		if (r.weights[a] == r.weights[b])
			return a < b; // Prefer candidates with lower enum value
		return r.weights[a] > r.weights[b];
	};
	std::sort(c.begin(), c.end(), cmp);
	return c;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
get_candidates_for_feature(Feature feature)
{
	switch (feature)
	{
	case SubgroupMask:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupSize:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
	case SubgroupInvocationID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case NumSubgroups:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case SubgroupBroadcast_First:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
	case SubgroupBallotFindLSB_MSB:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
	case SubgroupAll_Any_AllEqualBool:
		return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
	case SubgroupAllEqualT:
		return {}; // depends on other features only
	case SubgroupElect:
		return {}; // depends on other features only
	case SubgroupBallot:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupBarrier:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
	case SubgroupMemBarrier:
		return { KHR_shader_subgroup_basic };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return {};
	case SubgroupBallotBitExtract:
		return { NV_shader_thread_group };
	case SubgroupBallotBitCount:
		return {};
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
    const SmallVector<Feature> &features)
{
	FeatureMask mask = 0;
	for (Feature f : features)
		mask |= FeatureMask(1) << f;
	return mask;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
{
	for (auto &weight : weights)
		weight = 0;

	// Make sure KHR_shader_subgroup extensions are always preferred.
	const uint32_t big_num = FeatureCount;
	weights[KHR_shader_subgroup_ballot] = big_num;
	weights[KHR_shader_subgroup_basic] = big_num;
	weights[KHR_shader_subgroup_vote] = big_num;
}

void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
{
	// Must be ordered to maintain deterministic output, so vector is appropriate.
	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
	    end(workaround_ubo_load_overload_types))
	{
		force_recompile();
		workaround_ubo_load_overload_types.push_back(id);
	}
}

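// Example (hypothetical UBO, illustrative only): for a load whose expression is "ubo.rowMajorMVP",
// the rewrite below produces "spvWorkaroundRowMajor(ubo.rowMajorMVP)" and requests a wrapper overload
// for the loaded type, so codegen later emits a trivial pass-through function that makes the driver
// honor the row_major layout.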
void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
{
	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
	// ensure that the row_major decoration is actually respected.
	auto *var = maybe_get_backing_variable(ptr);
	if (!var)
		return;

	auto &backing_type = get<SPIRType>(var->basetype);
	bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
	              has_decoration(backing_type.self, DecorationBlock);
	if (!is_ubo)
		return;

	auto *type = &get<SPIRType>(loaded_type);
	bool rewrite = false;

	if (is_matrix(*type))
	{
		// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
		// we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
		// If there is any row-major action going on, we apply the workaround.
		// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
		// If an access chain occurred, the workaround is not required, so loading vectors or scalars does not need the workaround.
		type = &backing_type;
	}

	if (type->basetype == SPIRType::Struct)
	{
		// If we're loading a struct where any member is a row-major matrix, apply the workaround.
		for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
		{
			if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
			{
				rewrite = true;
				break;
			}
		}
	}

	if (rewrite)
	{
		request_workaround_wrapper_overload(loaded_type);
		expr = join("spvWorkaroundRowMajor(", expr, ")");
	}
}