spirv_glsl.cpp 654 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851
928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982019821198221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961
999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292
/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>
#include <array>

#ifndef _WIN32
#include <langinfo.h>
#endif
#include <locale.h>

using namespace SPIRV_CROSS_SPV_HEADER_NAMESPACE;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

namespace SPIRV_CROSS_NAMESPACE
{
enum ExtraSubExpressionType
{
	// Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
	EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
	EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};

struct GlslConstantNameMapping
{
	uint32_t value;
	const char *alias;
};

#define DEF_GLSL_MAPPING(x) { x, "gl_" #x }
#define DEF_GLSL_MAPPING_EXT(x) { x##KHR, "gl_" #x }
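
// For reference, DEF_GLSL_MAPPING(ComponentTypeFloat16NV) expands to
// { ComponentTypeFloat16NV, "gl_ComponentTypeFloat16NV" }, and the _EXT variant appends the KHR suffix
// to the enumerant only, e.g. { CooperativeMatrixLayoutRowMajorKHR, "gl_CooperativeMatrixLayoutRowMajor" }.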
static const GlslConstantNameMapping CoopVecComponentTypeNames[] = {
	DEF_GLSL_MAPPING(ComponentTypeFloat16NV),
	DEF_GLSL_MAPPING(ComponentTypeFloat32NV),
	DEF_GLSL_MAPPING(ComponentTypeFloat64NV),
	DEF_GLSL_MAPPING(ComponentTypeSignedInt8NV),
	DEF_GLSL_MAPPING(ComponentTypeSignedInt16NV),
	DEF_GLSL_MAPPING(ComponentTypeSignedInt32NV),
	DEF_GLSL_MAPPING(ComponentTypeSignedInt64NV),
	DEF_GLSL_MAPPING(ComponentTypeUnsignedInt8NV),
	DEF_GLSL_MAPPING(ComponentTypeUnsignedInt16NV),
	DEF_GLSL_MAPPING(ComponentTypeUnsignedInt32NV),
	DEF_GLSL_MAPPING(ComponentTypeUnsignedInt64NV),
	DEF_GLSL_MAPPING(ComponentTypeSignedInt8PackedNV),
	DEF_GLSL_MAPPING(ComponentTypeUnsignedInt8PackedNV),
	DEF_GLSL_MAPPING(ComponentTypeFloatE4M3NV),
	DEF_GLSL_MAPPING(ComponentTypeFloatE5M2NV),
};

static const GlslConstantNameMapping CoopVecMatrixLayoutNames[] = {
	DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutRowMajorNV),
	DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutColumnMajorNV),
	DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutInferencingOptimalNV),
	DEF_GLSL_MAPPING(CooperativeVectorMatrixLayoutTrainingOptimalNV),
};

static const GlslConstantNameMapping CoopMatMatrixLayoutNames[] = {
	DEF_GLSL_MAPPING_EXT(CooperativeMatrixLayoutRowMajor),
	DEF_GLSL_MAPPING_EXT(CooperativeMatrixLayoutColumnMajor),
};

#undef DEF_GLSL_MAPPING
#undef DEF_GLSL_MAPPING_EXT

static bool is_unsigned_opcode(Op op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case OpShiftRightLogical:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUConvert:
	case OpUDiv:
	case OpUMod:
	case OpUMulExtended:
	case OpConvertUToF:
	case OpConvertFToU:
		return true;
	default:
		return false;
	}
}

static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case GLSLstd450UClamp:
	case GLSLstd450UMin:
	case GLSLstd450UMax:
	case GLSLstd450FindUMsb:
		return true;
	default:
		return false;
	}
}
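
// std140 and HLSL cbuffer rules round the base alignment of structs and array elements up to that of a vec4.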
static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
	case BufferPackingStd140:
	case BufferPackingStd140EnhancedLayout:
		return true;
	default:
		return false;
	}
}

static bool packing_is_hlsl(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
		return true;
	default:
		return false;
	}
}
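
// The EnhancedLayout / PackOffset variants carry explicit member offsets taken from the SPIR-V,
// whereas plain std140/std430/scalar/cbuffer compute all offsets from the packing rules alone.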
static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140:
	case BufferPackingStd430:
	case BufferPackingScalar:
	case BufferPackingHLSLCbuffer:
		return false;
	default:
		return true;
	}
}

static bool packing_is_scalar(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingScalar:
	case BufferPackingScalarEnhancedLayout:
		return true;
	default:
		return false;
	}
}
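
// Nested structs cannot use the explicit-offset (EnhancedLayout / PackOffset) variants of their parent block,
// so they are laid out with the corresponding base packing rule instead.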
static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140EnhancedLayout:
		return BufferPackingStd140;
	case BufferPackingStd430EnhancedLayout:
		return BufferPackingStd430;
	case BufferPackingHLSLCbufferPackOffset:
		return BufferPackingHLSLCbuffer;
	case BufferPackingScalarEnhancedLayout:
		return BufferPackingScalar;
	default:
		return packing;
	}
}
} // namespace SPIRV_CROSS_NAMESPACE

void CompilerGLSL::init()
{
	if (ir.source.known)
	{
		options.es = ir.source.es;
		options.version = ir.source.version;
	}

	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
	// tricky.
#ifdef _WIN32
	// On Windows, localeconv uses thread-local storage, so it should be fine.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#elif defined(__ANDROID__) && __ANDROID_API__ < 26
	// nl_langinfo is not supported on this platform, fall back to the worse alternative.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#else
	// localeconv, the portable function is not MT safe ...
	const char *decimal_point = nl_langinfo(RADIXCHAR);
	if (decimal_point && *decimal_point != '\0')
		current_locale_radix_character = *decimal_point;
#endif
}
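
// Maps a pixel local storage format to the layout qualifier emitted in front of the PLS member declaration.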
static const char *to_pls_layout(PlsFormat format)
{
	switch (format)
	{
	case PlsR11FG11FB10F:
		return "layout(r11f_g11f_b10f) ";
	case PlsR32F:
		return "layout(r32f) ";
	case PlsRG16F:
		return "layout(rg16f) ";
	case PlsRGB10A2:
		return "layout(rgb10_a2) ";
	case PlsRGBA8:
		return "layout(rgba8) ";
	case PlsRG16:
		return "layout(rg16) ";
	case PlsRGBA8I:
		return "layout(rgba8i)";
	case PlsRG16I:
		return "layout(rg16i) ";
	case PlsRGB10A2UI:
		return "layout(rgb10_a2ui) ";
	case PlsRGBA8UI:
		return "layout(rgba8ui) ";
	case PlsRG16UI:
		return "layout(rg16ui) ";
	case PlsR32UI:
		return "layout(r32ui) ";
	default:
		return "";
	}
}

static std::pair<Op, SPIRType::BaseType> pls_format_to_basetype(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR11FG11FB10F:
	case PlsR32F:
	case PlsRG16F:
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRG16:
		return std::make_pair(OpTypeFloat, SPIRType::Float);
	case PlsRGBA8I:
	case PlsRG16I:
		return std::make_pair(OpTypeInt, SPIRType::Int);
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
	case PlsRG16UI:
	case PlsR32UI:
		return std::make_pair(OpTypeInt, SPIRType::UInt);
	}
}

static uint32_t pls_format_to_components(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR32F:
	case PlsR32UI:
		return 1;
	case PlsRG16F:
	case PlsRG16:
	case PlsRG16UI:
	case PlsRG16I:
		return 2;
	case PlsR11FG11FB10F:
		return 3;
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRGBA8I:
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
		return 4;
	}
}
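
// Returns a swizzle string selecting vecsize consecutive components starting at index,
// e.g. vector_swizzle(2, 1) yields ".yz". A full 4-component selection yields "" since no swizzle is needed.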
const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
{
	static const char *const swizzle[4][4] = {
		{ ".x", ".y", ".z", ".w" },
		{ ".xy", ".yz", ".zw", nullptr },
		{ ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
		// This array ends up being compiled as all nullptrs, tripping the assertions below.
		{ "", nullptr, nullptr, "$" },
#else
		{ "", nullptr, nullptr, nullptr },
#endif
	};

	assert(vecsize >= 1 && vecsize <= 4);
	assert(index >= 0 && index < 4);
	assert(swizzle[vecsize - 1][index]);
	return swizzle[vecsize - 1][index];
}

void CompilerGLSL::reset(uint32_t iteration_count)
{
	// Sanity check the iteration count to be robust against a certain class of bugs where
	// we keep forcing recompilations without making clear forward progress.
	// In buggy situations we will loop forever, or loop for an unbounded number of iterations.
	// Certain types of recompilations are considered to make forward progress,
	// but in almost all situations, we'll never see more than 3 iterations.
	// It is highly context-sensitive when we need to force recompilation,
	// and it is not practical with the current architecture
	// to resolve everything up front.
	if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress)
		SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!");

	// We do some speculative optimizations which should pretty much always work out,
	// but just in case the SPIR-V is rather weird, recompile until it's happy.
	// This typically only means one extra pass.
	clear_force_recompile();

	// Clear invalid expression tracking.
	invalid_expressions.clear();
	composite_insert_overwritten.clear();
	current_function = nullptr;

	// Clear temporary usage tracking.
	expression_usage_counts.clear();
	forwarded_temporaries.clear();
	suppressed_usage_tracking.clear();

	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	flushed_phi_variables.clear();

	current_emitting_switch_stack.clear();

	reset_name_caches();

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.active = false;
		func.flush_undeclared = true;
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

	ir.reset_all_of_type<SPIRExpression>();
	ir.reset_all_of_type<SPIRAccessChain>();

	statement_count = 0;
	indent = 0;
	current_loop_level = 0;
}

void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}
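
// Records a subpass-input to color-attachment remapping used when emitting GL_EXT_shader_framebuffer_fetch.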
void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
{
	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
	inout_color_attachments.push_back({ color_location, coherent });
}

bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location;
	                    }) != end(inout_color_attachments);
}

bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location && !elem.second;
	                    }) != end(inout_color_attachments);
}
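
// Scans types, declared capabilities and execution modes up front to decide which GLSL extensions
// must be enabled in the emitted header.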
  388. void CompilerGLSL::find_static_extensions()
  389. {
  390. ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
  391. if (type.basetype == SPIRType::Double)
  392. {
  393. if (options.es)
  394. SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
  395. if (!options.es && options.version < 400)
  396. require_extension_internal("GL_ARB_gpu_shader_fp64");
  397. }
  398. else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
  399. {
  400. if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
  401. SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
  402. require_extension_internal("GL_ARB_gpu_shader_int64");
  403. }
  404. else if (type.basetype == SPIRType::Half)
  405. {
  406. require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
  407. if (options.vulkan_semantics)
  408. require_extension_internal("GL_EXT_shader_16bit_storage");
  409. }
  410. else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
  411. {
  412. require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
  413. if (options.vulkan_semantics)
  414. require_extension_internal("GL_EXT_shader_8bit_storage");
  415. }
  416. else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
  417. {
  418. require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
  419. if (options.vulkan_semantics)
  420. require_extension_internal("GL_EXT_shader_16bit_storage");
  421. }
  422. });
  423. auto &execution = get_entry_point();
  424. switch (execution.model)
  425. {
  426. case ExecutionModelGLCompute:
  427. if (!options.es && options.version < 430)
  428. require_extension_internal("GL_ARB_compute_shader");
  429. if (options.es && options.version < 310)
  430. SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
  431. break;
  432. case ExecutionModelGeometry:
  433. if (options.es && options.version < 320)
  434. require_extension_internal("GL_EXT_geometry_shader");
  435. if (!options.es && options.version < 150)
  436. require_extension_internal("GL_ARB_geometry_shader4");
  437. if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
  438. {
  439. // Instanced GS is part of 400 core or this extension.
  440. if (!options.es && options.version < 400)
  441. require_extension_internal("GL_ARB_gpu_shader5");
  442. }
  443. break;
  444. case ExecutionModelTessellationEvaluation:
  445. case ExecutionModelTessellationControl:
  446. if (options.es && options.version < 320)
  447. require_extension_internal("GL_EXT_tessellation_shader");
  448. if (!options.es && options.version < 400)
  449. require_extension_internal("GL_ARB_tessellation_shader");
  450. break;
  451. case ExecutionModelRayGenerationKHR:
  452. case ExecutionModelIntersectionKHR:
  453. case ExecutionModelAnyHitKHR:
  454. case ExecutionModelClosestHitKHR:
  455. case ExecutionModelMissKHR:
  456. case ExecutionModelCallableKHR:
  457. // NV enums are aliases.
  458. if (options.es || options.version < 460)
  459. SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
  460. if (!options.vulkan_semantics)
  461. SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");
  462. // Need to figure out if we should target KHR or NV extension based on capabilities.
  463. for (auto &cap : ir.declared_capabilities)
  464. {
  465. if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
  466. cap == CapabilityRayTraversalPrimitiveCullingKHR)
  467. {
  468. ray_tracing_is_khr = true;
  469. break;
  470. }
  471. }
  472. if (ray_tracing_is_khr)
  473. {
  474. // In KHR ray tracing we pass payloads by pointer instead of location,
  475. // so make sure we assign locations properly.
  476. ray_tracing_khr_fixup_locations();
  477. require_extension_internal("GL_EXT_ray_tracing");
  478. }
  479. else
  480. require_extension_internal("GL_NV_ray_tracing");
  481. break;
  482. case ExecutionModelMeshEXT:
  483. case ExecutionModelTaskEXT:
  484. if (options.es || options.version < 450)
  485. SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above.");
  486. if (!options.vulkan_semantics)
  487. SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics.");
  488. require_extension_internal("GL_EXT_mesh_shader");
  489. break;
  490. default:
  491. break;
  492. }
  493. if (!pls_inputs.empty() || !pls_outputs.empty())
  494. {
  495. if (execution.model != ExecutionModelFragment)
  496. SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
  497. require_extension_internal("GL_EXT_shader_pixel_local_storage");
  498. }
  499. if (!inout_color_attachments.empty())
  500. {
  501. if (execution.model != ExecutionModelFragment)
  502. SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
  503. if (options.vulkan_semantics)
  504. SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");
  505. bool has_coherent = false;
  506. bool has_incoherent = false;
  507. for (auto &att : inout_color_attachments)
  508. {
  509. if (att.second)
  510. has_coherent = true;
  511. else
  512. has_incoherent = true;
  513. }
  514. if (has_coherent)
  515. require_extension_internal("GL_EXT_shader_framebuffer_fetch");
  516. if (has_incoherent)
  517. require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent");
  518. }
  519. if (options.separate_shader_objects && !options.es && options.version < 410)
  520. require_extension_internal("GL_ARB_separate_shader_objects");
  521. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  522. {
  523. if (!options.vulkan_semantics)
  524. SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
  525. if (options.es && options.version < 320)
  526. SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
  527. else if (!options.es && options.version < 450)
  528. SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
  529. require_extension_internal("GL_EXT_buffer_reference2");
  530. }
  531. else if (ir.addressing_model != AddressingModelLogical)
  532. {
  533. SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64 addressing models are supported.");
  534. }
  535. // Check for nonuniform qualifier and passthrough.
  536. // Instead of looping over all decorations to find this, just look at capabilities.
  537. for (auto &cap : ir.declared_capabilities)
  538. {
  539. switch (cap)
  540. {
  541. case CapabilityShaderNonUniformEXT:
  542. if (!options.vulkan_semantics)
  543. require_extension_internal("GL_NV_gpu_shader5");
  544. else
  545. require_extension_internal("GL_EXT_nonuniform_qualifier");
  546. break;
  547. case CapabilityRuntimeDescriptorArrayEXT:
  548. if (!options.vulkan_semantics)
  549. SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
  550. require_extension_internal("GL_EXT_nonuniform_qualifier");
  551. break;
  552. case CapabilityGeometryShaderPassthroughNV:
  553. if (execution.model == ExecutionModelGeometry)
  554. {
  555. require_extension_internal("GL_NV_geometry_shader_passthrough");
  556. execution.geometry_passthrough = true;
  557. }
  558. break;
  559. case CapabilityVariablePointers:
  560. case CapabilityVariablePointersStorageBuffer:
  561. SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");
  562. case CapabilityMultiView:
  563. if (options.vulkan_semantics)
  564. require_extension_internal("GL_EXT_multiview");
  565. else
  566. {
  567. require_extension_internal("GL_OVR_multiview2");
  568. if (options.ovr_multiview_view_count == 0)
  569. SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
  570. if (get_execution_model() != ExecutionModelVertex)
  571. SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
  572. }
  573. break;
  574. case CapabilityRayQueryKHR:
  575. if (options.es || options.version < 460 || !options.vulkan_semantics)
  576. SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
  577. require_extension_internal("GL_EXT_ray_query");
  578. ray_tracing_is_khr = true;
  579. break;
  580. case CapabilityRayQueryPositionFetchKHR:
  581. if (options.es || options.version < 460 || !options.vulkan_semantics)
  582. SPIRV_CROSS_THROW("RayQuery Position Fetch requires Vulkan GLSL 460.");
  583. require_extension_internal("GL_EXT_ray_tracing_position_fetch");
  584. ray_tracing_is_khr = true;
  585. break;
  586. case CapabilityRayTracingPositionFetchKHR:
  587. if (options.es || options.version < 460 || !options.vulkan_semantics)
  588. SPIRV_CROSS_THROW("Ray Tracing Position Fetch requires Vulkan GLSL 460.");
  589. require_extension_internal("GL_EXT_ray_tracing_position_fetch");
  590. ray_tracing_is_khr = true;
  591. break;
  592. case CapabilityRayTraversalPrimitiveCullingKHR:
  593. if (options.es || options.version < 460 || !options.vulkan_semantics)
  594. SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
  595. require_extension_internal("GL_EXT_ray_flags_primitive_culling");
  596. ray_tracing_is_khr = true;
  597. break;
  598. case CapabilityRayTracingClusterAccelerationStructureNV:
  599. if (options.es || options.version < 460 || !options.vulkan_semantics)
  600. SPIRV_CROSS_THROW("Cluster AS requires Vulkan GLSL 460.");
  601. require_extension_internal("GL_NV_cluster_acceleration_structure");
  602. ray_tracing_is_khr = true;
  603. break;
  604. case CapabilityTensorsARM:
  605. if (options.es || options.version < 460 || !options.vulkan_semantics)
  606. SPIRV_CROSS_THROW("Tensor requires Vulkan GLSL 460.");
  607. require_extension_internal("GL_ARM_tensors");
  608. break;
  609. default:
  610. break;
  611. }
  612. }
  613. if (options.ovr_multiview_view_count)
  614. {
  615. if (options.vulkan_semantics)
  616. SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
  617. if (get_execution_model() != ExecutionModelVertex)
  618. SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
  619. require_extension_internal("GL_OVR_multiview2");
  620. }
  621. if (execution.flags.get(ExecutionModeQuadDerivativesKHR) ||
  622. (execution.flags.get(ExecutionModeRequireFullQuadsKHR) && get_execution_model() == ExecutionModelFragment))
  623. {
  624. require_extension_internal("GL_EXT_shader_quad_control");
  625. }
626. // The KHR extension is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR.
  627. for (auto &ext : ir.declared_extensions)
  628. if (ext == "SPV_NV_fragment_shader_barycentric")
  629. barycentric_is_nv = true;
  630. }
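// Record that a polyfill helper must be emitted. Relaxed-precision variants are tracked separately
// (only meaningful for ES or Vulkan GLSL), and requesting a new polyfill forces another compile pass.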
  631. void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed)
  632. {
  633. uint32_t &polyfills = (relaxed && (options.es || options.vulkan_semantics)) ?
  634. required_polyfills_relaxed : required_polyfills;
  635. if ((polyfills & polyfill) == 0)
  636. {
  637. polyfills |= polyfill;
  638. force_recompile();
  639. }
  640. }
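// KHR ray tracing GLSL expects explicit location layouts on payload and callable data variables,
// so assign sequential Location decorations to any such variables that are not hidden.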
  641. void CompilerGLSL::ray_tracing_khr_fixup_locations()
  642. {
  643. uint32_t location = 0;
  644. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  645. // Incoming payload storage can also be used for tracing.
  646. if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
  647. var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
  648. return;
  649. if (is_hidden_variable(var))
  650. return;
  651. set_decoration(var.self, DecorationLocation, location++);
  652. });
  653. }
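// Main entry point. Runs the static analysis passes over the SPIR-V, then emits GLSL in a loop,
// repeating whenever newly required extensions or polyfills force a recompile.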
  654. string CompilerGLSL::compile()
  655. {
  656. ir.fixup_reserved_names();
  657. if (!options.vulkan_semantics)
  658. {
659. // Only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers.
  660. backend.nonuniform_qualifier = "";
  661. backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround;
  662. }
  663. backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
  664. backend.force_gl_in_out_block = true;
  665. backend.supports_extensions = true;
  666. backend.use_array_constructor = true;
  667. backend.workgroup_size_is_hidden = true;
  668. backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics;
  669. backend.support_precise_qualifier =
  670. (!options.es && options.version >= 400) || (options.es && options.version >= 320);
  671. backend.constant_null_initializer = "{ }";
  672. backend.requires_matching_array_initializer = true;
  673. if (is_legacy_es())
  674. backend.support_case_fallthrough = false;
  675. // Scan the SPIR-V to find trivial uses of extensions.
  676. fixup_anonymous_struct_names();
  677. fixup_type_alias();
  678. reorder_type_alias();
  679. build_function_control_flow_graphs_and_analyze();
  680. find_static_extensions();
  681. fixup_image_load_store_access();
  682. update_active_builtins();
  683. analyze_image_and_sampler_usage();
  684. analyze_interlocked_resource_usage();
  685. if (!inout_color_attachments.empty())
  686. emit_inout_fragment_outputs_copy_to_subpass_inputs();
  687. // Shaders might cast unrelated data to pointers of non-block types.
  688. // Find all such instances and make sure we can cast the pointers to a synthesized block type.
  689. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  690. analyze_non_block_pointer_types();
  691. uint32_t pass_count = 0;
  692. do
  693. {
  694. reset(pass_count);
  695. buffer.reset();
  696. emit_header();
  697. emit_resources();
  698. emit_extension_workarounds(get_execution_model());
  699. if (required_polyfills != 0)
  700. emit_polyfills(required_polyfills, false);
  701. if ((options.es || options.vulkan_semantics) && required_polyfills_relaxed != 0)
  702. emit_polyfills(required_polyfills_relaxed, true);
  703. emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
  704. pass_count++;
  705. } while (is_forcing_recompilation());
  706. // Implement the interlocked wrapper function at the end.
  707. // The body was implemented in lieu of main().
  708. if (interlocked_is_complex)
  709. {
  710. statement("void main()");
  711. begin_scope();
  712. statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
  713. statement("SPIRV_Cross_beginInvocationInterlock();");
  714. statement("spvMainInterlockedBody();");
  715. statement("SPIRV_Cross_endInvocationInterlock();");
  716. end_scope();
  717. }
  718. // Entry point in GLSL is always main().
  719. get_entry_point().name = "main";
  720. return buffer.str();
  721. }
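// Returns whatever has been emitted so far, which is useful when debugging a failed compilation.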
  722. std::string CompilerGLSL::get_partial_source()
  723. {
  724. return buffer.str();
  725. }
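// Builds the local_size_{x,y,z} layout arguments, e.g. "local_size_x = 8".
// Specialization constant sizes use local_size_x_id in Vulkan GLSL and the generated
// constant macro name in plain GLSL.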
  726. void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
  727. const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
  728. {
  729. auto &execution = get_entry_point();
  730. bool builtin_workgroup = execution.workgroup_size.constant != 0;
  731. bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId);
  732. if (wg_x.id)
  733. {
  734. if (options.vulkan_semantics)
  735. arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
  736. else
  737. arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
  738. }
  739. else if (use_local_size_id && execution.workgroup_size.id_x)
  740. arguments.push_back(join("local_size_x = ", get<SPIRConstant>(execution.workgroup_size.id_x).scalar()));
  741. else
  742. arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));
  743. if (wg_y.id)
  744. {
  745. if (options.vulkan_semantics)
  746. arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
  747. else
  748. arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
  749. }
  750. else if (use_local_size_id && execution.workgroup_size.id_y)
  751. arguments.push_back(join("local_size_y = ", get<SPIRConstant>(execution.workgroup_size.id_y).scalar()));
  752. else
  753. arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));
  754. if (wg_z.id)
  755. {
  756. if (options.vulkan_semantics)
  757. arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
  758. else
  759. arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
  760. }
  761. else if (use_local_size_id && execution.workgroup_size.id_z)
  762. arguments.push_back(join("local_size_z = ", get<SPIRConstant>(execution.workgroup_size.id_z).scalar()));
  763. else
  764. arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
  765. }
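// In Vulkan GLSL the KHR subgroup extension can be required directly. In plain GLSL the feature is
// recorded so emit_header() can emit vendor-extension fallbacks, which forces a recompile pass.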
  766. void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
  767. {
  768. if (options.vulkan_semantics)
  769. {
  770. auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
  771. require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
  772. }
  773. else
  774. {
  775. if (!shader_subgroup_supporter.is_feature_requested(feature))
  776. force_recompile();
  777. shader_subgroup_supporter.request_feature(feature);
  778. }
  779. }
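// Emits the #version directive, #extension blocks (including conditional fallback chains),
// default precision statements and the layout(...) in/out declarations for the entry point.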
  780. void CompilerGLSL::emit_header()
  781. {
  782. auto &execution = get_entry_point();
  783. statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");
  784. if (!options.es && options.version < 420)
  785. {
  786. // Needed for binding = # on UBOs, etc.
  787. if (options.enable_420pack_extension)
  788. {
  789. statement("#ifdef GL_ARB_shading_language_420pack");
  790. statement("#extension GL_ARB_shading_language_420pack : require");
  791. statement("#endif");
  792. }
  793. // Needed for: layout(early_fragment_tests) in;
  794. if (execution.flags.get(ExecutionModeEarlyFragmentTests))
  795. require_extension_internal("GL_ARB_shader_image_load_store");
  796. }
  797. // Needed for: layout(post_depth_coverage) in;
  798. if (execution.flags.get(ExecutionModePostDepthCoverage))
  799. require_extension_internal("GL_ARB_post_depth_coverage");
  800. // Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
  801. bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
  802. execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
  803. execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
  804. execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT);
  805. if (interlock_used)
  806. {
  807. if (options.es)
  808. {
  809. if (options.version < 310)
  810. SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
  811. require_extension_internal("GL_NV_fragment_shader_interlock");
  812. }
  813. else
  814. {
  815. if (options.version < 420)
  816. require_extension_internal("GL_ARB_shader_image_load_store");
  817. require_extension_internal("GL_ARB_fragment_shader_interlock");
  818. }
  819. }
  820. for (auto &ext : forced_extensions)
  821. {
  822. if (ext == "GL_ARB_gpu_shader_int64")
  823. {
  824. statement("#if defined(GL_ARB_gpu_shader_int64)");
  825. statement("#extension GL_ARB_gpu_shader_int64 : require");
  826. if (!options.vulkan_semantics || options.es)
  827. {
  828. statement("#elif defined(GL_NV_gpu_shader5)");
  829. statement("#extension GL_NV_gpu_shader5 : require");
  830. }
  831. statement("#else");
  832. statement("#error No extension available for 64-bit integers.");
  833. statement("#endif");
  834. }
  835. else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
  836. {
  837. // Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
  838. // GL_AMD_gpu_shader_half_float is a superset, so try that first.
  839. statement("#if defined(GL_AMD_gpu_shader_half_float)");
  840. statement("#extension GL_AMD_gpu_shader_half_float : require");
  841. if (!options.vulkan_semantics)
  842. {
  843. statement("#elif defined(GL_NV_gpu_shader5)");
  844. statement("#extension GL_NV_gpu_shader5 : require");
  845. }
  846. else
  847. {
  848. statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
  849. statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
  850. }
  851. statement("#else");
  852. statement("#error No extension available for FP16.");
  853. statement("#endif");
  854. }
  855. else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8")
  856. {
  857. if (options.vulkan_semantics)
  858. statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
  859. else
  860. {
  861. statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)");
  862. statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
  863. statement("#elif defined(GL_NV_gpu_shader5)");
  864. statement("#extension GL_NV_gpu_shader5 : require");
  865. statement("#else");
  866. statement("#error No extension available for Int8.");
  867. statement("#endif");
  868. }
  869. }
  870. else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
  871. {
  872. if (options.vulkan_semantics)
  873. statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
  874. else
  875. {
  876. statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)");
  877. statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
  878. statement("#elif defined(GL_AMD_gpu_shader_int16)");
  879. statement("#extension GL_AMD_gpu_shader_int16 : require");
  880. statement("#elif defined(GL_NV_gpu_shader5)");
  881. statement("#extension GL_NV_gpu_shader5 : require");
  882. statement("#else");
  883. statement("#error No extension available for Int16.");
  884. statement("#endif");
  885. }
  886. }
  887. else if (ext == "GL_ARB_post_depth_coverage")
  888. {
  889. if (options.es)
  890. statement("#extension GL_EXT_post_depth_coverage : require");
  891. else
  892. {
  893. statement("#if defined(GL_ARB_post_depth_coverge)");
  894. statement("#extension GL_ARB_post_depth_coverage : require");
  895. statement("#else");
  896. statement("#extension GL_EXT_post_depth_coverage : require");
  897. statement("#endif");
  898. }
  899. }
  900. else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
  901. {
  902. // Soft-enable this extension on plain GLSL.
  903. statement("#ifdef ", ext);
  904. statement("#extension ", ext, " : enable");
  905. statement("#endif");
  906. }
  907. else if (ext == "GL_EXT_control_flow_attributes")
  908. {
909. // These are just hints, so we can conditionally enable the extension and fall back in the shader.
  910. statement("#if defined(GL_EXT_control_flow_attributes)");
  911. statement("#extension GL_EXT_control_flow_attributes : require");
  912. statement("#define SPIRV_CROSS_FLATTEN [[flatten]]");
  913. statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
  914. statement("#define SPIRV_CROSS_UNROLL [[unroll]]");
  915. statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]");
  916. statement("#else");
  917. statement("#define SPIRV_CROSS_FLATTEN");
  918. statement("#define SPIRV_CROSS_BRANCH");
  919. statement("#define SPIRV_CROSS_UNROLL");
  920. statement("#define SPIRV_CROSS_LOOP");
  921. statement("#endif");
  922. }
  923. else if (ext == "GL_NV_fragment_shader_interlock")
  924. {
  925. statement("#extension GL_NV_fragment_shader_interlock : require");
  926. statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()");
  927. statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()");
  928. }
  929. else if (ext == "GL_ARB_fragment_shader_interlock")
  930. {
  931. statement("#ifdef GL_ARB_fragment_shader_interlock");
  932. statement("#extension GL_ARB_fragment_shader_interlock : enable");
  933. statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()");
  934. statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()");
  935. statement("#elif defined(GL_INTEL_fragment_shader_ordering)");
  936. statement("#extension GL_INTEL_fragment_shader_ordering : enable");
  937. statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()");
  938. statement("#define SPIRV_Cross_endInvocationInterlock()");
  939. statement("#endif");
  940. }
  941. else
  942. statement("#extension ", ext, " : require");
  943. }
  944. if (!options.vulkan_semantics)
  945. {
  946. using Supp = ShaderSubgroupSupportHelper;
  947. auto result = shader_subgroup_supporter.resolve();
  948. for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
  949. {
  950. auto feature = static_cast<Supp::Feature>(feature_index);
  951. if (!shader_subgroup_supporter.is_feature_requested(feature))
  952. continue;
  953. auto exts = Supp::get_candidates_for_feature(feature, result);
  954. if (exts.empty())
  955. continue;
  956. statement("");
  957. for (auto &ext : exts)
  958. {
  959. const char *name = Supp::get_extension_name(ext);
  960. const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
  961. auto extra_names = Supp::get_extra_required_extension_names(ext);
  962. statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
  963. (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
  964. for (const auto &e : extra_names)
  965. statement("#extension ", e, " : enable");
  966. statement("#extension ", name, " : require");
  967. }
  968. if (!Supp::can_feature_be_implemented_without_extensions(feature))
  969. {
  970. statement("#else");
  971. statement("#error No extensions available to emulate requested subgroup feature.");
  972. }
  973. statement("#endif");
  974. }
  975. }
  976. for (auto &header : header_lines)
  977. statement(header);
  978. SmallVector<string> inputs;
  979. SmallVector<string> outputs;
  980. switch (execution.model)
  981. {
  982. case ExecutionModelVertex:
  983. if (options.ovr_multiview_view_count)
  984. inputs.push_back(join("num_views = ", options.ovr_multiview_view_count));
  985. break;
  986. case ExecutionModelGeometry:
  987. if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
  988. inputs.push_back(join("invocations = ", execution.invocations));
  989. if (execution.flags.get(ExecutionModeInputPoints))
  990. inputs.push_back("points");
  991. if (execution.flags.get(ExecutionModeInputLines))
  992. inputs.push_back("lines");
  993. if (execution.flags.get(ExecutionModeInputLinesAdjacency))
  994. inputs.push_back("lines_adjacency");
  995. if (execution.flags.get(ExecutionModeTriangles))
  996. inputs.push_back("triangles");
  997. if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
  998. inputs.push_back("triangles_adjacency");
  999. if (!execution.geometry_passthrough)
  1000. {
1001. // For passthrough, these are implied and cannot be declared in the shader.
  1002. outputs.push_back(join("max_vertices = ", execution.output_vertices));
  1003. if (execution.flags.get(ExecutionModeOutputTriangleStrip))
  1004. outputs.push_back("triangle_strip");
  1005. if (execution.flags.get(ExecutionModeOutputPoints))
  1006. outputs.push_back("points");
  1007. if (execution.flags.get(ExecutionModeOutputLineStrip))
  1008. outputs.push_back("line_strip");
  1009. }
  1010. break;
  1011. case ExecutionModelTessellationControl:
  1012. if (execution.flags.get(ExecutionModeOutputVertices))
  1013. outputs.push_back(join("vertices = ", execution.output_vertices));
  1014. break;
  1015. case ExecutionModelTessellationEvaluation:
  1016. if (execution.flags.get(ExecutionModeQuads))
  1017. inputs.push_back("quads");
  1018. if (execution.flags.get(ExecutionModeTriangles))
  1019. inputs.push_back("triangles");
  1020. if (execution.flags.get(ExecutionModeIsolines))
  1021. inputs.push_back("isolines");
  1022. if (execution.flags.get(ExecutionModePointMode))
  1023. inputs.push_back("point_mode");
  1024. if (!execution.flags.get(ExecutionModeIsolines))
  1025. {
  1026. if (execution.flags.get(ExecutionModeVertexOrderCw))
  1027. inputs.push_back("cw");
  1028. if (execution.flags.get(ExecutionModeVertexOrderCcw))
  1029. inputs.push_back("ccw");
  1030. }
  1031. if (execution.flags.get(ExecutionModeSpacingFractionalEven))
  1032. inputs.push_back("fractional_even_spacing");
  1033. if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
  1034. inputs.push_back("fractional_odd_spacing");
  1035. if (execution.flags.get(ExecutionModeSpacingEqual))
  1036. inputs.push_back("equal_spacing");
  1037. break;
  1038. case ExecutionModelGLCompute:
  1039. case ExecutionModelTaskEXT:
  1040. case ExecutionModelMeshEXT:
  1041. {
  1042. if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))
  1043. {
  1044. SpecializationConstant wg_x, wg_y, wg_z;
  1045. get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
1046. // If there are any spec constants on legacy GLSL, defer the declaration; we need to set up macro
  1047. // declarations before we can emit the work group size.
  1048. if (options.vulkan_semantics ||
  1049. ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
  1050. build_workgroup_size(inputs, wg_x, wg_y, wg_z);
  1051. }
  1052. else
  1053. {
  1054. inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
  1055. inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
  1056. inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
  1057. }
  1058. if (execution.model == ExecutionModelMeshEXT)
  1059. {
  1060. outputs.push_back(join("max_vertices = ", execution.output_vertices));
  1061. outputs.push_back(join("max_primitives = ", execution.output_primitives));
  1062. if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
  1063. outputs.push_back("triangles");
  1064. else if (execution.flags.get(ExecutionModeOutputLinesEXT))
  1065. outputs.push_back("lines");
  1066. else if (execution.flags.get(ExecutionModeOutputPoints))
  1067. outputs.push_back("points");
  1068. }
  1069. break;
  1070. }
  1071. case ExecutionModelFragment:
  1072. if (options.es)
  1073. {
  1074. switch (options.fragment.default_float_precision)
  1075. {
  1076. case Options::Lowp:
  1077. statement("precision lowp float;");
  1078. break;
  1079. case Options::Mediump:
  1080. statement("precision mediump float;");
  1081. break;
  1082. case Options::Highp:
  1083. statement("precision highp float;");
  1084. break;
  1085. default:
  1086. break;
  1087. }
  1088. switch (options.fragment.default_int_precision)
  1089. {
  1090. case Options::Lowp:
  1091. statement("precision lowp int;");
  1092. break;
  1093. case Options::Mediump:
  1094. statement("precision mediump int;");
  1095. break;
  1096. case Options::Highp:
  1097. statement("precision highp int;");
  1098. break;
  1099. default:
  1100. break;
  1101. }
  1102. }
  1103. if (execution.flags.get(ExecutionModeEarlyFragmentTests))
  1104. inputs.push_back("early_fragment_tests");
  1105. if (execution.flags.get(ExecutionModePostDepthCoverage))
  1106. inputs.push_back("post_depth_coverage");
  1107. if (interlock_used)
  1108. statement("#if defined(GL_ARB_fragment_shader_interlock)");
  1109. if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
  1110. statement("layout(pixel_interlock_ordered) in;");
  1111. else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
  1112. statement("layout(pixel_interlock_unordered) in;");
  1113. else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
  1114. statement("layout(sample_interlock_ordered) in;");
  1115. else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
  1116. statement("layout(sample_interlock_unordered) in;");
  1117. if (interlock_used)
  1118. {
  1119. statement("#elif !defined(GL_INTEL_fragment_shader_ordering)");
  1120. statement("#error Fragment Shader Interlock/Ordering extension missing!");
  1121. statement("#endif");
  1122. }
  1123. if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
  1124. statement("layout(depth_greater) out float gl_FragDepth;");
  1125. else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
  1126. statement("layout(depth_less) out float gl_FragDepth;");
  1127. if (execution.flags.get(ExecutionModeRequireFullQuadsKHR))
  1128. statement("layout(full_quads) in;");
  1129. break;
  1130. default:
  1131. break;
  1132. }
  1133. for (auto &cap : ir.declared_capabilities)
  1134. if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
  1135. statement("layout(primitive_culling);");
  1136. if (execution.flags.get(ExecutionModeQuadDerivativesKHR))
  1137. statement("layout(quad_derivatives) in;");
  1138. if (!inputs.empty())
  1139. statement("layout(", merge(inputs), ") in;");
  1140. if (!outputs.empty())
  1141. statement("layout(", merge(outputs), ") out;");
  1142. statement("");
  1143. }
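// Returns true for a struct type with no members, which GLSL cannot declare directly.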
  1144. bool CompilerGLSL::type_is_empty(const SPIRType &type)
  1145. {
  1146. return type.basetype == SPIRType::Struct && type.member_types.empty();
  1147. }
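// Emits a plain struct declaration. Aliased (type-punned) copies are skipped unless the master
// type was repacked, since their declarations would otherwise be redundant.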
  1148. void CompilerGLSL::emit_struct(SPIRType &type)
  1149. {
  1150. // Struct types can be stamped out multiple times
  1151. // with just different offsets, matrix layouts, etc ...
  1152. // Type-punning with these types is legal, which complicates things
  1153. // when we are storing struct and array types in an SSBO for example.
1154. // If the type master is packed, however, we can no longer assume that the struct declaration will be redundant.
  1155. if (type.type_alias != TypeID(0) &&
  1156. !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
  1157. return;
  1158. add_resource_name(type.self);
  1159. auto name = type_to_glsl(type);
  1160. statement(!backend.explicit_struct_type ? "struct " : "", name);
  1161. begin_scope();
  1162. type.member_name_cache.clear();
  1163. uint32_t i = 0;
  1164. bool emitted = false;
  1165. for (auto &member : type.member_types)
  1166. {
  1167. add_member_name(type, i);
  1168. emit_struct_member(type, member, i);
  1169. i++;
  1170. emitted = true;
  1171. }
1172. // Don't declare empty structs in GLSL; this is not allowed.
  1173. if (type_is_empty(type) && !backend.supports_empty_struct)
  1174. {
  1175. statement("int empty_struct_member;");
  1176. emitted = true;
  1177. }
  1178. if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
  1179. emit_struct_padding_target(type);
  1180. end_scope_decl();
  1181. if (emitted)
  1182. statement("");
  1183. }
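// Translates interpolation and auxiliary decorations (flat, noperspective, centroid, sample,
// pervertex, etc.) into GLSL qualifiers, requiring extensions or throwing where unsupported.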
  1184. string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
  1185. {
  1186. string res;
  1187. //if (flags & (1ull << DecorationSmooth))
  1188. // res += "smooth ";
  1189. if (flags.get(DecorationFlat))
  1190. res += "flat ";
  1191. if (flags.get(DecorationNoPerspective))
  1192. {
  1193. if (options.es)
  1194. {
  1195. if (options.version < 300)
  1196. SPIRV_CROSS_THROW("noperspective requires ESSL 300.");
  1197. require_extension_internal("GL_NV_shader_noperspective_interpolation");
  1198. }
  1199. else if (is_legacy_desktop())
  1200. require_extension_internal("GL_EXT_gpu_shader4");
  1201. res += "noperspective ";
  1202. }
  1203. if (flags.get(DecorationCentroid))
  1204. res += "centroid ";
  1205. if (flags.get(DecorationPatch))
  1206. res += "patch ";
  1207. if (flags.get(DecorationSample))
  1208. {
  1209. if (options.es)
  1210. {
  1211. if (options.version < 300)
  1212. SPIRV_CROSS_THROW("sample requires ESSL 300.");
  1213. else if (options.version < 320)
  1214. require_extension_internal("GL_OES_shader_multisample_interpolation");
  1215. }
  1216. res += "sample ";
  1217. }
  1218. if (flags.get(DecorationInvariant) && (options.es || options.version >= 120))
  1219. res += "invariant ";
  1220. if (flags.get(DecorationPerPrimitiveEXT))
  1221. {
  1222. res += "perprimitiveEXT ";
  1223. require_extension_internal("GL_EXT_mesh_shader");
  1224. }
  1225. if (flags.get(DecorationExplicitInterpAMD))
  1226. {
  1227. require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
  1228. res += "__explicitInterpAMD ";
  1229. }
  1230. if (flags.get(DecorationPerVertexKHR))
  1231. {
  1232. if (options.es && options.version < 320)
  1233. SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320.");
  1234. else if (!options.es && options.version < 450)
  1235. SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450.");
  1236. if (barycentric_is_nv)
  1237. {
  1238. require_extension_internal("GL_NV_fragment_shader_barycentric");
  1239. res += "pervertexNV ";
  1240. }
  1241. else
  1242. {
  1243. require_extension_internal("GL_EXT_fragment_shader_barycentric");
  1244. res += "pervertexEXT ";
  1245. }
  1246. }
  1247. return res;
  1248. }
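// Builds the layout(...) qualifier for a single block member: row_major, location/component,
// offset or xfb_offset. Returns an empty string when nothing needs to be declared.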
  1249. string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
  1250. {
  1251. if (is_legacy())
  1252. return "";
  1253. bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
  1254. if (!is_block)
  1255. return "";
  1256. auto &memb = ir.meta[type.self].members;
  1257. if (index >= memb.size())
  1258. return "";
  1259. auto &dec = memb[index];
  1260. SmallVector<string> attr;
  1261. if (has_member_decoration(type.self, index, DecorationPassthroughNV))
  1262. attr.push_back("passthrough");
  1263. // We can only apply layouts on members in block interfaces.
  1264. // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
1265. // This is not supported in GLSL, so we have to make the assumption that if a struct within our buffer block struct
  1266. // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
  1267. //
  1268. // We would like to go from (SPIR-V style):
  1269. //
  1270. // struct Foo { layout(row_major) mat4 matrix; };
  1271. // buffer UBO { Foo foo; };
  1272. //
  1273. // to
  1274. //
  1275. // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
  1276. // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
  1277. auto flags = combined_decoration_for_member(type, index);
  1278. if (flags.get(DecorationRowMajor))
  1279. attr.push_back("row_major");
  1280. // We don't emit any global layouts, so column_major is default.
  1281. //if (flags & (1ull << DecorationColMajor))
  1282. // attr.push_back("column_major");
  1283. if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
  1284. attr.push_back(join("location = ", dec.location));
  1285. // Can only declare component if we can declare location.
  1286. if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
  1287. {
  1288. if (!options.es)
  1289. {
  1290. if (options.version < 440 && options.version >= 140)
  1291. require_extension_internal("GL_ARB_enhanced_layouts");
  1292. else if (options.version < 140)
  1293. SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
  1294. attr.push_back(join("component = ", dec.component));
  1295. }
  1296. else
  1297. SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
  1298. }
  1299. // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
  1300. // This is only done selectively in GLSL as needed.
  1301. if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
  1302. dec.decoration_flags.get(DecorationOffset))
  1303. attr.push_back(join("offset = ", dec.offset));
  1304. else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
  1305. attr.push_back(join("xfb_offset = ", dec.offset));
  1306. if (attr.empty())
  1307. return "";
  1308. string res = "layout(";
  1309. res += merge(attr);
  1310. res += ") ";
  1311. return res;
  1312. }
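// Maps a SPIR-V image format to its GLSL format qualifier string, or nullptr for Unknown.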
  1313. const char *CompilerGLSL::format_to_glsl(ImageFormat format)
  1314. {
  1315. if (options.es && is_desktop_only_format(format))
  1316. SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
  1317. switch (format)
  1318. {
  1319. case ImageFormatRgba32f:
  1320. return "rgba32f";
  1321. case ImageFormatRgba16f:
  1322. return "rgba16f";
  1323. case ImageFormatR32f:
  1324. return "r32f";
  1325. case ImageFormatRgba8:
  1326. return "rgba8";
  1327. case ImageFormatRgba8Snorm:
  1328. return "rgba8_snorm";
  1329. case ImageFormatRg32f:
  1330. return "rg32f";
  1331. case ImageFormatRg16f:
  1332. return "rg16f";
  1333. case ImageFormatRgba32i:
  1334. return "rgba32i";
  1335. case ImageFormatRgba16i:
  1336. return "rgba16i";
  1337. case ImageFormatR32i:
  1338. return "r32i";
  1339. case ImageFormatRgba8i:
  1340. return "rgba8i";
  1341. case ImageFormatRg32i:
  1342. return "rg32i";
  1343. case ImageFormatRg16i:
  1344. return "rg16i";
  1345. case ImageFormatRgba32ui:
  1346. return "rgba32ui";
  1347. case ImageFormatRgba16ui:
  1348. return "rgba16ui";
  1349. case ImageFormatR32ui:
  1350. return "r32ui";
  1351. case ImageFormatRgba8ui:
  1352. return "rgba8ui";
  1353. case ImageFormatRg32ui:
  1354. return "rg32ui";
  1355. case ImageFormatRg16ui:
  1356. return "rg16ui";
  1357. case ImageFormatR11fG11fB10f:
  1358. return "r11f_g11f_b10f";
  1359. case ImageFormatR16f:
  1360. return "r16f";
  1361. case ImageFormatRgb10A2:
  1362. return "rgb10_a2";
  1363. case ImageFormatR8:
  1364. return "r8";
  1365. case ImageFormatRg8:
  1366. return "rg8";
  1367. case ImageFormatR16:
  1368. return "r16";
  1369. case ImageFormatRg16:
  1370. return "rg16";
  1371. case ImageFormatRgba16:
  1372. return "rgba16";
  1373. case ImageFormatR16Snorm:
  1374. return "r16_snorm";
  1375. case ImageFormatRg16Snorm:
  1376. return "rg16_snorm";
  1377. case ImageFormatRgba16Snorm:
  1378. return "rgba16_snorm";
  1379. case ImageFormatR8Snorm:
  1380. return "r8_snorm";
  1381. case ImageFormatRg8Snorm:
  1382. return "rg8_snorm";
  1383. case ImageFormatR8ui:
  1384. return "r8ui";
  1385. case ImageFormatRg8ui:
  1386. return "rg8ui";
  1387. case ImageFormatR16ui:
  1388. return "r16ui";
  1389. case ImageFormatRgb10a2ui:
  1390. return "rgb10_a2ui";
  1391. case ImageFormatR8i:
  1392. return "r8i";
  1393. case ImageFormatRg8i:
  1394. return "rg8i";
  1395. case ImageFormatR16i:
  1396. return "r16i";
  1397. case ImageFormatR64i:
  1398. return "r64i";
  1399. case ImageFormatR64ui:
  1400. return "r64ui";
  1401. default:
  1402. case ImageFormatUnknown:
  1403. return nullptr;
  1404. }
  1405. }
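// Size in bytes of a single scalar component of the type (8, 4, 2 or 1 depending on width).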
  1406. uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
  1407. {
  1408. switch (type.basetype)
  1409. {
  1410. case SPIRType::Double:
  1411. case SPIRType::Int64:
  1412. case SPIRType::UInt64:
  1413. return 8;
  1414. case SPIRType::Float:
  1415. case SPIRType::Int:
  1416. case SPIRType::UInt:
  1417. return 4;
  1418. case SPIRType::Half:
  1419. case SPIRType::Short:
  1420. case SPIRType::UShort:
  1421. case SPIRType::BFloat16:
  1422. return 2;
  1423. case SPIRType::SByte:
  1424. case SPIRType::UByte:
  1425. case SPIRType::FloatE4M3:
  1426. case SPIRType::FloatE5M2:
  1427. return 1;
  1428. default:
  1429. SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
  1430. }
  1431. }
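// Computes the base alignment of a type under the given packing standard (std140, std430, scalar,
// HLSL), following the rules of section 7.6.2.2 in the GL 4.5 core specification.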
  1432. uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
  1433. BufferPackingStandard packing)
  1434. {
  1435. // If using PhysicalStorageBuffer storage class, this is a pointer,
  1436. // and is 64-bit.
  1437. if (is_physical_pointer(type))
  1438. {
  1439. if (!type.pointer)
  1440. SPIRV_CROSS_THROW("Types in PhysicalStorageBuffer must be pointers.");
  1441. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  1442. {
  1443. if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
  1444. return 16;
  1445. else
  1446. return 8;
  1447. }
  1448. else
  1449. SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64 must be used for PhysicalStorageBuffer.");
  1450. }
  1451. else if (is_array(type))
  1452. {
  1453. uint32_t minimum_alignment = 1;
  1454. if (packing_is_vec4_padded(packing))
  1455. minimum_alignment = 16;
  1456. auto *tmp = &get<SPIRType>(type.parent_type);
  1457. while (!tmp->array.empty())
  1458. tmp = &get<SPIRType>(tmp->parent_type);
  1459. // Get the alignment of the base type, then maybe round up.
  1460. return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
  1461. }
  1462. if (type.basetype == SPIRType::Struct)
  1463. {
  1464. // Rule 9. Structs alignments are maximum alignment of its members.
  1465. uint32_t alignment = 1;
  1466. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1467. {
  1468. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1469. alignment =
  1470. max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
  1471. }
  1472. // In std140, struct alignment is rounded up to 16.
  1473. if (packing_is_vec4_padded(packing))
  1474. alignment = max<uint32_t>(alignment, 16u);
  1475. return alignment;
  1476. }
  1477. else
  1478. {
  1479. const uint32_t base_alignment = type_to_packed_base_size(type, packing);
  1480. // Alignment requirement for scalar block layout is always the alignment for the most basic component.
  1481. if (packing_is_scalar(packing))
  1482. return base_alignment;
  1483. // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
1484. // a vec4; this is handled outside since that part knows our current offset.
  1485. if (type.columns == 1 && packing_is_hlsl(packing))
  1486. return base_alignment;
  1487. // From 7.6.2.2 in GL 4.5 core spec.
  1488. // Rule 1
  1489. if (type.vecsize == 1 && type.columns == 1)
  1490. return base_alignment;
  1491. // Rule 2
  1492. if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
  1493. return type.vecsize * base_alignment;
  1494. // Rule 3
  1495. if (type.vecsize == 3 && type.columns == 1)
  1496. return 4 * base_alignment;
  1497. // Rule 4 implied. Alignment does not change in std430.
  1498. // Rule 5. Column-major matrices are stored as arrays of
  1499. // vectors.
  1500. if (flags.get(DecorationColMajor) && type.columns > 1)
  1501. {
  1502. if (packing_is_vec4_padded(packing))
  1503. return 4 * base_alignment;
  1504. else if (type.vecsize == 3)
  1505. return 4 * base_alignment;
  1506. else
  1507. return type.vecsize * base_alignment;
  1508. }
  1509. // Rule 6 implied.
  1510. // Rule 7.
  1511. if (flags.get(DecorationRowMajor) && type.vecsize > 1)
  1512. {
  1513. if (packing_is_vec4_padded(packing))
  1514. return 4 * base_alignment;
  1515. else if (type.columns == 3)
  1516. return 4 * base_alignment;
  1517. else
  1518. return type.columns * base_alignment;
  1519. }
  1520. // Rule 8 implied.
  1521. }
  1522. SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
  1523. }
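// The array stride is the packed size of the element type rounded up to the array's alignment.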
  1524. uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
  1525. BufferPackingStandard packing)
  1526. {
  1527. // Array stride is equal to aligned size of the underlying type.
  1528. uint32_t parent = type.parent_type;
  1529. assert(parent);
  1530. auto &tmp = get<SPIRType>(parent);
  1531. uint32_t size = type_to_packed_size(tmp, flags, packing);
  1532. uint32_t alignment = type_to_packed_alignment(type, flags, packing);
  1533. return (size + alignment - 1) & ~(alignment - 1);
  1534. }
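// Computes the packed size in bytes of a type under the given packing standard, including member
// padding for structs and the HLSL rule that the trailing element/column is not padded out to vec4.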
  1535. uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
  1536. {
  1537. // If using PhysicalStorageBuffer storage class, this is a pointer,
  1538. // and is 64-bit.
  1539. if (is_physical_pointer(type))
  1540. {
  1541. if (!type.pointer)
  1542. SPIRV_CROSS_THROW("Types in PhysicalStorageBuffer must be pointers.");
  1543. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  1544. return 8;
  1545. else
  1546. SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64 must be used for PhysicalStorageBuffer.");
  1547. }
  1548. else if (is_array(type))
  1549. {
  1550. uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
  1551. // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
  1552. // so that it is possible to pack other vectors into the last element.
  1553. if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
  1554. packed_size -= (4 - type.vecsize) * (type.width / 8);
  1555. return packed_size;
  1556. }
  1557. uint32_t size = 0;
  1558. if (type.basetype == SPIRType::Struct)
  1559. {
  1560. uint32_t pad_alignment = 1;
  1561. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1562. {
  1563. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1564. auto &member_type = get<SPIRType>(type.member_types[i]);
  1565. uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
  1566. uint32_t alignment = max(packed_alignment, pad_alignment);
  1567. // The next member following a struct member is aligned to the base alignment of the struct that came before.
  1568. // GL 4.5 spec, 7.6.2.2.
  1569. if (member_type.basetype == SPIRType::Struct)
  1570. pad_alignment = packed_alignment;
  1571. else
  1572. pad_alignment = 1;
  1573. size = (size + alignment - 1) & ~(alignment - 1);
  1574. size += type_to_packed_size(member_type, member_flags, packing);
  1575. }
  1576. }
  1577. else
  1578. {
  1579. const uint32_t base_alignment = type_to_packed_base_size(type, packing);
  1580. if (packing_is_scalar(packing))
  1581. {
  1582. size = type.vecsize * type.columns * base_alignment;
  1583. }
  1584. else
  1585. {
  1586. if (type.columns == 1)
  1587. size = type.vecsize * base_alignment;
  1588. if (flags.get(DecorationColMajor) && type.columns > 1)
  1589. {
  1590. if (packing_is_vec4_padded(packing))
  1591. size = type.columns * 4 * base_alignment;
  1592. else if (type.vecsize == 3)
  1593. size = type.columns * 4 * base_alignment;
  1594. else
  1595. size = type.columns * type.vecsize * base_alignment;
  1596. }
  1597. if (flags.get(DecorationRowMajor) && type.vecsize > 1)
  1598. {
  1599. if (packing_is_vec4_padded(packing))
  1600. size = type.vecsize * 4 * base_alignment;
  1601. else if (type.columns == 3)
  1602. size = type.vecsize * 4 * base_alignment;
  1603. else
  1604. size = type.vecsize * type.columns * base_alignment;
  1605. }
  1606. // For matrices in HLSL, the last element has a size which depends on its vector size,
  1607. // so that it is possible to pack other vectors into the last element.
  1608. if (packing_is_hlsl(packing) && type.columns > 1)
  1609. size -= (4 - type.vecsize) * (type.width / 8);
  1610. }
  1611. }
  1612. return size;
  1613. }
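// Checks whether the Offset/ArrayStride decorations of a block are compatible with the given
// packing standard. On failure, *failed_validation_index (if provided) receives the offending member.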
  1614. bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
  1615. uint32_t *failed_validation_index, uint32_t start_offset,
  1616. uint32_t end_offset)
  1617. {
  1618. // This is very tricky and error prone, but try to be exhaustive and correct here.
  1619. // SPIR-V doesn't directly say if we're using std430 or std140.
  1620. // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
  1621. // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
1622. // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyway, and custom offsets would do the same thing).
  1623. //
  1624. // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
  1625. // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
  1626. //
  1627. // The only two differences between std140 and std430 are related to padding alignment/array stride
  1628. // in arrays and structs. In std140 they take minimum vec4 alignment.
  1629. // std430 only removes the vec4 requirement.
  1630. uint32_t offset = 0;
  1631. uint32_t pad_alignment = 1;
  1632. bool is_top_level_block =
  1633. has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
  1634. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1635. {
  1636. auto &memb_type = get<SPIRType>(type.member_types[i]);
  1637. auto *type_meta = ir.find_meta(type.self);
  1638. auto member_flags = type_meta ? type_meta->members[i].decoration_flags : Bitset{};
  1639. // Verify alignment rules.
  1640. uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
  1641. // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
  1642. // layout(constant_id = 0) const int s = 10;
  1643. // const int S = s + 5; // SpecConstantOp
  1644. // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
  1645. // we would need full implementation of compile-time constant folding. :(
  1646. // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
  1647. // for our analysis (e.g. unsized arrays).
  1648. // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
  1649. // Querying size of this member will fail, so just don't call it unless we have to.
  1650. //
  1651. // This is likely "best effort" we can support without going into unacceptably complicated workarounds.
  1652. bool member_can_be_unsized =
  1653. is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
  1654. uint32_t packed_size = 0;
  1655. if (!member_can_be_unsized || packing_is_hlsl(packing))
  1656. packed_size = type_to_packed_size(memb_type, member_flags, packing);
  1657. // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
  1658. uint32_t actual_offset = type_struct_member_offset(type, i);
  1659. if (packing_is_hlsl(packing))
  1660. {
  1661. // If a member straddles across a vec4 boundary, alignment is actually vec4.
  1662. uint32_t target_offset;
  1663. // If we intend to use explicit packing, we must check for improper straddle with that offset.
  1664. // In implicit packing, we must check with implicit offset, since the explicit offset
  1665. // might have already accounted for the straddle, and we'd miss the alignment promotion to vec4.
  1666. // This is important when packing sub-structs that don't support packoffset().
  1667. if (packing_has_flexible_offset(packing))
  1668. target_offset = actual_offset;
  1669. else
  1670. target_offset = offset;
  1671. uint32_t begin_word = target_offset / 16;
  1672. uint32_t end_word = (target_offset + packed_size - 1) / 16;
  1673. if (begin_word != end_word)
  1674. packed_alignment = max<uint32_t>(packed_alignment, 16u);
  1675. }
  1676. // Field is not in the specified range anymore and we can ignore any further fields.
  1677. if (actual_offset >= end_offset)
  1678. break;
  1679. uint32_t alignment = max(packed_alignment, pad_alignment);
  1680. offset = (offset + alignment - 1) & ~(alignment - 1);
  1681. // The next member following a struct member is aligned to the base alignment of the struct that came before.
  1682. // GL 4.5 spec, 7.6.2.2.
  1683. if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
  1684. pad_alignment = packed_alignment;
  1685. else
  1686. pad_alignment = 1;
  1687. // Only care about packing if we are in the given range
  1688. if (actual_offset >= start_offset)
  1689. {
  1690. // We only care about offsets in std140, std430, etc ...
  1691. // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
  1692. if (!packing_has_flexible_offset(packing))
  1693. {
  1694. if (actual_offset != offset) // This cannot be the packing we're looking for.
  1695. {
  1696. if (failed_validation_index)
  1697. *failed_validation_index = i;
  1698. return false;
  1699. }
  1700. }
  1701. else if ((actual_offset & (alignment - 1)) != 0)
  1702. {
  1703. // We still need to verify that alignment rules are observed, even if we have explicit offset.
  1704. if (failed_validation_index)
  1705. *failed_validation_index = i;
  1706. return false;
  1707. }
  1708. // Verify array stride rules.
  1709. if (is_array(memb_type) &&
  1710. type_to_packed_array_stride(memb_type, member_flags, packing) !=
  1711. type_struct_member_array_stride(type, i))
  1712. {
  1713. if (failed_validation_index)
  1714. *failed_validation_index = i;
  1715. return false;
  1716. }
  1717. // Verify that sub-structs also follow packing rules.
  1718. // We cannot use enhanced layouts on substructs, so they better be up to spec.
  1719. auto substruct_packing = packing_to_substruct_packing(packing);
  1720. if (!memb_type.pointer && !memb_type.member_types.empty() &&
  1721. !buffer_is_packing_standard(memb_type, substruct_packing))
  1722. {
  1723. if (failed_validation_index)
  1724. *failed_validation_index = i;
  1725. return false;
  1726. }
  1727. }
  1728. // Bump size.
  1729. offset = actual_offset + packed_size;
  1730. }
  1731. return true;
  1732. }
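// Determines whether location qualifiers can legally be emitted for the given storage class and
// block-ness, based on target version, ES profile and separate_shader_objects.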
  1733. bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
  1734. {
1735. // Location specifiers are a must-have in SPIR-V, but they aren't really supported in earlier versions of GLSL.
  1736. // Be very explicit here about how to solve the issue.
  1737. if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
  1738. (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
  1739. {
  1740. uint32_t minimum_desktop_version = block ? 440 : 410;
  1741. // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
  1742. if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
  1743. return false;
  1744. else if (options.es && options.version < 310)
  1745. return false;
  1746. }
  1747. if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
  1748. (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
  1749. {
  1750. if (options.es && options.version < 300)
  1751. return false;
  1752. else if (!options.es && options.version < 330)
  1753. return false;
  1754. }
  1755. if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
  1756. {
  1757. if (options.es && options.version < 310)
  1758. return false;
  1759. else if (!options.es && options.version < 430)
  1760. return false;
  1761. }
  1762. return true;
  1763. }
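// Builds the layout(...) qualifier for a global variable: push_constant, bindings and sets,
// locations, transform feedback, geometry streams and so on.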
  1764. string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
  1765. {
  1766. // FIXME: Come up with a better solution for when to disable layouts.
1767. // Whether layouts can be used depends on the enabled extensions as well as which types
  1768. // of layouts are used. For now, the simple solution is to just disable
  1769. // layouts for legacy versions.
  1770. if (is_legacy())
  1771. return "";
  1772. if (subpass_input_is_framebuffer_fetch(var.self))
  1773. return "";
  1774. SmallVector<string> attr;
  1775. auto &type = get<SPIRType>(var.basetype);
  1776. auto &flags = get_decoration_bitset(var.self);
  1777. auto &typeflags = get_decoration_bitset(type.self);
  1778. if (flags.get(DecorationPassthroughNV))
  1779. attr.push_back("passthrough");
  1780. if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
  1781. attr.push_back("push_constant");
  1782. else if (var.storage == StorageClassShaderRecordBufferKHR)
  1783. attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
  1784. if (flags.get(DecorationRowMajor))
  1785. attr.push_back("row_major");
  1786. if (flags.get(DecorationColMajor))
  1787. attr.push_back("column_major");
  1788. if (options.vulkan_semantics)
  1789. {
  1790. if (flags.get(DecorationInputAttachmentIndex))
  1791. attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
  1792. }
  1793. bool is_block = has_decoration(type.self, DecorationBlock);
  1794. if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
  1795. {
  1796. Bitset combined_decoration;
  1797. for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
  1798. combined_decoration.merge_or(combined_decoration_for_member(type, i));
  1799. // If our members have location decorations, we don't need to
  1800. // emit location decorations at the top as well (looks weird).
  1801. if (!combined_decoration.get(DecorationLocation))
  1802. attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
  1803. }
  1804. if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput &&
  1805. location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation)))
  1806. {
  1807. attr.push_back("noncoherent");
  1808. }
  1809. // Transform feedback
  1810. bool uses_enhanced_layouts = false;
  1811. if (is_block && var.storage == StorageClassOutput)
  1812. {
  1813. // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
  1814. // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
  1815. // is the xfb_offset.
  1816. uint32_t member_count = uint32_t(type.member_types.size());
  1817. bool have_xfb_buffer_stride = false;
  1818. bool have_any_xfb_offset = false;
  1819. bool have_geom_stream = false;
  1820. uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
  1821. if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
  1822. {
  1823. have_xfb_buffer_stride = true;
  1824. xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
  1825. xfb_stride = get_decoration(var.self, DecorationXfbStride);
  1826. }
  1827. if (flags.get(DecorationStream))
  1828. {
  1829. have_geom_stream = true;
  1830. geom_stream = get_decoration(var.self, DecorationStream);
  1831. }
  1832. // Verify that none of the members violate our assumption.
  1833. for (uint32_t i = 0; i < member_count; i++)
  1834. {
  1835. if (has_member_decoration(type.self, i, DecorationStream))
  1836. {
  1837. uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
  1838. if (have_geom_stream && member_geom_stream != geom_stream)
  1839. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  1840. have_geom_stream = true;
  1841. geom_stream = member_geom_stream;
  1842. }
  1843. // Only members with an Offset decoration participate in XFB.
  1844. if (!has_member_decoration(type.self, i, DecorationOffset))
  1845. continue;
  1846. have_any_xfb_offset = true;
  1847. if (has_member_decoration(type.self, i, DecorationXfbBuffer))
  1848. {
  1849. uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
  1850. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  1851. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  1852. have_xfb_buffer_stride = true;
  1853. xfb_buffer = buffer_index;
  1854. }
  1855. if (has_member_decoration(type.self, i, DecorationXfbStride))
  1856. {
  1857. uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
  1858. if (have_xfb_buffer_stride && stride != xfb_stride)
  1859. SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  1860. have_xfb_buffer_stride = true;
  1861. xfb_stride = stride;
  1862. }
  1863. }
  1864. if (have_xfb_buffer_stride && have_any_xfb_offset)
  1865. {
  1866. attr.push_back(join("xfb_buffer = ", xfb_buffer));
  1867. attr.push_back(join("xfb_stride = ", xfb_stride));
  1868. uses_enhanced_layouts = true;
  1869. }
  1870. if (have_geom_stream)
  1871. {
  1872. if (get_execution_model() != ExecutionModelGeometry)
  1873. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  1874. if (options.es)
  1875. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  1876. if (options.version < 400)
  1877. require_extension_internal("GL_ARB_transform_feedback3");
  1878. attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
  1879. }
  1880. }
  1881. else if (var.storage == StorageClassOutput)
  1882. {
  1883. if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
  1884. {
  1885. // XFB for standalone variables, we can emit all decorations.
  1886. attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
  1887. attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
  1888. attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
  1889. uses_enhanced_layouts = true;
  1890. }
  1891. if (flags.get(DecorationStream))
  1892. {
  1893. if (get_execution_model() != ExecutionModelGeometry)
  1894. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  1895. if (options.es)
  1896. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  1897. if (options.version < 400)
  1898. require_extension_internal("GL_ARB_transform_feedback3");
  1899. attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
  1900. }
  1901. }
  1902. // Can only declare Component if we can declare location.
  1903. if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
  1904. {
  1905. uses_enhanced_layouts = true;
  1906. attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
  1907. }
if (uses_enhanced_layouts)
{
	if (options.es)
		SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
	if (options.version < 140)
		SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
	if (options.version < 440)
		require_extension_internal("GL_ARB_enhanced_layouts");
}
  1922. if (flags.get(DecorationIndex))
  1923. attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
  1924. // Do not emit set = decoration in regular GLSL output, but
  1925. // we need to preserve it in Vulkan GLSL mode.
  1926. if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
  1927. {
  1928. if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
  1929. attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
  1930. }
  1931. bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
  1932. bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
  1933. (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
  1934. bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
  1935. bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
  1936. // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
  1937. bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
// Pretend that UBOs are unavailable if the options ask for plain uniforms instead.
  1939. if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
  1940. can_use_buffer_blocks = false;
  1941. bool can_use_binding;
  1942. if (options.es)
  1943. can_use_binding = options.version >= 310;
  1944. else
  1945. can_use_binding = options.enable_420pack_extension || (options.version >= 420);
  1946. // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
  1947. if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
  1948. can_use_binding = false;
  1949. if (var.storage == StorageClassShaderRecordBufferKHR)
  1950. can_use_binding = false;
  1951. if (can_use_binding && flags.get(DecorationBinding))
  1952. attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));
  1953. if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
  1954. attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
  1955. // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
  1956. // If SPIR-V does not comply with either layout, we cannot really work around it.
  1957. if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
  1958. {
  1959. attr.push_back(buffer_to_packing_standard(type, false, true));
  1960. }
  1961. else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
  1962. {
  1963. attr.push_back(buffer_to_packing_standard(type, true, true));
  1964. }
// For images, the type itself adds a layout qualifier.
  1966. // Only emit the format for storage images.
  1967. if (type.basetype == SPIRType::Image && type.image.sampled == 2)
  1968. {
  1969. const char *fmt = format_to_glsl(type.image.format);
  1970. if (fmt)
  1971. attr.push_back(fmt);
  1972. }
  1973. if (attr.empty())
  1974. return "";
  1975. string res = "layout(";
  1976. res += merge(attr);
  1977. res += ") ";
  1978. return res;
  1979. }
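// Maps a buffer block type to the tightest GLSL packing qualifier it satisfies.
// The probing order below is: std430 (only if the caller allows it), std140, scalar (Vulkan only),
// then the "enhanced layout" variants which require explicit member offsets and, on desktop GL < 4.40,
// GL_ARB_enhanced_layouts. If nothing matches, the block cannot be emitted and we throw.
// Illustrative result (actual output depends on the block): an SSBO that satisfies std430 ends up
// contributing "std430" to its layout() declaration, e.g. layout(std430, binding = 0) buffer SSBO { ... };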
  1980. string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type,
  1981. bool support_std430_without_scalar_layout,
  1982. bool support_enhanced_layouts)
  1983. {
  1984. if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
  1985. return "std430";
  1986. else if (buffer_is_packing_standard(type, BufferPackingStd140))
  1987. return "std140";
  1988. else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
  1989. {
  1990. require_extension_internal("GL_EXT_scalar_block_layout");
  1991. return "scalar";
  1992. }
  1993. else if (support_std430_without_scalar_layout &&
  1994. support_enhanced_layouts &&
  1995. buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
  1996. {
  1997. if (options.es && !options.vulkan_semantics)
  1998. SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
  1999. "not support GL_ARB_enhanced_layouts.");
  2000. if (!options.es && !options.vulkan_semantics && options.version < 440)
  2001. require_extension_internal("GL_ARB_enhanced_layouts");
  2002. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  2003. return "std430";
  2004. }
  2005. else if (support_enhanced_layouts &&
  2006. buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
  2007. {
// Fallback time. We might be able to use GL_ARB_enhanced_layouts to deal with this difference;
// however, we can only use layout(offset) on the block itself, not on any substructs, so the substructs had better already use a suitable layout.
  2010. // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
  2011. if (options.es && !options.vulkan_semantics)
  2012. SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
  2013. "not support GL_ARB_enhanced_layouts.");
  2014. if (!options.es && !options.vulkan_semantics && options.version < 440)
  2015. require_extension_internal("GL_ARB_enhanced_layouts");
  2016. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  2017. return "std140";
  2018. }
  2019. else if (options.vulkan_semantics &&
  2020. support_enhanced_layouts &&
  2021. buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
  2022. {
  2023. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  2024. require_extension_internal("GL_EXT_scalar_block_layout");
  2025. return "scalar";
  2026. }
  2027. else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
  2028. buffer_is_packing_standard(type, BufferPackingStd430))
  2029. {
  2030. // UBOs can support std430 with GL_EXT_scalar_block_layout.
  2031. require_extension_internal("GL_EXT_scalar_block_layout");
  2032. return "std430";
  2033. }
  2034. else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
  2035. support_enhanced_layouts &&
  2036. buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
  2037. {
  2038. // UBOs can support std430 with GL_EXT_scalar_block_layout.
  2039. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  2040. require_extension_internal("GL_EXT_scalar_block_layout");
  2041. return "std430";
  2042. }
  2043. else
  2044. {
  2045. SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced "
  2046. "layouts. You can try flattening this block to support a more flexible layout.");
  2047. }
  2048. }
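// Dispatch for push constant emission:
//  - flattened blocks are emitted as a plain array of vectors (emit_buffer_block_flattened),
//  - Vulkan GLSL keeps a real push_constant block (emit_push_constant_block_vulkan -> emit_buffer_block),
//  - emit_push_constant_as_uniform_buffer turns the block into a regular UBO (emit_buffer_block_native),
//  - otherwise we fall back to a plain uniform struct (emit_push_constant_block_glsl).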
  2049. void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
  2050. {
  2051. if (flattened_buffer_blocks.count(var.self))
  2052. emit_buffer_block_flattened(var);
  2053. else if (options.vulkan_semantics)
  2054. emit_push_constant_block_vulkan(var);
  2055. else if (options.emit_push_constant_as_uniform_buffer)
  2056. emit_buffer_block_native(var);
  2057. else
  2058. emit_push_constant_block_glsl(var);
  2059. }
  2060. void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
  2061. {
  2062. emit_buffer_block(var);
  2063. }
  2064. void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
  2065. {
  2066. // OpenGL has no concept of push constant blocks, implement it as a uniform struct.
  2067. auto &type = get<SPIRType>(var.basetype);
  2068. unset_decoration(var.self, DecorationBinding);
  2069. unset_decoration(var.self, DecorationDescriptorSet);
  2070. #if 0
  2071. if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
  2072. SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
  2073. "Remap to location with reflection API first or disable these decorations.");
  2074. #endif
  2075. // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
  2076. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
  2077. bool block_flag = has_decoration(type.self, DecorationBlock);
  2078. unset_decoration(type.self, DecorationBlock);
  2079. emit_struct(type);
  2080. if (block_flag)
  2081. set_decoration(type.self, DecorationBlock);
  2082. emit_uniform(var);
  2083. statement("");
  2084. }
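// Rough sketch of what the GLSL fallback above produces for a push constant block
// (names are illustrative; they come from the SPIR-V module):
//   struct PushConstants { mat4 mvp; };
//   uniform PushConstants push;
// Accesses then go through the uniform struct instead of a push_constant block.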
  2085. void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
  2086. {
  2087. auto &type = get<SPIRType>(var.basetype);
  2088. bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
  2089. if (flattened_buffer_blocks.count(var.self))
  2090. emit_buffer_block_flattened(var);
  2091. else if (is_legacy() || (!options.es && options.version == 130) ||
  2092. (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
  2093. emit_buffer_block_legacy(var);
  2094. else
  2095. emit_buffer_block_native(var);
  2096. }
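// Legacy targets (and GLSL 1.30, or when UBOs are forced to plain uniforms) cannot declare real
// uniform blocks, so the block is emitted as a struct plus a plain uniform of that struct type.
// SSBOs have no such fallback and are rejected outright below.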
  2097. void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
  2098. {
  2099. auto &type = get<SPIRType>(var.basetype);
  2100. bool ssbo = var.storage == StorageClassStorageBuffer ||
  2101. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  2102. if (ssbo)
  2103. SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
  2105. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
  2106. auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
  2107. bool block_flag = block_flags.get(DecorationBlock);
  2108. block_flags.clear(DecorationBlock);
  2109. emit_struct(type);
  2110. if (block_flag)
  2111. block_flags.set(DecorationBlock);
  2112. emit_uniform(var);
  2113. statement("");
  2114. }
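// GL_EXT_buffer_reference blocks are emitted in two passes: a forward declaration pass that only
// reserves a unique block name and emits "layout(buffer_reference) buffer Name;", and a definition
// pass that emits the full declaration with buffer_reference_align, the packing qualifier and any
// restrict/coherent/writeonly/readonly memory qualifiers. Non-block pointee types are wrapped in a
// synthetic block with a single "value" member. Illustrative output (names depend on the module):
//   layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer Node { ... };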
  2115. void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration)
  2116. {
  2117. auto &type = get<SPIRType>(type_id);
  2118. string buffer_name;
  2119. if (forward_declaration && is_physical_pointer_to_buffer_block(type))
  2120. {
  2121. // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
// Allow an aliased name since we might be declaring the block twice: once as a forward-declared buffer reference and once as the proper declaration.
  2123. // The names must match up.
  2124. buffer_name = to_name(type.self, false);
  2125. // Shaders never use the block by interface name, so we don't
  2126. // have to track this other than updating name caches.
  2127. // If we have a collision for any reason, just fallback immediately.
  2128. if (ir.meta[type.self].decoration.alias.empty() ||
  2129. block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
  2130. resource_names.find(buffer_name) != end(resource_names))
  2131. {
  2132. buffer_name = join("_", type.self);
  2133. }
  2134. // Make sure we get something unique for both global name scope and block name scope.
  2135. // See GLSL 4.5 spec: section 4.3.9 for details.
  2136. add_variable(block_ssbo_names, resource_names, buffer_name);
  2137. // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
  2138. // This cannot conflict with anything else, so we're safe now.
// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
  2140. if (buffer_name.empty())
  2141. buffer_name = join("_", type.self);
  2142. block_names.insert(buffer_name);
  2143. block_ssbo_names.insert(buffer_name);
  2144. // Ensure we emit the correct name when emitting non-forward pointer type.
  2145. ir.meta[type.self].decoration.alias = buffer_name;
  2146. }
  2147. else
  2148. {
  2149. buffer_name = type_to_glsl(type);
  2150. }
  2151. if (!forward_declaration)
  2152. {
  2153. auto itr = physical_storage_type_to_alignment.find(type_id);
  2154. uint32_t alignment = 0;
  2155. if (itr != physical_storage_type_to_alignment.end())
  2156. alignment = itr->second.alignment;
  2157. if (is_physical_pointer_to_buffer_block(type))
  2158. {
  2159. SmallVector<std::string> attributes;
  2160. attributes.push_back("buffer_reference");
  2161. if (alignment)
  2162. attributes.push_back(join("buffer_reference_align = ", alignment));
  2163. attributes.push_back(buffer_to_packing_standard(type, true, true));
  2164. auto flags = ir.get_buffer_block_type_flags(type);
  2165. string decorations;
  2166. if (flags.get(DecorationRestrict))
  2167. decorations += " restrict";
  2168. if (flags.get(DecorationCoherent))
  2169. decorations += " coherent";
  2170. if (flags.get(DecorationNonReadable))
  2171. decorations += " writeonly";
  2172. if (flags.get(DecorationNonWritable))
  2173. decorations += " readonly";
  2174. statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name);
  2175. }
  2176. else
  2177. {
  2178. string packing_standard;
  2179. if (type.basetype == SPIRType::Struct)
  2180. {
  2181. // The non-block type is embedded in a block, so we cannot use enhanced layouts :(
  2182. packing_standard = buffer_to_packing_standard(type, true, false) + ", ";
  2183. }
  2184. else if (is_array(get_pointee_type(type)))
  2185. {
  2186. SPIRType wrap_type{OpTypeStruct};
  2187. wrap_type.self = ir.increase_bound_by(1);
  2188. wrap_type.member_types.push_back(get_pointee_type_id(type_id));
  2189. ir.set_member_decoration(wrap_type.self, 0, DecorationOffset, 0);
  2190. packing_standard = buffer_to_packing_standard(wrap_type, true, false) + ", ";
  2191. }
  2192. if (alignment)
  2193. statement("layout(", packing_standard, "buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name);
  2194. else
  2195. statement("layout(", packing_standard, "buffer_reference) buffer ", buffer_name);
  2196. }
  2197. begin_scope();
  2198. if (is_physical_pointer_to_buffer_block(type))
  2199. {
  2200. type.member_name_cache.clear();
  2201. uint32_t i = 0;
  2202. for (auto &member : type.member_types)
  2203. {
  2204. add_member_name(type, i);
  2205. emit_struct_member(type, member, i);
  2206. i++;
  2207. }
  2208. }
  2209. else
  2210. {
  2211. auto &pointee_type = get_pointee_type(type);
  2212. statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type, 0), ";");
  2213. }
  2214. end_scope_decl();
  2215. statement("");
  2216. }
  2217. else
  2218. {
  2219. statement("layout(buffer_reference) buffer ", buffer_name, ";");
  2220. }
  2221. }
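// Emits a real UBO/SSBO declaration: a unique block name is picked (falling back to a generated
// name on collisions), memory qualifiers are derived from the buffer block flags, and the members
// are emitted inside the block followed by the instance name. Illustrative shape of the output:
//   layout(std140, binding = 1) uniform UBO { mat4 mvp; } ubo;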
  2222. void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
  2223. {
  2224. auto &type = get<SPIRType>(var.basetype);
  2225. Bitset flags = ir.get_buffer_block_flags(var);
  2226. bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
  2227. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  2228. bool is_restrict = ssbo && flags.get(DecorationRestrict);
  2229. bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
  2230. bool is_readonly = ssbo && flags.get(DecorationNonWritable);
  2231. bool is_coherent = ssbo && flags.get(DecorationCoherent);
  2232. // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
  2233. auto buffer_name = to_name(type.self, false);
  2234. auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
  2235. // Shaders never use the block by interface name, so we don't
  2236. // have to track this other than updating name caches.
  2237. // If we have a collision for any reason, just fallback immediately.
  2238. if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
  2239. resource_names.find(buffer_name) != end(resource_names))
  2240. {
  2241. buffer_name = get_block_fallback_name(var.self);
  2242. }
  2243. // Make sure we get something unique for both global name scope and block name scope.
  2244. // See GLSL 4.5 spec: section 4.3.9 for details.
  2245. add_variable(block_namespace, resource_names, buffer_name);
  2246. // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
  2247. // This cannot conflict with anything else, so we're safe now.
// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
  2249. if (buffer_name.empty())
  2250. buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
  2251. block_names.insert(buffer_name);
  2252. block_namespace.insert(buffer_name);
  2253. // Save for post-reflection later.
  2254. declared_block_names[var.self] = buffer_name;
  2255. statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
  2256. is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
  2257. buffer_name);
  2258. begin_scope();
  2259. type.member_name_cache.clear();
  2260. uint32_t i = 0;
  2261. for (auto &member : type.member_types)
  2262. {
  2263. add_member_name(type, i);
  2264. emit_struct_member(type, member, i);
  2265. i++;
  2266. }
// Empty blocks are not allowed in GLSL, so emit a dummy member to keep the declaration valid.
  2268. if (type_is_empty(type) && !backend.supports_empty_struct)
  2269. statement("int empty_struct_member;");
  2270. // var.self can be used as a backup name for the block name,
  2271. // so we need to make sure we don't disturb the name here on a recompile.
  2272. // It will need to be reset if we have to recompile.
  2273. preserve_alias_on_reset(var.self);
  2274. add_resource_name(var.self);
  2275. end_scope_decl(to_name(var.self) + type_to_array_glsl(type, var.self));
  2276. statement("");
  2277. }
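// Flattened buffer blocks are emitted as one plain uniform array of 16-byte elements:
// the declared struct size is rounded up to a whole number of vec4-sized slots, and all members must
// share a single basic type (float, int or uint). Illustrative output for a 64-byte float block:
//   uniform vec4 UBO[4];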
  2278. void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
  2279. {
  2280. auto &type = get<SPIRType>(var.basetype);
  2281. // Block names should never alias.
  2282. auto buffer_name = to_name(type.self, false);
  2283. size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
  2284. SPIRType::BaseType basic_type;
  2285. if (get_common_basic_type(type, basic_type))
  2286. {
  2287. SPIRType tmp { OpTypeVector };
  2288. tmp.basetype = basic_type;
  2289. tmp.vecsize = 4;
  2290. if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
  2291. SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
  2292. auto flags = ir.get_buffer_block_flags(var);
  2293. statement("uniform ", flags_to_qualifiers_glsl(tmp, 0, flags), type_to_glsl(tmp), " ", buffer_name, "[",
  2294. buffer_size, "];");
  2295. }
  2296. else
  2297. SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
  2298. }
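// Maps a variable's storage class to its GLSL storage qualifier: attribute/varying on legacy
// vertex/fragment shaders, in/out (or inout for framebuffer fetch outputs) on modern targets,
// "uniform" for UBOs, push constants and atomic counters, and the EXT/NV ray tracing qualifiers
// for payload, hit attribute and callable data storage.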
  2299. const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
  2300. {
  2301. auto &execution = get_entry_point();
  2302. if (subpass_input_is_framebuffer_fetch(var.self))
  2303. return "";
  2304. if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
  2305. {
  2306. if (is_legacy() && execution.model == ExecutionModelVertex)
  2307. return var.storage == StorageClassInput ? "attribute " : "varying ";
  2308. else if (is_legacy() && execution.model == ExecutionModelFragment)
  2309. return "varying "; // Fragment outputs are renamed so they never hit this case.
  2310. else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
  2311. {
  2312. uint32_t loc = get_decoration(var.self, DecorationLocation);
  2313. bool is_inout = location_is_framebuffer_fetch(loc);
  2314. if (is_inout)
  2315. return "inout ";
  2316. else
  2317. return "out ";
  2318. }
  2319. else
  2320. return var.storage == StorageClassInput ? "in " : "out ";
  2321. }
  2322. else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
  2323. var.storage == StorageClassPushConstant || var.storage == StorageClassAtomicCounter)
  2324. {
  2325. return "uniform ";
  2326. }
  2327. else if (var.storage == StorageClassRayPayloadKHR)
  2328. {
  2329. return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
  2330. }
  2331. else if (var.storage == StorageClassIncomingRayPayloadKHR)
  2332. {
  2333. return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
  2334. }
  2335. else if (var.storage == StorageClassHitAttributeKHR)
  2336. {
  2337. return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
  2338. }
  2339. else if (var.storage == StorageClassCallableDataKHR)
  2340. {
  2341. return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
  2342. }
  2343. else if (var.storage == StorageClassIncomingCallableDataKHR)
  2344. {
  2345. return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
  2346. }
  2347. return "";
  2348. }
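// Flattened I/O emission: each (possibly nested) struct member becomes a standalone varying whose
// name is the block instance name joined with the member path by underscores, e.g. a member
// reached as "VertexOut.color" might be emitted as "VertexOut_color" (illustrative; underscores are
// sanitized so the generated identifier never contains consecutive underscores).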
  2349. void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
  2350. const SmallVector<uint32_t> &indices)
  2351. {
  2352. uint32_t member_type_id = type.self;
  2353. const SPIRType *member_type = &type;
  2354. const SPIRType *parent_type = nullptr;
  2355. auto flattened_name = basename;
  2356. for (auto &index : indices)
  2357. {
  2358. flattened_name += "_";
  2359. flattened_name += to_member_name(*member_type, index);
  2360. parent_type = member_type;
  2361. member_type_id = member_type->member_types[index];
  2362. member_type = &get<SPIRType>(member_type_id);
  2363. }
  2364. assert(member_type->basetype != SPIRType::Struct);
  2365. // We're overriding struct member names, so ensure we do so on the primary type.
  2366. if (parent_type->type_alias)
  2367. parent_type = &get<SPIRType>(parent_type->type_alias);
  2368. // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
  2369. // which is not allowed.
  2370. ParsedIR::sanitize_underscores(flattened_name);
  2371. uint32_t last_index = indices.back();
  2372. // Pass in the varying qualifier here so it will appear in the correct declaration order.
  2373. // Replace member name while emitting it so it encodes both struct name and member name.
  2374. auto backup_name = get_member_name(parent_type->self, last_index);
  2375. auto member_name = to_member_name(*parent_type, last_index);
  2376. set_member_name(parent_type->self, last_index, flattened_name);
  2377. emit_struct_member(*parent_type, member_type_id, last_index, qual);
  2378. // Restore member name.
  2379. set_member_name(parent_type->self, last_index, member_name);
  2380. }
  2381. void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
  2382. const SmallVector<uint32_t> &indices)
  2383. {
  2384. auto sub_indices = indices;
  2385. sub_indices.push_back(0);
  2386. const SPIRType *member_type = &type;
  2387. for (auto &index : indices)
  2388. member_type = &get<SPIRType>(member_type->member_types[index]);
  2389. assert(member_type->basetype == SPIRType::Struct);
  2390. if (!member_type->array.empty())
  2391. SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
  2392. for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
  2393. {
  2394. sub_indices.back() = i;
  2395. if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
  2396. emit_flattened_io_block_struct(basename, type, qual, sub_indices);
  2397. else
  2398. emit_flattened_io_block_member(basename, type, qual, sub_indices);
  2399. }
  2400. }
  2401. void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
  2402. {
  2403. auto &var_type = get<SPIRType>(var.basetype);
  2404. if (!var_type.array.empty())
  2405. SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
  2406. // Emit flattened types based on the type alias. Normally, we are never supposed to emit
  2407. // struct declarations for aliased types.
  2408. auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
  2409. auto old_flags = ir.meta[type.self].decoration.decoration_flags;
  2410. // Emit the members as if they are part of a block to get all qualifiers.
  2411. ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
  2412. type.member_name_cache.clear();
  2413. SmallVector<uint32_t> member_indices;
  2414. member_indices.push_back(0);
  2415. auto basename = to_name(var.self);
  2416. uint32_t i = 0;
  2417. for (auto &member : type.member_types)
  2418. {
  2419. add_member_name(type, i);
  2420. auto &membertype = get<SPIRType>(member);
  2421. member_indices.back() = i;
  2422. if (membertype.basetype == SPIRType::Struct)
  2423. emit_flattened_io_block_struct(basename, type, qual, member_indices);
  2424. else
  2425. emit_flattened_io_block_member(basename, type, qual, member_indices);
  2426. i++;
  2427. }
  2428. ir.meta[type.self].decoration.decoration_flags = old_flags;
  2429. // Treat this variable as fully flattened from now on.
  2430. flattened_structs[var.self] = true;
  2431. }
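// emit_interface_block decides between three shapes for a stage input/output:
//  - flattened varyings when the target cannot declare I/O blocks or structs
//    (ESSL < 310, GLSL < 150, or force_flattened_io_blocks),
//  - a named I/O block (with patch/perprimitiveEXT/pervertexEXT qualifiers as needed,
//    requiring GL_EXT_shader_io_blocks on ESSL < 320),
//  - a plain in/out variable, with legacy integer-attribute and tessellation unsized-array fixups.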
  2432. void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
  2433. {
  2434. auto &type = get<SPIRType>(var.basetype);
  2435. if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
  2436. !options.es && options.version < 410)
  2437. {
  2438. require_extension_internal("GL_ARB_vertex_attrib_64bit");
  2439. }
  2440. // Either make it plain in/out or in/out blocks depending on what shader is doing ...
  2441. bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
  2442. const char *qual = to_storage_qualifiers_glsl(var);
  2443. if (block)
  2444. {
  2445. // ESSL earlier than 310 and GLSL earlier than 150 did not support
  2446. // I/O variables which are struct types.
  2447. // To support this, flatten the struct into separate varyings instead.
  2448. if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
  2449. (!options.es && options.version < 150))
  2450. {
  2451. // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
  2452. // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
  2453. emit_flattened_io_block(var, qual);
  2454. }
  2455. else
  2456. {
  2457. if (options.es && options.version < 320)
  2458. {
  2459. // Geometry and tessellation extensions imply this extension.
  2460. if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
  2461. require_extension_internal("GL_EXT_shader_io_blocks");
  2462. }
  2463. // Workaround to make sure we can emit "patch in/out" correctly.
  2464. fixup_io_block_patch_primitive_qualifiers(var);
  2465. // Block names should never alias.
  2466. auto block_name = to_name(type.self, false);
  2467. // The namespace for I/O blocks is separate from other variables in GLSL.
  2468. auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
  2469. // Shaders never use the block by interface name, so we don't
  2470. // have to track this other than updating name caches.
  2471. if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
  2472. block_name = get_fallback_name(type.self);
  2473. else
  2474. block_namespace.insert(block_name);
// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
  2476. // This cannot conflict with anything else, so we're safe now.
  2477. if (block_name.empty())
  2478. block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
  2479. // Instance names cannot alias block names.
  2480. resource_names.insert(block_name);
  2481. const char *block_qualifier;
  2482. if (has_decoration(var.self, DecorationPatch))
  2483. block_qualifier = "patch ";
  2484. else if (has_decoration(var.self, DecorationPerPrimitiveEXT))
  2485. block_qualifier = "perprimitiveEXT ";
  2486. else if (has_decoration(var.self, DecorationPerVertexKHR))
  2487. block_qualifier = "pervertexEXT ";
  2488. else
  2489. block_qualifier = "";
  2490. statement(layout_for_variable(var), block_qualifier, qual, block_name);
  2491. begin_scope();
  2492. type.member_name_cache.clear();
  2493. uint32_t i = 0;
  2494. for (auto &member : type.member_types)
  2495. {
  2496. add_member_name(type, i);
  2497. emit_struct_member(type, member, i);
  2498. i++;
  2499. }
  2500. add_resource_name(var.self);
  2501. end_scope_decl(join(to_name(var.self), type_to_array_glsl(type, var.self)));
  2502. statement("");
  2503. }
  2504. }
  2505. else
  2506. {
  2507. // ESSL earlier than 310 and GLSL earlier than 150 did not support
  2508. // I/O variables which are struct types.
  2509. // To support this, flatten the struct into separate varyings instead.
  2510. if (type.basetype == SPIRType::Struct &&
  2511. (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
  2512. (!options.es && options.version < 150)))
  2513. {
  2514. emit_flattened_io_block(var, qual);
  2515. }
  2516. else
  2517. {
  2518. add_resource_name(var.self);
// Legacy GLSL did not support integer attributes, so we automatically
// declare them as float and cast them on load/store.
  2521. SPIRType newtype = type;
  2522. if (is_legacy() && var.storage == StorageClassInput && type.basetype == SPIRType::Int)
  2523. newtype.basetype = SPIRType::Float;
  2524. // Tessellation control and evaluation shaders must have either
  2525. // gl_MaxPatchVertices or unsized arrays for input arrays.
  2526. // Opt for unsized as it's the more "correct" variant to use.
  2527. if (type.storage == StorageClassInput && !type.array.empty() &&
  2528. !has_decoration(var.self, DecorationPatch) &&
  2529. (get_entry_point().model == ExecutionModelTessellationControl ||
  2530. get_entry_point().model == ExecutionModelTessellationEvaluation))
  2531. {
  2532. newtype.array.back() = 0;
  2533. newtype.array_size_literal.back() = true;
  2534. }
  2535. statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
  2536. variable_decl(newtype, to_name(var.self), var.self), ";");
  2537. }
  2538. }
  2539. }
  2540. void CompilerGLSL::emit_uniform(const SPIRVariable &var)
  2541. {
  2542. auto &type = get<SPIRType>(var.basetype);
  2543. if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
  2544. {
  2545. if (!options.es && options.version < 420)
  2546. require_extension_internal("GL_ARB_shader_image_load_store");
  2547. else if (options.es && options.version < 310)
  2548. SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
  2549. }
  2550. add_resource_name(var.self);
  2551. statement(layout_for_variable(var), variable_decl(var), ";");
  2552. }
  2553. string CompilerGLSL::constant_value_macro_name(uint32_t id) const
  2554. {
  2555. return join("SPIRV_CROSS_CONSTANT_ID_", id);
  2556. }
  2557. void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
  2558. {
  2559. auto &type = get<SPIRType>(constant.basetype);
  2560. // This will break. It is bogus and should not be legal.
  2561. if (type_is_top_level_block(type))
  2562. return;
  2563. add_resource_name(constant.self);
  2564. auto name = to_name(constant.self);
  2565. statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
  2566. }
  2567. int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const
  2568. {
  2569. auto &entry_point = get_entry_point();
  2570. int index = -1;
  2571. // Need to redirect specialization constants which are used as WorkGroupSize to the builtin,
  2572. // since the spec constant declarations are never explicitly declared.
  2573. if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(ExecutionModeLocalSizeId))
  2574. {
  2575. if (c.self == entry_point.workgroup_size.id_x)
  2576. index = 0;
  2577. else if (c.self == entry_point.workgroup_size.id_y)
  2578. index = 1;
  2579. else if (c.self == entry_point.workgroup_size.id_z)
  2580. index = 2;
  2581. }
  2582. return index;
  2583. }
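// Specialization constants are emitted differently per target. In Vulkan GLSL they keep constant_id:
//   layout(constant_id = 0) const int COUNT = 4;  (illustrative)
// In plain GLSL there is no constant_id, so a user-overridable macro is emitted instead:
//   #ifndef SPIRV_CROSS_CONSTANT_ID_0
//   #define SPIRV_CROSS_CONSTANT_ID_0 4
//   #endif
//   const int COUNT = SPIRV_CROSS_CONSTANT_ID_0;  (illustrative)
// Workgroup-size constants only get the macros, since the size itself is declared through layout() in;.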
  2584. void CompilerGLSL::emit_constant(const SPIRConstant &constant)
  2585. {
  2586. auto &type = get<SPIRType>(constant.constant_type);
  2587. // This will break. It is bogus and should not be legal.
  2588. if (type_is_top_level_block(type))
  2589. return;
  2590. SpecializationConstant wg_x, wg_y, wg_z;
  2591. ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  2592. // This specialization constant is implicitly declared by emitting layout() in;
  2593. if (constant.self == workgroup_size_id)
  2594. return;
  2595. // These specialization constants are implicitly declared by emitting layout() in;
  2596. // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
  2597. // later can use macro overrides for work group size.
  2598. bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
  2599. ConstantID(constant.self) == wg_z.id;
  2600. if (options.vulkan_semantics && is_workgroup_size_constant)
  2601. {
// Vulkan GLSL does not need to declare workgroup spec constants explicitly; they are handled in layout().
  2603. return;
  2604. }
  2605. else if (!options.vulkan_semantics && is_workgroup_size_constant &&
  2606. !has_decoration(constant.self, DecorationSpecId))
  2607. {
  2608. // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
  2609. return;
  2610. }
  2611. add_resource_name(constant.self);
  2612. auto name = to_name(constant.self);
  2613. // Only scalars have constant IDs.
  2614. if (has_decoration(constant.self, DecorationSpecId))
  2615. {
  2616. if (options.vulkan_semantics)
  2617. {
  2618. statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
  2619. variable_decl(type, name), " = ", constant_expression(constant), ";");
  2620. }
  2621. else
  2622. {
  2623. const string &macro_name = constant.specialization_constant_macro_name;
  2624. statement("#ifndef ", macro_name);
  2625. statement("#define ", macro_name, " ", constant_expression(constant));
  2626. statement("#endif");
  2627. // For workgroup size constants, only emit the macros.
  2628. if (!is_workgroup_size_constant)
  2629. statement("const ", variable_decl(type, name), " = ", macro_name, ";");
  2630. }
  2631. }
  2632. else
  2633. {
  2634. statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
  2635. }
  2636. }
  2637. void CompilerGLSL::emit_entry_point_declarations()
  2638. {
  2639. }
  2640. void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
  2641. {
  2642. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
  2643. if (is_hidden_variable(var))
  2644. return;
  2645. auto *meta = ir.find_meta(var.self);
  2646. if (!meta)
  2647. return;
  2648. auto &m = meta->decoration;
  2649. if (keywords.find(m.alias) != end(keywords))
  2650. m.alias = join("_", m.alias);
  2651. });
  2652. ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
  2653. auto *meta = ir.find_meta(func.self);
  2654. if (!meta)
  2655. return;
  2656. auto &m = meta->decoration;
  2657. if (keywords.find(m.alias) != end(keywords))
  2658. m.alias = join("_", m.alias);
  2659. });
  2660. ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
  2661. auto *meta = ir.find_meta(type.self);
  2662. if (!meta)
  2663. return;
  2664. auto &m = meta->decoration;
  2665. if (keywords.find(m.alias) != end(keywords))
  2666. m.alias = join("_", m.alias);
  2667. for (auto &memb : meta->members)
  2668. if (keywords.find(memb.alias) != end(keywords))
  2669. memb.alias = join("_", memb.alias);
  2670. });
  2671. }
  2672. void CompilerGLSL::replace_illegal_names()
  2673. {
  2674. // clang-format off
  2675. static const unordered_set<string> keywords = {
  2676. "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
  2677. "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
  2678. "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
  2679. "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
  2680. "ceil", "cos", "cosh", "cross", "degrees",
  2681. "dFdx", "dFdxCoarse", "dFdxFine",
  2682. "dFdy", "dFdyCoarse", "dFdyFine",
  2683. "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
  2684. "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
  2685. "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
  2686. "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
  2687. "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
  2688. "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
  2689. "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
  2690. "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
  2691. "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
  2692. "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
  2693. "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
  2694. "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
  2695. "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
  2696. "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
  2697. "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
  2698. "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
  2699. "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
  2700. "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
  2701. "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
  2702. "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
  2703. "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
  2704. "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
  2705. "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
  2706. "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
  2707. "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
  2708. "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
  2709. "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
  2710. "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
  2711. "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
  2712. "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
  2713. "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
  2714. "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
  2715. "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
  2716. "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
  2717. "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
  2718. "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
  2719. "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
  2720. "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
  2721. "while", "writeonly",
  2722. };
  2723. // clang-format on
  2724. replace_illegal_names(keywords);
  2725. }
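// Legacy GLSL has no user-declared fragment outputs, so outputs are redirected to gl_FragData[N]
// (N taken from the Location decoration). For example, an output at location 1 would be rewritten
// to alias gl_FragData[1] (illustrative), which on legacy ESSL additionally requires GL_EXT_draw_buffers.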
  2726. void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
  2727. {
  2728. auto &m = ir.meta[var.self].decoration;
  2729. uint32_t location = 0;
  2730. if (m.decoration_flags.get(DecorationLocation))
  2731. location = m.location;
  2732. // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
  2733. // do the access chain part of this for us.
  2734. auto &type = get<SPIRType>(var.basetype);
  2735. if (type.array.empty())
  2736. {
  2737. // Redirect the write to a specific render target in legacy GLSL.
  2738. m.alias = join("gl_FragData[", location, "]");
  2739. if (is_legacy_es() && location != 0)
  2740. require_extension_internal("GL_EXT_draw_buffers");
  2741. }
  2742. else if (type.array.size() == 1)
  2743. {
  2744. // If location is non-zero, we probably have to add an offset.
  2745. // This gets really tricky since we'd have to inject an offset in the access chain.
  2746. // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
  2747. m.alias = "gl_FragData";
  2748. if (location != 0)
  2749. SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
  2750. "This is unimplemented in SPIRV-Cross.");
  2751. if (is_legacy_es())
  2752. require_extension_internal("GL_EXT_draw_buffers");
  2753. }
  2754. else
  2755. SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
  2756. var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
  2757. }
  2758. void CompilerGLSL::replace_fragment_outputs()
  2759. {
  2760. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2761. auto &type = this->get<SPIRType>(var.basetype);
  2762. if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
  2763. replace_fragment_output(var);
  2764. });
  2765. }
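// remap_swizzle widens or narrows a vector expression to the requested size by swizzling, clamping
// the source component index. For example, expanding a 2-component expression to vec4 would yield
// something like "expr.xyyy" (illustrative) before duplicate-swizzle cleanup; scalars are expanded
// with a constructor instead when the backend cannot swizzle scalars.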
  2766. string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
  2767. {
  2768. if (out_type.vecsize == input_components)
  2769. return expr;
  2770. else if (input_components == 1 && !backend.can_swizzle_scalar)
  2771. return join(type_to_glsl(out_type), "(", expr, ")");
  2772. else
  2773. {
  2774. // FIXME: This will not work with packed expressions.
  2775. auto e = enclose_expression(expr) + ".";
  2776. // Just clamp the swizzle index if we have more outputs than inputs.
  2777. for (uint32_t c = 0; c < out_type.vecsize; c++)
  2778. e += index_to_swizzle(min(c, input_components - 1));
  2779. if (backend.swizzle_is_function && out_type.vecsize > 1)
  2780. e += "()";
  2781. remove_duplicate_swizzle(e);
  2782. return e;
  2783. }
  2784. }
  2785. void CompilerGLSL::emit_pls()
  2786. {
  2787. auto &execution = get_entry_point();
  2788. if (execution.model != ExecutionModelFragment)
  2789. SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
  2790. if (!options.es)
  2791. SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
  2792. if (options.version < 300)
  2793. SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
  2794. if (!pls_inputs.empty())
  2795. {
  2796. statement("__pixel_local_inEXT _PLSIn");
  2797. begin_scope();
  2798. for (auto &input : pls_inputs)
  2799. statement(pls_decl(input), ";");
  2800. end_scope_decl();
  2801. statement("");
  2802. }
  2803. if (!pls_outputs.empty())
  2804. {
  2805. statement("__pixel_local_outEXT _PLSOut");
  2806. begin_scope();
  2807. for (auto &output : pls_outputs)
  2808. statement(pls_decl(output), ";");
  2809. end_scope_decl();
  2810. statement("");
  2811. }
  2812. }
  2813. void CompilerGLSL::fixup_image_load_store_access()
  2814. {
  2815. if (!options.enable_storage_image_qualifier_deduction)
  2816. return;
  2817. ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
  2818. auto &vartype = expression_type(var);
  2819. if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
  2820. {
  2821. // Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
  2822. // Solve this by making the image access as restricted as possible and loosen up if we need to.
  2823. // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
  2824. if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable))
  2825. {
  2826. set_decoration(var, DecorationNonWritable);
  2827. set_decoration(var, DecorationNonReadable);
  2828. }
  2829. }
  2830. });
  2831. }
  2832. static bool is_block_builtin(BuiltIn builtin)
  2833. {
  2834. return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
  2835. builtin == BuiltInCullDistance;
  2836. }
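// The gl_PerVertex output block must be redeclared explicitly whenever any of the block builtins
// (Position, PointSize, ClipDistance, CullDistance) carries an XFB Offset, when tessellation control
// or mesh shaders declare clip/cull distances, or when a mesh shader needs invariant gl_Position.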
  2837. bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
  2838. {
  2839. // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
  2840. if (storage != StorageClassOutput)
  2841. return false;
  2842. bool should_force = false;
  2843. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2844. if (should_force)
  2845. return;
  2846. auto &type = this->get<SPIRType>(var.basetype);
  2847. bool block = has_decoration(type.self, DecorationBlock);
  2848. if (var.storage == storage && block && is_builtin_variable(var))
  2849. {
  2850. uint32_t member_count = uint32_t(type.member_types.size());
  2851. for (uint32_t i = 0; i < member_count; i++)
  2852. {
  2853. if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
  2854. is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
  2855. has_member_decoration(type.self, i, DecorationOffset))
  2856. {
  2857. should_force = true;
  2858. }
  2859. }
  2860. }
  2861. else if (var.storage == storage && !block && is_builtin_variable(var))
  2862. {
  2863. if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
  2864. has_decoration(var.self, DecorationOffset))
  2865. {
  2866. should_force = true;
  2867. }
  2868. }
  2869. });
  2870. // If we're declaring clip/cull planes with control points we need to force block declaration.
  2871. if ((get_execution_model() == ExecutionModelTessellationControl ||
  2872. get_execution_model() == ExecutionModelMeshEXT) &&
  2873. (clip_distance_count || cull_distance_count))
  2874. {
  2875. should_force = true;
  2876. }
  2877. // Either glslang bug or oversight, but global invariant position does not work in mesh shaders.
  2878. if (get_execution_model() == ExecutionModelMeshEXT && position_invariant)
  2879. should_force = true;
  2880. return should_force;
  2881. }
  2882. void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model)
  2883. {
  2884. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2885. auto &type = this->get<SPIRType>(var.basetype);
  2886. bool block = has_decoration(type.self, DecorationBlock);
  2887. if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
  2888. is_builtin_variable(var))
  2889. {
  2890. if (model != ExecutionModelMeshEXT)
  2891. {
  2892. // Make sure the array has a supported name in the code.
  2893. if (var.storage == StorageClassOutput)
  2894. set_name(var.self, "gl_out");
  2895. else if (var.storage == StorageClassInput)
  2896. set_name(var.self, "gl_in");
  2897. }
  2898. else
  2899. {
  2900. auto flags = get_buffer_block_flags(var.self);
  2901. if (flags.get(DecorationPerPrimitiveEXT))
  2902. {
  2903. set_name(var.self, "gl_MeshPrimitivesEXT");
  2904. set_name(type.self, "gl_MeshPerPrimitiveEXT");
  2905. }
  2906. else
  2907. {
  2908. set_name(var.self, "gl_MeshVerticesEXT");
  2909. set_name(type.self, "gl_MeshPerVertexEXT");
  2910. }
  2911. }
  2912. }
  2913. if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block)
  2914. {
  2915. auto *m = ir.find_meta(var.self);
  2916. if (m && m->decoration.builtin)
  2917. {
  2918. auto builtin_type = m->decoration.builtin_type;
  2919. if (builtin_type == BuiltInPrimitivePointIndicesEXT)
  2920. set_name(var.self, "gl_PrimitivePointIndicesEXT");
  2921. else if (builtin_type == BuiltInPrimitiveLineIndicesEXT)
  2922. set_name(var.self, "gl_PrimitiveLineIndicesEXT");
  2923. else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT)
  2924. set_name(var.self, "gl_PrimitiveTriangleIndicesEXT");
  2925. }
  2926. }
  2927. });
  2928. }
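// Scans the builtin I/O variables, collects which per-vertex builtins are used along with any
// XFB offsets/strides and geometry stream, and then redeclares the builtin block. Illustrative output:
//   layout(xfb_buffer = 0, xfb_stride = 32) out gl_PerVertex
//   {
//       layout(xfb_offset = 0) vec4 gl_Position;
//       float gl_PointSize;
//   };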
  2929. void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
  2930. {
  2931. Bitset emitted_builtins;
  2932. Bitset global_builtins;
  2933. const SPIRVariable *block_var = nullptr;
  2934. bool emitted_block = false;
  2935. // Need to use declared size in the type.
  2936. // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
  2937. uint32_t cull_distance_size = 0;
  2938. uint32_t clip_distance_size = 0;
  2939. bool have_xfb_buffer_stride = false;
  2940. bool have_geom_stream = false;
  2941. bool have_any_xfb_offset = false;
  2942. uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
  2943. std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
  2944. const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool {
  2945. return builtin == BuiltInPosition || builtin == BuiltInPointSize ||
  2946. builtin == BuiltInClipDistance || builtin == BuiltInCullDistance;
  2947. };
  2948. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2949. auto &type = this->get<SPIRType>(var.basetype);
  2950. bool block = has_decoration(type.self, DecorationBlock);
  2951. Bitset builtins;
  2952. if (var.storage == storage && block && is_builtin_variable(var))
  2953. {
  2954. uint32_t index = 0;
  2955. for (auto &m : ir.meta[type.self].members)
  2956. {
  2957. if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
  2958. {
  2959. builtins.set(m.builtin_type);
  2960. if (m.builtin_type == BuiltInCullDistance)
  2961. cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
  2962. else if (m.builtin_type == BuiltInClipDistance)
  2963. clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
  2964. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
  2965. {
  2966. have_any_xfb_offset = true;
  2967. builtin_xfb_offsets[m.builtin_type] = m.offset;
  2968. }
  2969. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
  2970. {
  2971. uint32_t stream = m.stream;
  2972. if (have_geom_stream && geom_stream != stream)
  2973. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2974. have_geom_stream = true;
  2975. geom_stream = stream;
  2976. }
  2977. }
  2978. index++;
  2979. }
  2980. if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
  2981. has_decoration(var.self, DecorationXfbStride))
  2982. {
  2983. uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
  2984. uint32_t stride = get_decoration(var.self, DecorationXfbStride);
  2985. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  2986. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  2987. if (have_xfb_buffer_stride && stride != xfb_stride)
  2988. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  2989. have_xfb_buffer_stride = true;
  2990. xfb_buffer = buffer_index;
  2991. xfb_stride = stride;
  2992. }
  2993. if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
  2994. {
  2995. uint32_t stream = get_decoration(var.self, DecorationStream);
  2996. if (have_geom_stream && geom_stream != stream)
  2997. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2998. have_geom_stream = true;
  2999. geom_stream = stream;
  3000. }
  3001. }
  3002. else if (var.storage == storage && !block && is_builtin_variable(var))
  3003. {
  3004. // While we're at it, collect all declared global builtins (HLSL mostly ...).
  3005. auto &m = ir.meta[var.self].decoration;
  3006. if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
  3007. {
  3008. // For mesh/tesc output, Clip/Cull is an array-of-array. Look at innermost array type
  3009. // for correct result.
  3010. global_builtins.set(m.builtin_type);
  3011. if (m.builtin_type == BuiltInCullDistance)
  3012. cull_distance_size = to_array_size_literal(type, 0);
  3013. else if (m.builtin_type == BuiltInClipDistance)
  3014. clip_distance_size = to_array_size_literal(type, 0);
  3015. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
  3016. m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
  3017. {
  3018. have_any_xfb_offset = true;
  3019. builtin_xfb_offsets[m.builtin_type] = m.offset;
  3020. uint32_t buffer_index = m.xfb_buffer;
  3021. uint32_t stride = m.xfb_stride;
  3022. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  3023. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  3024. if (have_xfb_buffer_stride && stride != xfb_stride)
  3025. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  3026. have_xfb_buffer_stride = true;
  3027. xfb_buffer = buffer_index;
  3028. xfb_stride = stride;
  3029. }
  3030. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
  3031. {
  3032. uint32_t stream = get_decoration(var.self, DecorationStream);
  3033. if (have_geom_stream && geom_stream != stream)
  3034. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  3035. have_geom_stream = true;
  3036. geom_stream = stream;
  3037. }
  3038. }
  3039. }
  3040. if (builtins.empty())
  3041. return;
  3042. if (emitted_block)
  3043. SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
  3044. emitted_builtins = builtins;
  3045. emitted_block = true;
  3046. block_var = &var;
  3047. });
  3048. global_builtins =
  3049. Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
  3050. (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
  3051. // Try to collect all other declared builtins.
  3052. if (!emitted_block)
  3053. emitted_builtins = global_builtins;
  3054. // Can't declare an empty interface block.
  3055. if (emitted_builtins.empty())
  3056. return;
  3057. if (storage == StorageClassOutput)
  3058. {
  3059. SmallVector<string> attr;
  3060. if (have_xfb_buffer_stride && have_any_xfb_offset)
  3061. {
  3062. if (!options.es)
  3063. {
  3064. if (options.version < 440 && options.version >= 140)
  3065. require_extension_internal("GL_ARB_enhanced_layouts");
  3066. else if (options.version < 140)
  3067. SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
  3068. if (!options.es && options.version < 440)
  3069. require_extension_internal("GL_ARB_enhanced_layouts");
  3070. }
  3071. else if (options.es)
  3072. SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
  3073. attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
  3074. }
  3075. if (have_geom_stream)
  3076. {
  3077. if (get_execution_model() != ExecutionModelGeometry)
  3078. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  3079. if (options.es)
  3080. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  3081. if (options.version < 400)
  3082. require_extension_internal("GL_ARB_transform_feedback3");
  3083. attr.push_back(join("stream = ", geom_stream));
  3084. }
  3085. if (model == ExecutionModelMeshEXT)
  3086. statement("out gl_MeshPerVertexEXT");
  3087. else if (!attr.empty())
  3088. statement("layout(", merge(attr), ") out gl_PerVertex");
  3089. else
  3090. statement("out gl_PerVertex");
  3091. }
  3092. else
  3093. {
3094. // With geometry passthrough, the gl_PerVertex input block must also be declared passthrough.
  3095. if (get_entry_point().geometry_passthrough)
  3096. statement("layout(passthrough) in gl_PerVertex");
  3097. else
  3098. statement("in gl_PerVertex");
  3099. }
  3100. begin_scope();
  3101. if (emitted_builtins.get(BuiltInPosition))
  3102. {
  3103. auto itr = builtin_xfb_offsets.find(BuiltInPosition);
  3104. if (itr != end(builtin_xfb_offsets))
  3105. statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
  3106. else if (position_invariant)
  3107. statement("invariant vec4 gl_Position;");
  3108. else
  3109. statement("vec4 gl_Position;");
  3110. }
  3111. if (emitted_builtins.get(BuiltInPointSize))
  3112. {
  3113. auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
  3114. if (itr != end(builtin_xfb_offsets))
  3115. statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
  3116. else
  3117. statement("float gl_PointSize;");
  3118. }
  3119. if (emitted_builtins.get(BuiltInClipDistance))
  3120. {
  3121. auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
  3122. if (itr != end(builtin_xfb_offsets))
  3123. statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
  3124. else
  3125. statement("float gl_ClipDistance[", clip_distance_size, "];");
  3126. }
  3127. if (emitted_builtins.get(BuiltInCullDistance))
  3128. {
  3129. auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
  3130. if (itr != end(builtin_xfb_offsets))
  3131. statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
  3132. else
  3133. statement("float gl_CullDistance[", cull_distance_size, "];");
  3134. }
  3135. bool builtin_array = model == ExecutionModelTessellationControl ||
  3136. (model == ExecutionModelMeshEXT && storage == StorageClassOutput) ||
  3137. (model == ExecutionModelGeometry && storage == StorageClassInput) ||
  3138. (model == ExecutionModelTessellationEvaluation && storage == StorageClassInput);
  3139. if (builtin_array)
  3140. {
  3141. const char *instance_name;
  3142. if (model == ExecutionModelMeshEXT)
  3143. instance_name = "gl_MeshVerticesEXT"; // Per primitive is never synthesized.
  3144. else
  3145. instance_name = storage == StorageClassInput ? "gl_in" : "gl_out";
  3146. if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
  3147. end_scope_decl(join(instance_name, "[", get_entry_point().output_vertices, "]"));
  3148. else
  3149. end_scope_decl(join(instance_name, "[]"));
  3150. }
  3151. else
  3152. end_scope_decl();
  3153. statement("");
  3154. }
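// Rough sketch only (the exact layout qualifiers, member set and instance name depend on the
// xfb/stream state gathered above and on the execution model): the function above redeclares
// the builtin block along the lines of
//
//   layout(xfb_buffer = 0, xfb_stride = 16) out gl_PerVertex
//   {
//       layout(xfb_offset = 0) vec4 gl_Position;
//       float gl_PointSize;
//   } gl_out[];
//
// Non-arrayed stages omit the instance name and the trailing array declarator.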
  3155. bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
  3156. {
  3157. bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
  3158. if (statically_assigned)
  3159. {
  3160. auto *constant = maybe_get<SPIRConstant>(var.static_expression);
  3161. if (constant && constant->is_used_as_lut)
  3162. return true;
  3163. }
  3164. return false;
  3165. }
  3166. void CompilerGLSL::emit_resources()
  3167. {
  3168. auto &execution = get_entry_point();
  3169. replace_illegal_names();
  3170. // Legacy GL uses gl_FragData[], redeclare all fragment outputs
  3171. // with builtins.
  3172. if (execution.model == ExecutionModelFragment && is_legacy())
  3173. replace_fragment_outputs();
  3174. // Emit PLS blocks if we have such variables.
  3175. if (!pls_inputs.empty() || !pls_outputs.empty())
  3176. emit_pls();
  3177. switch (execution.model)
  3178. {
  3179. case ExecutionModelGeometry:
  3180. case ExecutionModelTessellationControl:
  3181. case ExecutionModelTessellationEvaluation:
  3182. case ExecutionModelMeshEXT:
  3183. fixup_implicit_builtin_block_names(execution.model);
  3184. break;
  3185. default:
  3186. break;
  3187. }
  3188. bool global_invariant_position = position_invariant && (options.es || options.version >= 120);
  3189. // Emit custom gl_PerVertex for SSO compatibility.
  3190. if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
  3191. {
  3192. switch (execution.model)
  3193. {
  3194. case ExecutionModelGeometry:
  3195. case ExecutionModelTessellationControl:
  3196. case ExecutionModelTessellationEvaluation:
  3197. emit_declared_builtin_block(StorageClassInput, execution.model);
  3198. emit_declared_builtin_block(StorageClassOutput, execution.model);
  3199. global_invariant_position = false;
  3200. break;
  3201. case ExecutionModelVertex:
  3202. case ExecutionModelMeshEXT:
  3203. emit_declared_builtin_block(StorageClassOutput, execution.model);
  3204. global_invariant_position = false;
  3205. break;
  3206. default:
  3207. break;
  3208. }
  3209. }
  3210. else if (should_force_emit_builtin_block(StorageClassOutput))
  3211. {
  3212. emit_declared_builtin_block(StorageClassOutput, execution.model);
  3213. global_invariant_position = false;
  3214. }
  3215. else if (execution.geometry_passthrough)
  3216. {
  3217. // Need to declare gl_in with Passthrough.
  3218. // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
  3219. emit_declared_builtin_block(StorageClassInput, execution.model);
  3220. }
  3221. else
  3222. {
  3223. // Need to redeclare clip/cull distance with explicit size to use them.
  3224. // SPIR-V mandates these builtins have a size declared.
  3225. const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
  3226. if (clip_distance_count != 0)
  3227. statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
  3228. if (cull_distance_count != 0)
  3229. statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
  3230. if (clip_distance_count != 0 || cull_distance_count != 0)
  3231. statement("");
  3232. }
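// E.g., with clip_distance_count == 2 in a vertex shader, the branch above emits:
//   out float gl_ClipDistance[2];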
  3233. if (global_invariant_position)
  3234. {
  3235. statement("invariant gl_Position;");
  3236. statement("");
  3237. }
  3238. bool emitted = false;
  3239. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  3240. {
  3241. // Output buffer reference block forward declarations.
  3242. ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type)
  3243. {
  3244. if (is_physical_pointer(type))
  3245. {
  3246. bool emit_type = true;
  3247. if (!is_physical_pointer_to_buffer_block(type))
  3248. {
  3249. // Only forward-declare if we intend to emit it in the non_block_pointer types.
  3250. // Otherwise, these are just "benign" pointer types that exist as a result of access chains.
  3251. emit_type = std::find(physical_storage_non_block_pointer_types.begin(),
  3252. physical_storage_non_block_pointer_types.end(),
  3253. id) != physical_storage_non_block_pointer_types.end();
  3254. }
  3255. if (emit_type)
  3256. {
  3257. emit_buffer_reference_block(id, true);
  3258. emitted = true;
  3259. }
  3260. }
  3261. });
  3262. }
  3263. if (emitted)
  3264. statement("");
  3265. emitted = false;
3266. // If emitting Vulkan GLSL, declare specialization constants as actual spec constants;
3267. // otherwise they become plain constants backed by a macro so the host can override them.
3268. // Spec op expressions will redirect to the constant name.
  3269. //
  3270. {
  3271. auto loop_lock = ir.create_loop_hard_lock();
  3272. for (auto &id_ : ir.ids_for_constant_undef_or_type)
  3273. {
  3274. auto &id = ir.ids[id_];
  3275. // Skip declaring any bogus constants or undefs which use block types.
  3276. // We don't declare block types directly, so this will never work.
  3277. // Should not be legal SPIR-V, so this is considered a workaround.
  3278. if (id.get_type() == TypeConstant)
  3279. {
  3280. auto &c = id.get<SPIRConstant>();
  3281. bool needs_declaration = c.specialization || c.is_used_as_lut;
  3282. if (needs_declaration)
  3283. {
  3284. if (!options.vulkan_semantics && c.specialization)
  3285. {
  3286. c.specialization_constant_macro_name =
  3287. constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
  3288. }
  3289. emit_constant(c);
  3290. emitted = true;
  3291. }
  3292. }
  3293. else if (id.get_type() == TypeConstantOp)
  3294. {
  3295. emit_specialization_constant_op(id.get<SPIRConstantOp>());
  3296. emitted = true;
  3297. }
  3298. else if (id.get_type() == TypeType)
  3299. {
  3300. auto *type = &id.get<SPIRType>();
  3301. bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
  3302. (!has_decoration(type->self, DecorationBlock) &&
  3303. !has_decoration(type->self, DecorationBufferBlock));
  3304. // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
  3305. if (type->basetype == SPIRType::Struct && type->pointer &&
  3306. has_decoration(type->self, DecorationBlock) &&
  3307. (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
  3308. type->storage == StorageClassHitAttributeKHR))
  3309. {
  3310. type = &get<SPIRType>(type->parent_type);
  3311. is_natural_struct = true;
  3312. }
  3313. if (is_natural_struct)
  3314. {
  3315. if (emitted)
  3316. statement("");
  3317. emitted = false;
  3318. emit_struct(*type);
  3319. }
  3320. }
  3321. else if (id.get_type() == TypeUndef)
  3322. {
  3323. auto &undef = id.get<SPIRUndef>();
  3324. auto &type = this->get<SPIRType>(undef.basetype);
  3325. // OpUndef can be void for some reason ...
  3326. if (type.basetype == SPIRType::Void)
  3327. continue;
  3328. // This will break. It is bogus and should not be legal.
  3329. if (type_is_top_level_block(type))
  3330. continue;
  3331. string initializer;
  3332. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  3333. initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
  3334. // FIXME: If used in a constant, we must declare it as one.
  3335. statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
  3336. emitted = true;
  3337. }
  3338. }
  3339. }
  3340. if (emitted)
  3341. statement("");
  3342. // If we needed to declare work group size late, check here.
  3343. // If the work group size depends on a specialization constant, we need to declare the layout() block
  3344. // after constants (and their macros) have been declared.
  3345. if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
  3346. (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)))
  3347. {
  3348. SpecializationConstant wg_x, wg_y, wg_z;
  3349. get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  3350. if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
  3351. {
  3352. SmallVector<string> inputs;
  3353. build_workgroup_size(inputs, wg_x, wg_y, wg_z);
  3354. statement("layout(", merge(inputs), ") in;");
  3355. statement("");
  3356. }
  3357. }
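// Illustrative example only (the macro name is an assumption based on how non-Vulkan
// specialization constants are declared above): a compute shader whose X dimension comes
// from a spec constant might get
//
//   layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = 1, local_size_z = 1) in;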
  3358. emitted = false;
  3359. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
  3360. {
  3361. // Output buffer reference blocks.
  3362. // Buffer reference blocks can reference themselves to support things like linked lists.
  3363. for (auto type : physical_storage_non_block_pointer_types)
  3364. emit_buffer_reference_block(type, false);
  3365. ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) {
  3366. if (is_physical_pointer_to_buffer_block(type))
  3367. emit_buffer_reference_block(id, false);
  3368. });
  3369. }
  3370. // Output UBOs and SSBOs
  3371. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3372. auto &type = this->get<SPIRType>(var.basetype);
  3373. bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
  3374. type.storage == StorageClassShaderRecordBufferKHR;
  3375. bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
  3376. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  3377. if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
  3378. has_block_flags)
  3379. {
  3380. emit_buffer_block(var);
  3381. }
  3382. });
  3383. // Output push constant blocks
  3384. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3385. auto &type = this->get<SPIRType>(var.basetype);
  3386. if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
  3387. !is_hidden_variable(var))
  3388. {
  3389. emit_push_constant_block(var);
  3390. }
  3391. });
  3392. bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
  3393. // Output Uniform Constants (values, samplers, images, etc).
  3394. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3395. auto &type = this->get<SPIRType>(var.basetype);
  3396. // If we're remapping separate samplers and images, only emit the combined samplers.
  3397. if (skip_separate_image_sampler)
  3398. {
  3399. // Sampler buffers are always used without a sampler, and they will also work in regular GL.
  3400. bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
  3401. bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
  3402. bool separate_sampler = type.basetype == SPIRType::Sampler;
  3403. if (!sampler_buffer && (separate_image || separate_sampler))
  3404. return;
  3405. }
  3406. if (var.storage != StorageClassFunction && type.pointer &&
  3407. (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
  3408. type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
  3409. type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
  3410. type.storage == StorageClassHitAttributeKHR) &&
  3411. !is_hidden_variable(var))
  3412. {
  3413. emit_uniform(var);
  3414. emitted = true;
  3415. }
  3416. });
  3417. if (emitted)
  3418. statement("");
  3419. emitted = false;
  3420. bool emitted_base_instance = false;
  3421. // Output in/out interfaces.
  3422. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3423. auto &type = this->get<SPIRType>(var.basetype);
  3424. bool is_hidden = is_hidden_variable(var);
  3425. // Unused output I/O variables might still be required to implement framebuffer fetch.
  3426. if (var.storage == StorageClassOutput && !is_legacy() &&
  3427. location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0)
  3428. {
  3429. is_hidden = false;
  3430. }
  3431. if (var.storage != StorageClassFunction && type.pointer &&
  3432. (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
  3433. interface_variable_exists_in_entry_point(var.self) && !is_hidden)
  3434. {
  3435. if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput &&
  3436. type.array.size() == 1)
  3437. {
  3438. SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader.");
  3439. }
  3440. emit_interface_block(var);
  3441. emitted = true;
  3442. }
  3443. else if (is_builtin_variable(var))
  3444. {
  3445. auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
  3446. // For gl_InstanceIndex emulation on GLES, the API user needs to
  3447. // supply this uniform.
  3448. // The draw parameter extension is soft-enabled on GL with some fallbacks.
  3449. if (!options.vulkan_semantics)
  3450. {
  3451. if (!emitted_base_instance &&
  3452. ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
  3453. (builtin == BuiltInBaseInstance)))
  3454. {
  3455. statement("#ifdef GL_ARB_shader_draw_parameters");
  3456. statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
  3457. statement("#else");
  3458. // A crude, but simple workaround which should be good enough for non-indirect draws.
  3459. statement("uniform int SPIRV_Cross_BaseInstance;");
  3460. statement("#endif");
  3461. emitted = true;
  3462. emitted_base_instance = true;
  3463. }
  3464. else if (builtin == BuiltInBaseVertex)
  3465. {
  3466. statement("#ifdef GL_ARB_shader_draw_parameters");
  3467. statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
  3468. statement("#else");
  3469. // A crude, but simple workaround which should be good enough for non-indirect draws.
  3470. statement("uniform int SPIRV_Cross_BaseVertex;");
  3471. statement("#endif");
  3472. }
  3473. else if (builtin == BuiltInDrawIndex)
  3474. {
  3475. statement("#ifndef GL_ARB_shader_draw_parameters");
  3476. // Cannot really be worked around.
  3477. statement("#error GL_ARB_shader_draw_parameters is not supported.");
  3478. statement("#endif");
  3479. }
  3480. }
  3481. }
  3482. });
  3483. // Global variables.
  3484. for (auto global : global_variables)
  3485. {
  3486. auto &var = get<SPIRVariable>(global);
  3487. if (is_hidden_variable(var, true))
  3488. continue;
  3489. if (var.storage != StorageClassOutput)
  3490. {
  3491. if (!variable_is_lut(var))
  3492. {
  3493. add_resource_name(var.self);
  3494. string initializer;
  3495. if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
  3496. !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
  3497. {
  3498. initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
  3499. }
  3500. statement(variable_decl(var), initializer, ";");
  3501. emitted = true;
  3502. }
  3503. }
  3504. else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
  3505. {
  3506. emit_output_variable_initializer(var);
  3507. }
  3508. }
  3509. if (emitted)
  3510. statement("");
  3511. }
  3512. void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
  3513. {
  3514. // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
  3515. auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
  3516. auto &type = get<SPIRType>(var.basetype);
  3517. bool is_patch = has_decoration(var.self, DecorationPatch);
  3518. bool is_block = has_decoration(type.self, DecorationBlock);
  3519. bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
  3520. if (is_block)
  3521. {
  3522. uint32_t member_count = uint32_t(type.member_types.size());
  3523. bool type_is_array = type.array.size() == 1;
  3524. uint32_t array_size = 1;
  3525. if (type_is_array)
  3526. array_size = to_array_size_literal(type);
  3527. uint32_t iteration_count = is_control_point ? 1 : array_size;
  3528. // If the initializer is a block, we must initialize each block member one at a time.
  3529. for (uint32_t i = 0; i < member_count; i++)
  3530. {
  3531. // These outputs might not have been properly declared, so don't initialize them in that case.
  3532. if (has_member_decoration(type.self, i, DecorationBuiltIn))
  3533. {
  3534. if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
  3535. !cull_distance_count)
  3536. continue;
  3537. if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
  3538. !clip_distance_count)
  3539. continue;
  3540. }
  3541. // We need to build a per-member array first, essentially transposing from AoS to SoA.
  3542. // This code path hits when we have an array of blocks.
  3543. string lut_name;
  3544. if (type_is_array)
  3545. {
  3546. lut_name = join("_", var.self, "_", i, "_init");
  3547. uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
  3548. auto &member_type = get<SPIRType>(member_type_id);
  3549. auto array_type = member_type;
  3550. array_type.parent_type = member_type_id;
  3551. array_type.op = OpTypeArray;
  3552. array_type.array.push_back(array_size);
  3553. array_type.array_size_literal.push_back(true);
  3554. SmallVector<string> exprs;
  3555. exprs.reserve(array_size);
  3556. auto &c = get<SPIRConstant>(var.initializer);
  3557. for (uint32_t j = 0; j < array_size; j++)
  3558. exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
  3559. statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type, 0), " = ",
  3560. type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
  3561. }
  3562. for (uint32_t j = 0; j < iteration_count; j++)
  3563. {
  3564. entry_func.fixup_hooks_in.push_back([=, &var]() {
  3565. AccessChainMeta meta;
  3566. auto &c = this->get<SPIRConstant>(var.initializer);
  3567. uint32_t invocation_id = 0;
  3568. uint32_t member_index_id = 0;
  3569. if (is_control_point)
  3570. {
  3571. uint32_t ids = ir.increase_bound_by(3);
  3572. auto &uint_type = set<SPIRType>(ids, OpTypeInt);
  3573. uint_type.basetype = SPIRType::UInt;
  3574. uint_type.width = 32;
  3575. set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
  3576. set<SPIRConstant>(ids + 2, ids, i, false);
  3577. invocation_id = ids + 1;
  3578. member_index_id = ids + 2;
  3579. }
  3580. if (is_patch)
  3581. {
  3582. statement("if (gl_InvocationID == 0)");
  3583. begin_scope();
  3584. }
  3585. if (type_is_array && !is_control_point)
  3586. {
  3587. uint32_t indices[2] = { j, i };
  3588. auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  3589. statement(chain, " = ", lut_name, "[", j, "];");
  3590. }
  3591. else if (is_control_point)
  3592. {
  3593. uint32_t indices[2] = { invocation_id, member_index_id };
  3594. auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
  3595. statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
  3596. }
  3597. else
  3598. {
  3599. auto chain =
  3600. access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  3601. statement(chain, " = ", to_expression(c.subconstants[i]), ";");
  3602. }
  3603. if (is_patch)
  3604. end_scope();
  3605. });
  3606. }
  3607. }
  3608. }
  3609. else if (is_control_point)
  3610. {
  3611. auto lut_name = join("_", var.self, "_init");
  3612. statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type, 0),
  3613. " = ", to_expression(var.initializer), ";");
  3614. entry_func.fixup_hooks_in.push_back([&, lut_name]() {
  3615. statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
  3616. });
  3617. }
  3618. else if (has_decoration(var.self, DecorationBuiltIn) &&
  3619. BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask)
  3620. {
  3621. // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_<
  3622. entry_func.fixup_hooks_in.push_back([&] {
  3623. auto &c = this->get<SPIRConstant>(var.initializer);
  3624. uint32_t num_constants = uint32_t(c.subconstants.size());
  3625. for (uint32_t i = 0; i < num_constants; i++)
  3626. {
  3627. // Don't use to_expression on constant since it might be uint, just fish out the raw int.
  3628. statement(to_expression(var.self), "[", i, "] = ",
  3629. convert_to_string(this->get<SPIRConstant>(c.subconstants[i]).scalar_i32()), ";");
  3630. }
  3631. });
  3632. }
  3633. else
  3634. {
  3635. auto lut_name = join("_", var.self, "_init");
  3636. statement("const ", type_to_glsl(type), " ", lut_name,
  3637. type_to_array_glsl(type, var.self), " = ", to_expression(var.initializer), ";");
  3638. entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
  3639. if (is_patch)
  3640. {
  3641. statement("if (gl_InvocationID == 0)");
  3642. begin_scope();
  3643. }
  3644. statement(to_expression(var.self), " = ", lut_name, ";");
  3645. if (is_patch)
  3646. end_scope();
  3647. });
  3648. }
  3649. }
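// Rough illustration (the variable name and the _<id>_init LUT name below are hypothetical;
// they are synthesized per variable): for a non-block tessellation control output with an
// initializer, the control-point path above emits something like
//
//   const vec4 _22_init[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
//
// plus a fixup hook at the top of main():
//
//   foo[gl_InvocationID] = _22_init[gl_InvocationID];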
  3650. void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op)
  3651. {
  3652. std::string result;
  3653. switch (group_op)
  3654. {
  3655. case GroupOperationReduce:
  3656. result = "reduction";
  3657. break;
  3658. case GroupOperationExclusiveScan:
  3659. result = "excl_scan";
  3660. break;
  3661. case GroupOperationInclusiveScan:
  3662. result = "incl_scan";
  3663. break;
  3664. default:
  3665. SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
  3666. }
  3667. struct TypeInfo
  3668. {
  3669. std::string type;
  3670. std::string identity;
  3671. };
  3672. std::vector<TypeInfo> type_infos;
  3673. switch (op)
  3674. {
  3675. case OpGroupNonUniformIAdd:
  3676. {
  3677. type_infos.emplace_back(TypeInfo{ "uint", "0u" });
  3678. type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(0u)" });
  3679. type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(0u)" });
  3680. type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(0u)" });
  3681. type_infos.emplace_back(TypeInfo{ "int", "0" });
  3682. type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(0)" });
  3683. type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(0)" });
  3684. type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(0)" });
  3685. break;
  3686. }
  3687. case OpGroupNonUniformFAdd:
  3688. {
  3689. type_infos.emplace_back(TypeInfo{ "float", "0.0f" });
  3690. type_infos.emplace_back(TypeInfo{ "vec2", "vec2(0.0f)" });
  3691. type_infos.emplace_back(TypeInfo{ "vec3", "vec3(0.0f)" });
  3692. type_infos.emplace_back(TypeInfo{ "vec4", "vec4(0.0f)" });
3693. // ARB_gpu_shader_fp64 is core as of GL 4.0, which in turn is required by NV_shader_thread_shuffle.
  3694. type_infos.emplace_back(TypeInfo{ "double", "0.0LF" });
  3695. type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(0.0LF)" });
  3696. type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(0.0LF)" });
  3697. type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(0.0LF)" });
  3698. break;
  3699. }
  3700. case OpGroupNonUniformIMul:
  3701. {
  3702. type_infos.emplace_back(TypeInfo{ "uint", "1u" });
  3703. type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(1u)" });
  3704. type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(1u)" });
  3705. type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(1u)" });
  3706. type_infos.emplace_back(TypeInfo{ "int", "1" });
  3707. type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(1)" });
  3708. type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(1)" });
  3709. type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(1)" });
  3710. break;
  3711. }
  3712. case OpGroupNonUniformFMul:
  3713. {
  3714. type_infos.emplace_back(TypeInfo{ "float", "1.0f" });
  3715. type_infos.emplace_back(TypeInfo{ "vec2", "vec2(1.0f)" });
  3716. type_infos.emplace_back(TypeInfo{ "vec3", "vec3(1.0f)" });
  3717. type_infos.emplace_back(TypeInfo{ "vec4", "vec4(1.0f)" });
  3718. type_infos.emplace_back(TypeInfo{ "double", "0.0LF" });
  3719. type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(1.0LF)" });
  3720. type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(1.0LF)" });
  3721. type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(1.0LF)" });
  3722. break;
  3723. }
  3724. default:
  3725. SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
  3726. }
  3727. const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd;
  3728. const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul;
  3729. std::string op_symbol;
  3730. if (op_is_addition)
  3731. {
  3732. op_symbol = "+=";
  3733. }
  3734. else if (op_is_multiplication)
  3735. {
  3736. op_symbol = "*=";
  3737. }
  3738. for (const TypeInfo &t : type_infos)
  3739. {
  3740. statement(t.type, " ", func, "(", t.type, " v)");
  3741. begin_scope();
  3742. statement(t.type, " ", result, " = ", t.identity, ";");
  3743. statement("uvec4 active_threads = subgroupBallot(true);");
  3744. statement("if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)");
  3745. begin_scope();
  3746. statement("uint total = gl_SubgroupSize / 2u;");
  3747. statement(result, " = v;");
  3748. statement("for (uint i = 1u; i <= total; i <<= 1u)");
  3749. begin_scope();
  3750. statement("bool valid;");
  3751. if (group_op == GroupOperationReduce)
  3752. {
  3753. statement(t.type, " s = shuffleXorNV(", result, ", i, gl_SubgroupSize, valid);");
  3754. }
  3755. else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
  3756. {
  3757. statement(t.type, " s = shuffleUpNV(", result, ", i, gl_SubgroupSize, valid);");
  3758. }
  3759. if (op_is_addition || op_is_multiplication)
  3760. {
  3761. statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
  3762. }
  3763. end_scope();
  3764. if (group_op == GroupOperationExclusiveScan)
  3765. {
  3766. statement(result, " = shuffleUpNV(", result, ", 1u, gl_SubgroupSize);");
  3767. statement("if (subgroupElect())");
  3768. begin_scope();
  3769. statement(result, " = ", t.identity, ";");
  3770. end_scope();
  3771. }
  3772. end_scope();
  3773. statement("else");
  3774. begin_scope();
  3775. if (group_op == GroupOperationExclusiveScan)
  3776. {
  3777. statement("uint total = subgroupBallotBitCount(gl_SubgroupLtMask);");
  3778. }
  3779. else if (group_op == GroupOperationInclusiveScan)
  3780. {
  3781. statement("uint total = subgroupBallotBitCount(gl_SubgroupLeMask);");
  3782. }
  3783. statement("for (uint i = 0u; i < gl_SubgroupSize; ++i)");
  3784. begin_scope();
  3785. statement("bool valid = subgroupBallotBitExtract(active_threads, i);");
  3786. statement(t.type, " s = shuffleNV(v, i, gl_SubgroupSize);");
  3787. if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
  3788. {
  3789. statement("valid = valid && (i < total);");
  3790. }
  3791. if (op_is_addition || op_is_multiplication)
  3792. {
  3793. statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
  3794. }
  3795. end_scope();
  3796. end_scope();
  3797. statement("return ", result, ";");
  3798. end_scope();
  3799. }
  3800. }
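// Sketch of what gets emitted, assuming func == "subgroupAdd", op == OpGroupNonUniformIAdd and
// group_op == GroupOperationReduce (the other types and ops follow the same shape):
//
//   uint subgroupAdd(uint v)
//   {
//       uint reduction = 0u;
//       uvec4 active_threads = subgroupBallot(true);
//       if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)
//           { /* fast path: butterfly reduction via shuffleXorNV */ }
//       else
//           { /* slow path: visit every lane with shuffleNV, masking out inactive ones */ }
//       return reduction;
//   }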
  3801. void CompilerGLSL::emit_extension_workarounds(ExecutionModel model)
  3802. {
  3803. static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
  3804. "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
  3805. if (!options.vulkan_semantics)
  3806. {
  3807. using Supp = ShaderSubgroupSupportHelper;
  3808. auto result = shader_subgroup_supporter.resolve();
  3809. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
  3810. {
  3811. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
  3812. for (auto &e : exts)
  3813. {
  3814. const char *name = Supp::get_extension_name(e);
  3815. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3816. switch (e)
  3817. {
  3818. case Supp::NV_shader_thread_group:
  3819. statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
  3820. statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
  3821. statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
  3822. statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
  3823. statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
  3824. break;
  3825. case Supp::ARB_shader_ballot:
  3826. statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
  3827. statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
  3828. statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
  3829. statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
  3830. statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
  3831. break;
  3832. default:
  3833. break;
  3834. }
  3835. }
  3836. statement("#endif");
  3837. statement("");
  3838. }
  3839. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
  3840. {
  3841. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
  3842. for (auto &e : exts)
  3843. {
  3844. const char *name = Supp::get_extension_name(e);
  3845. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3846. switch (e)
  3847. {
  3848. case Supp::NV_shader_thread_group:
  3849. statement("#define gl_SubgroupSize gl_WarpSizeNV");
  3850. break;
  3851. case Supp::ARB_shader_ballot:
  3852. statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
  3853. break;
  3854. case Supp::AMD_gcn_shader:
  3855. statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
  3856. break;
  3857. default:
  3858. break;
  3859. }
  3860. }
  3861. statement("#endif");
  3862. statement("");
  3863. }
  3864. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
  3865. {
  3866. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
  3867. for (auto &e : exts)
  3868. {
  3869. const char *name = Supp::get_extension_name(e);
  3870. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3871. switch (e)
  3872. {
  3873. case Supp::NV_shader_thread_group:
  3874. statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
  3875. break;
  3876. case Supp::ARB_shader_ballot:
  3877. statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
  3878. break;
  3879. default:
  3880. break;
  3881. }
  3882. }
  3883. statement("#endif");
  3884. statement("");
  3885. }
  3886. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
  3887. {
  3888. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
  3889. for (auto &e : exts)
  3890. {
  3891. const char *name = Supp::get_extension_name(e);
  3892. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3893. switch (e)
  3894. {
  3895. case Supp::NV_shader_thread_group:
  3896. statement("#define gl_SubgroupID gl_WarpIDNV");
  3897. break;
  3898. default:
  3899. break;
  3900. }
  3901. }
  3902. statement("#endif");
  3903. statement("");
  3904. }
  3905. if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
  3906. {
  3907. auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
  3908. for (auto &e : exts)
  3909. {
  3910. const char *name = Supp::get_extension_name(e);
  3911. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3912. switch (e)
  3913. {
  3914. case Supp::NV_shader_thread_group:
  3915. statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
  3916. break;
  3917. default:
  3918. break;
  3919. }
  3920. }
  3921. statement("#endif");
  3922. statement("");
  3923. }
  3924. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
  3925. {
  3926. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
  3927. for (auto &e : exts)
  3928. {
  3929. const char *name = Supp::get_extension_name(e);
  3930. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3931. switch (e)
  3932. {
  3933. case Supp::NV_shader_thread_shuffle:
  3934. for (const char *t : workaround_types)
  3935. {
  3936. statement(t, " subgroupBroadcastFirst(", t,
  3937. " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
  3938. }
  3939. for (const char *t : workaround_types)
  3940. {
  3941. statement(t, " subgroupBroadcast(", t,
  3942. " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
  3943. }
  3944. break;
  3945. case Supp::ARB_shader_ballot:
  3946. for (const char *t : workaround_types)
  3947. {
  3948. statement(t, " subgroupBroadcastFirst(", t,
  3949. " value) { return readFirstInvocationARB(value); }");
  3950. }
  3951. for (const char *t : workaround_types)
  3952. {
  3953. statement(t, " subgroupBroadcast(", t,
  3954. " value, uint id) { return readInvocationARB(value, id); }");
  3955. }
  3956. break;
  3957. default:
  3958. break;
  3959. }
  3960. }
  3961. statement("#endif");
  3962. statement("");
  3963. }
  3964. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
  3965. {
  3966. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
  3967. for (auto &e : exts)
  3968. {
  3969. const char *name = Supp::get_extension_name(e);
  3970. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3971. switch (e)
  3972. {
  3973. case Supp::NV_shader_thread_group:
  3974. statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
  3975. statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
  3976. break;
  3977. default:
  3978. break;
  3979. }
  3980. }
  3981. statement("#else");
  3982. statement("uint subgroupBallotFindLSB(uvec4 value)");
  3983. begin_scope();
  3984. statement("int firstLive = findLSB(value.x);");
  3985. statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
  3986. end_scope();
  3987. statement("uint subgroupBallotFindMSB(uvec4 value)");
  3988. begin_scope();
  3989. statement("int firstLive = findMSB(value.y);");
  3990. statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
  3991. end_scope();
  3992. statement("#endif");
  3993. statement("");
  3994. }
  3995. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
  3996. {
  3997. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
  3998. for (auto &e : exts)
  3999. {
  4000. const char *name = Supp::get_extension_name(e);
  4001. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  4002. switch (e)
  4003. {
  4004. case Supp::NV_gpu_shader_5:
  4005. statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
  4006. statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
  4007. statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
  4008. break;
  4009. case Supp::ARB_shader_group_vote:
  4010. statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
  4011. statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
  4012. statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
  4013. break;
  4014. case Supp::AMD_gcn_shader:
  4015. statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
  4016. statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
  4017. statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
  4018. "b == ballotAMD(true); }");
  4019. break;
  4020. default:
  4021. break;
  4022. }
  4023. }
  4024. statement("#endif");
  4025. statement("");
  4026. }
  4027. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
  4028. {
  4029. statement("#ifndef GL_KHR_shader_subgroup_vote");
  4030. statement(
  4031. "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
  4032. "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
  4033. for (const char *t : workaround_types)
  4034. statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
  4035. statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
  4036. statement("#endif");
  4037. statement("");
  4038. }
  4039. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
  4040. {
  4041. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
  4042. for (auto &e : exts)
  4043. {
  4044. const char *name = Supp::get_extension_name(e);
  4045. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  4046. switch (e)
  4047. {
  4048. case Supp::NV_shader_thread_group:
  4049. statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
  4050. break;
  4051. case Supp::ARB_shader_ballot:
  4052. statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
  4053. break;
  4054. default:
  4055. break;
  4056. }
  4057. }
  4058. statement("#endif");
  4059. statement("");
  4060. }
  4061. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
  4062. {
  4063. statement("#ifndef GL_KHR_shader_subgroup_basic");
  4064. statement("bool subgroupElect()");
  4065. begin_scope();
  4066. statement("uvec4 activeMask = subgroupBallot(true);");
  4067. statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
  4068. statement("return gl_SubgroupInvocationID == firstLive;");
  4069. end_scope();
  4070. statement("#endif");
  4071. statement("");
  4072. }
  4073. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
  4074. {
4075. // The extensions we're using in place of GL_KHR_shader_subgroup_basic state
4076. // that subgroups execute in lockstep, so this barrier is implicit.
4077. // However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
4078. // and a specific test of optimizing scans by leveraging lock-step invocation execution
4079. // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
  4080. // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
  4081. statement("#ifndef GL_KHR_shader_subgroup_basic");
  4082. statement("void subgroupBarrier() { memoryBarrierShared(); }");
  4083. statement("#endif");
  4084. statement("");
  4085. }
  4086. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
  4087. {
  4088. if (model == ExecutionModelGLCompute)
  4089. {
  4090. statement("#ifndef GL_KHR_shader_subgroup_basic");
  4091. statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
  4092. statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
  4093. statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
  4094. statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
  4095. statement("#endif");
  4096. }
  4097. else
  4098. {
  4099. statement("#ifndef GL_KHR_shader_subgroup_basic");
  4100. statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
  4101. statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
  4102. statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
  4103. statement("#endif");
  4104. }
  4105. statement("");
  4106. }
  4107. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
  4108. {
  4109. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  4110. statement("bool subgroupInverseBallot(uvec4 value)");
  4111. begin_scope();
  4112. statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
  4113. end_scope();
  4114. statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
  4115. begin_scope();
  4116. statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
  4117. statement("ivec2 c = bitCount(v);");
  4118. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  4119. statement("return uint(c.x);");
  4120. statement_no_indent("#else");
  4121. statement("return uint(c.x + c.y);");
  4122. statement_no_indent("#endif");
  4123. end_scope();
  4124. statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
  4125. begin_scope();
  4126. statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
  4127. statement("ivec2 c = bitCount(v);");
  4128. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  4129. statement("return uint(c.x);");
  4130. statement_no_indent("#else");
  4131. statement("return uint(c.x + c.y);");
  4132. statement_no_indent("#endif");
  4133. end_scope();
  4134. statement("#endif");
  4135. statement("");
  4136. }
  4137. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
  4138. {
  4139. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  4140. statement("uint subgroupBallotBitCount(uvec4 value)");
  4141. begin_scope();
  4142. statement("ivec2 c = bitCount(value.xy);");
  4143. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  4144. statement("return uint(c.x);");
  4145. statement_no_indent("#else");
  4146. statement("return uint(c.x + c.y);");
  4147. statement_no_indent("#endif");
  4148. end_scope();
  4149. statement("#endif");
  4150. statement("");
  4151. }
  4152. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
  4153. {
  4154. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  4155. statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
  4156. begin_scope();
  4157. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  4158. statement("uint shifted = value.x >> index;");
  4159. statement_no_indent("#else");
  4160. statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
  4161. statement_no_indent("#endif");
  4162. statement("return (shifted & 1u) != 0u;");
  4163. end_scope();
  4164. statement("#endif");
  4165. statement("");
  4166. }
  4167. auto arithmetic_feature_helper =
  4168. [&](Supp::Feature feat, std::string func_name, Op op, GroupOperation group_op)
  4169. {
  4170. if (shader_subgroup_supporter.is_feature_requested(feat))
  4171. {
  4172. auto exts = Supp::get_candidates_for_feature(feat, result);
  4173. for (auto &e : exts)
  4174. {
  4175. const char *name = Supp::get_extension_name(e);
  4176. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  4177. switch (e)
  4178. {
  4179. case Supp::NV_shader_thread_shuffle:
  4180. emit_subgroup_arithmetic_workaround(func_name, op, group_op);
  4181. break;
  4182. default:
  4183. break;
  4184. }
  4185. }
  4186. statement("#endif");
  4187. statement("");
  4188. }
  4189. };
  4190. arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd,
  4191. GroupOperationReduce);
  4192. arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd",
  4193. OpGroupNonUniformIAdd, GroupOperationExclusiveScan);
  4194. arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd",
  4195. OpGroupNonUniformIAdd, GroupOperationInclusiveScan);
  4196. arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd,
  4197. GroupOperationReduce);
  4198. arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd",
  4199. OpGroupNonUniformFAdd, GroupOperationExclusiveScan);
  4200. arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd",
  4201. OpGroupNonUniformFAdd, GroupOperationInclusiveScan);
  4202. arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul,
  4203. GroupOperationReduce);
  4204. arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul",
  4205. OpGroupNonUniformIMul, GroupOperationExclusiveScan);
  4206. arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul",
  4207. OpGroupNonUniformIMul, GroupOperationInclusiveScan);
  4208. arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul,
  4209. GroupOperationReduce);
  4210. arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul",
  4211. OpGroupNonUniformFMul, GroupOperationExclusiveScan);
  4212. arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul",
  4213. OpGroupNonUniformFMul, GroupOperationInclusiveScan);
  4214. }
  4215. if (!workaround_ubo_load_overload_types.empty())
  4216. {
  4217. for (auto &type_id : workaround_ubo_load_overload_types)
  4218. {
  4219. auto &type = get<SPIRType>(type_id);
  4220. if (options.es && is_matrix(type))
  4221. {
  4222. // Need both variants.
  4223. // GLSL cannot overload on precision, so need to dispatch appropriately.
  4224. statement("highp ", type_to_glsl(type), " spvWorkaroundRowMajor(highp ", type_to_glsl(type), " wrap) { return wrap; }");
  4225. statement("mediump ", type_to_glsl(type), " spvWorkaroundRowMajorMP(mediump ", type_to_glsl(type), " wrap) { return wrap; }");
  4226. }
  4227. else
  4228. {
  4229. statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type), " wrap) { return wrap; }");
  4230. }
  4231. }
  4232. statement("");
  4233. }
  4234. }
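// Usage note (hypothetical shader snippet): loads that hit the row-major UBO workaround are
// wrapped at the call site, e.g. `vec4 v = spvWorkaroundRowMajor(ubo.m) * pos;`; on ES the
// ...MP variant is picked when the matrix is mediump.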
  4235. void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed)
  4236. {
  4237. const char *qual = "";
  4238. const char *suffix = (options.es && relaxed) ? "MP" : "";
  4239. if (options.es)
  4240. qual = relaxed ? "mediump " : "highp ";
  4241. if (polyfills & PolyfillTranspose2x2)
  4242. {
  4243. statement(qual, "mat2 spvTranspose", suffix, "(", qual, "mat2 m)");
  4244. begin_scope();
  4245. statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
  4246. end_scope();
  4247. statement("");
  4248. }
  4249. if (polyfills & PolyfillTranspose3x3)
  4250. {
  4251. statement(qual, "mat3 spvTranspose", suffix, "(", qual, "mat3 m)");
  4252. begin_scope();
  4253. statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
  4254. end_scope();
  4255. statement("");
  4256. }
  4257. if (polyfills & PolyfillTranspose4x4)
  4258. {
  4259. statement(qual, "mat4 spvTranspose", suffix, "(", qual, "mat4 m)");
  4260. begin_scope();
  4261. statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
  4262. "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
  4263. end_scope();
  4264. statement("");
  4265. }
  4266. if (polyfills & PolyfillDeterminant2x2)
  4267. {
  4268. statement(qual, "float spvDeterminant", suffix, "(", qual, "mat2 m)");
  4269. begin_scope();
  4270. statement("return m[0][0] * m[1][1] - m[0][1] * m[1][0];");
  4271. end_scope();
  4272. statement("");
  4273. }
  4274. if (polyfills & PolyfillDeterminant3x3)
  4275. {
  4276. statement(qual, "float spvDeterminant", suffix, "(", qual, "mat3 m)");
  4277. begin_scope();
  4278. statement("return dot(m[0], vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], "
  4279. "m[1][2] * m[2][0] - m[1][0] * m[2][2], "
  4280. "m[1][0] * m[2][1] - m[1][1] * m[2][0]));");
  4281. end_scope();
  4282. statement("");
  4283. }
  4284. if (polyfills & PolyfillDeterminant4x4)
  4285. {
  4286. statement(qual, "float spvDeterminant", suffix, "(", qual, "mat4 m)");
  4287. begin_scope();
  4288. statement("return dot(m[0], vec4("
  4289. "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
  4290. "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
  4291. "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
  4292. "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]));");
  4293. end_scope();
  4294. statement("");
  4295. }
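// The spvDeterminant helpers above are cofactor expansions along the first column (for 3x3 and
// 4x4, literally dot(m[0], cofactors)), written out because the determinant() builtin is
// unavailable on the targets that need these polyfills.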
  4296. if (polyfills & PolyfillMatrixInverse2x2)
  4297. {
  4298. statement(qual, "mat2 spvInverse", suffix, "(", qual, "mat2 m)");
  4299. begin_scope();
  4300. statement("return mat2(m[1][1], -m[0][1], -m[1][0], m[0][0]) "
  4301. "* (1.0 / (m[0][0] * m[1][1] - m[1][0] * m[0][1]));");
  4302. end_scope();
  4303. statement("");
  4304. }
  4305. if (polyfills & PolyfillMatrixInverse3x3)
  4306. {
  4307. statement(qual, "mat3 spvInverse", suffix, "(", qual, "mat3 m)");
  4308. begin_scope();
  4309. statement(qual, "vec3 t = vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], m[1][2] * m[2][0] - m[1][0] * m[2][2], m[1][0] * m[2][1] - m[1][1] * m[2][0]);");
  4310. statement("return mat3(t[0], "
  4311. "m[0][2] * m[2][1] - m[0][1] * m[2][2], "
  4312. "m[0][1] * m[1][2] - m[0][2] * m[1][1], "
  4313. "t[1], "
  4314. "m[0][0] * m[2][2] - m[0][2] * m[2][0], "
  4315. "m[0][2] * m[1][0] - m[0][0] * m[1][2], "
  4316. "t[2], "
  4317. "m[0][1] * m[2][0] - m[0][0] * m[2][1], "
  4318. "m[0][0] * m[1][1] - m[0][1] * m[1][0]) "
  4319. "* (1.0 / dot(m[0], t));");
  4320. end_scope();
  4321. statement("");
  4322. }
  4323. if (polyfills & PolyfillMatrixInverse4x4)
  4324. {
  4325. statement(qual, "mat4 spvInverse", suffix, "(", qual, "mat4 m)");
  4326. begin_scope();
  4327. statement(qual, "vec4 t = vec4("
  4328. "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
  4329. "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
  4330. "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
  4331. "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]);");
  4332. statement("return mat4("
  4333. "t[0], "
  4334. "m[3][1] * m[2][2] * m[0][3] - m[2][1] * m[3][2] * m[0][3] - m[3][1] * m[0][2] * m[2][3] + m[0][1] * m[3][2] * m[2][3] + m[2][1] * m[0][2] * m[3][3] - m[0][1] * m[2][2] * m[3][3], "
  4335. "m[1][1] * m[3][2] * m[0][3] - m[3][1] * m[1][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3] - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] + m[0][1] * m[1][2] * m[3][3], "
  4336. "m[2][1] * m[1][2] * m[0][3] - m[1][1] * m[2][2] * m[0][3] - m[2][1] * m[0][2] * m[1][3] + m[0][1] * m[2][2] * m[1][3] + m[1][1] * m[0][2] * m[2][3] - m[0][1] * m[1][2] * m[2][3], "
  4337. "t[1], "
  4338. "m[2][0] * m[3][2] * m[0][3] - m[3][0] * m[2][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3] - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] + m[0][0] * m[2][2] * m[3][3], "
  4339. "m[3][0] * m[1][2] * m[0][3] - m[1][0] * m[3][2] * m[0][3] - m[3][0] * m[0][2] * m[1][3] + m[0][0] * m[3][2] * m[1][3] + m[1][0] * m[0][2] * m[3][3] - m[0][0] * m[1][2] * m[3][3], "
  4340. "m[1][0] * m[2][2] * m[0][3] - m[2][0] * m[1][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3] - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] + m[0][0] * m[1][2] * m[2][3], "
  4341. "t[2], "
  4342. "m[3][0] * m[2][1] * m[0][3] - m[2][0] * m[3][1] * m[0][3] - m[3][0] * m[0][1] * m[2][3] + m[0][0] * m[3][1] * m[2][3] + m[2][0] * m[0][1] * m[3][3] - m[0][0] * m[2][1] * m[3][3], "
  4343. "m[1][0] * m[3][1] * m[0][3] - m[3][0] * m[1][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3] - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] + m[0][0] * m[1][1] * m[3][3], "
  4344. "m[2][0] * m[1][1] * m[0][3] - m[1][0] * m[2][1] * m[0][3] - m[2][0] * m[0][1] * m[1][3] + m[0][0] * m[2][1] * m[1][3] + m[1][0] * m[0][1] * m[2][3] - m[0][0] * m[1][1] * m[2][3], "
  4345. "t[3], "
  4346. "m[2][0] * m[3][1] * m[0][2] - m[3][0] * m[2][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2] - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] + m[0][0] * m[2][1] * m[3][2], "
  4347. "m[3][0] * m[1][1] * m[0][2] - m[1][0] * m[3][1] * m[0][2] - m[3][0] * m[0][1] * m[1][2] + m[0][0] * m[3][1] * m[1][2] + m[1][0] * m[0][1] * m[3][2] - m[0][0] * m[1][1] * m[3][2], "
  4348. "m[1][0] * m[2][1] * m[0][2] - m[2][0] * m[1][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] + m[0][0] * m[1][1] * m[2][2]) "
  4349. "* (1.0 / dot(m[0], t));");
  4350. end_scope();
  4351. statement("");
  4352. }
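// For NMin/NMax/NClamp in the non-relaxed case we do not emit a polyfill body.
// Instead, declare the GLSL.std.450 instructions directly via spirv_instruction
// (GL_EXT_spirv_intrinsics), which keeps the NaN-aware semantics that plain
// min/max/clamp do not guarantee.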
  4353. if (!relaxed)
  4354. {
  4355. static const Polyfill polys[3][3] = {
  4356. { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 },
  4357. { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 },
  4358. { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 },
  4359. };
  4360. static const GLSLstd450 glsl_ops[] = { GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp };
  4361. static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" };
  4362. bool has_poly = false;
  4363. for (uint32_t i = 0; i < 3; i++)
  4364. {
  4365. for (uint32_t j = 0; j < 3; j++)
  4366. {
  4367. if ((polyfills & polys[i][j]) == 0)
  4368. continue;
  4369. const char *types[3][4] = {
  4370. { "float16_t", "f16vec2", "f16vec3", "f16vec4" },
  4371. { "float", "vec2", "vec3", "vec4" },
  4372. { "double", "dvec2", "dvec3", "dvec4" },
  4373. };
  4374. for (uint32_t k = 0; k < 4; k++)
  4375. {
  4376. auto *type = types[j][k];
  4377. if (i < 2)
  4378. {
  4379. statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ",
  4380. type, " ", spv_ops[i], "(", type, ", ", type, ");");
  4381. }
  4382. else
  4383. {
  4384. statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ",
  4385. type, " ", spv_ops[i], "(", type, ", ", type, ", ", type, ");");
  4386. }
  4387. has_poly = true;
  4388. }
  4389. }
  4390. }
  4391. if (has_poly)
  4392. statement("");
  4393. }
  4394. else
  4395. {
  4396. // Mediump intrinsics don't work correctly, so wrap the intrinsic in an outer shell that ensures mediump
  4397. // propagation.
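// For example, for "float" and spvNMin this emits:
//   mediump float spvNMinRelaxed(mediump float a, mediump float b)
//   {
//       mediump float res = spvNMin(a, b);
//       return res;
//   }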
  4398. static const Polyfill polys[3][3] = {
  4399. { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 },
  4400. { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 },
  4401. { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 },
  4402. };
  4403. static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" };
  4404. for (uint32_t i = 0; i < 3; i++)
  4405. {
  4406. for (uint32_t j = 0; j < 3; j++)
  4407. {
  4408. if ((polyfills & polys[i][j]) == 0)
  4409. continue;
  4410. const char *types[3][4] = {
  4411. { "float16_t", "f16vec2", "f16vec3", "f16vec4" },
  4412. { "float", "vec2", "vec3", "vec4" },
  4413. { "double", "dvec2", "dvec3", "dvec4" },
  4414. };
  4415. for (uint32_t k = 0; k < 4; k++)
  4416. {
  4417. auto *type = types[j][k];
  4418. if (i < 2)
  4419. {
  4420. statement("mediump ", type, " ", spv_ops[i], "Relaxed(",
  4421. "mediump ", type, " a, mediump ", type, " b)");
  4422. begin_scope();
  4423. statement("mediump ", type, " res = ", spv_ops[i], "(a, b);");
  4424. statement("return res;");
  4425. end_scope();
  4426. statement("");
  4427. }
  4428. else
  4429. {
  4430. statement("mediump ", type, " ", spv_ops[i], "Relaxed(",
  4431. "mediump ", type, " a, mediump ", type, " b, mediump ", type, " c)");
  4432. begin_scope();
  4433. statement("mediump ", type, " res = ", spv_ops[i], "(a, b, c);");
  4434. statement("return res;");
  4435. end_scope();
  4436. statement("");
  4437. }
  4438. }
  4439. }
  4440. }
  4441. }
  4442. }
// Returns a string representation of the ID, usable as a function arg.
// Default is to simply return the expression representation of the arg ID.
// Subclasses may override to modify the return value.
  4446. string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
  4447. {
  4448. // BDA expects pointers through function interface.
  4449. if (!arg.alias_global_variable && is_physical_or_buffer_pointer(expression_type(id)))
  4450. return to_pointer_expression(id);
  4451. // Make sure that we use the name of the original variable, and not the parameter alias.
  4452. uint32_t name_id = id;
  4453. auto *var = maybe_get<SPIRVariable>(id);
  4454. if (var && var->basevariable)
  4455. name_id = var->basevariable;
  4456. return to_unpacked_expression(name_id);
  4457. }
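// Forces the given ID to be emitted as a bound temporary and schedules another compile pass.
// Forward progress is only guaranteed the first time a given ID is forced.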
  4458. void CompilerGLSL::force_temporary_and_recompile(uint32_t id)
  4459. {
  4460. auto res = forced_temporaries.insert(id);
  4461. // Forcing new temporaries guarantees forward progress.
  4462. if (res.second)
  4463. force_recompile_guarantee_forward_progress();
  4464. else
  4465. force_recompile();
  4466. }
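// When a 32-bit float/int expression is consumed in a precision context that differs from its own,
// emit a mirror copy ("mp_copy_" / "hp_copy_") with the desired precision and return the alias ID,
// so that the copy assignment performs the precision conversion.
// DontCare consumers just force a temporary so the precision can be pinned down on a later pass.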
  4467. uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision)
  4468. {
  4469. // Constants do not have innate precision.
  4470. auto handle_type = ir.ids[id].get_type();
  4471. if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
  4472. return id;
// Ignore anything that isn't a 32-bit value.
  4474. auto &type = get<SPIRType>(type_id);
  4475. if (type.pointer)
  4476. return id;
  4477. if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int)
  4478. return id;
  4479. if (precision == Options::DontCare)
  4480. {
  4481. // If precision is consumed as don't care (operations only consisting of constants),
  4482. // we need to bind the expression to a temporary,
  4483. // otherwise we have no way of controlling the precision later.
  4484. auto itr = forced_temporaries.insert(id);
  4485. if (itr.second)
  4486. force_recompile_guarantee_forward_progress();
  4487. return id;
  4488. }
  4489. auto current_precision = has_decoration(id, DecorationRelaxedPrecision) ? Options::Mediump : Options::Highp;
  4490. if (current_precision == precision)
  4491. return id;
  4492. auto itr = temporary_to_mirror_precision_alias.find(id);
  4493. if (itr == temporary_to_mirror_precision_alias.end())
  4494. {
  4495. uint32_t alias_id = ir.increase_bound_by(1);
  4496. auto &m = ir.meta[alias_id];
  4497. if (auto *input_m = ir.find_meta(id))
  4498. m = *input_m;
  4499. const char *prefix;
  4500. if (precision == Options::Mediump)
  4501. {
  4502. set_decoration(alias_id, DecorationRelaxedPrecision);
  4503. prefix = "mp_copy_";
  4504. }
  4505. else
  4506. {
  4507. unset_decoration(alias_id, DecorationRelaxedPrecision);
  4508. prefix = "hp_copy_";
  4509. }
  4510. auto alias_name = join(prefix, to_name(id));
  4511. ParsedIR::sanitize_underscores(alias_name);
  4512. set_name(alias_id, alias_name);
  4513. emit_op(type_id, alias_id, to_expression(id), true);
  4514. temporary_to_mirror_precision_alias[id] = alias_id;
  4515. forced_temporaries.insert(id);
  4516. forced_temporaries.insert(alias_id);
  4517. force_recompile_guarantee_forward_progress();
  4518. id = alias_id;
  4519. }
  4520. else
  4521. {
  4522. id = itr->second;
  4523. }
  4524. return id;
  4525. }
  4526. void CompilerGLSL::handle_invalid_expression(uint32_t id)
  4527. {
  4528. // We tried to read an invalidated expression.
  4529. // This means we need another pass at compilation, but next time,
  4530. // force temporary variables so that they cannot be invalidated.
  4531. force_temporary_and_recompile(id);
  4532. // If the invalid expression happened as a result of a CompositeInsert
  4533. // overwrite, we must block this from happening next iteration.
  4534. if (composite_insert_overwritten.count(id))
  4535. block_composite_insert_overwrite.insert(id);
  4536. }
  4537. // Converts the format of the current expression from packed to unpacked,
  4538. // by wrapping the expression in a constructor of the appropriate type.
  4539. // GLSL does not support packed formats, so simply return the expression.
  4540. // Subclasses that do will override.
  4541. string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
  4542. {
  4543. return expr_str;
  4544. }
// Sometimes we proactively enclose an expression where it turns out we might not have needed it after all.
  4546. void CompilerGLSL::strip_enclosed_expression(string &expr)
  4547. {
  4548. if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
  4549. return;
  4550. // Have to make sure that our first and last parens actually enclose everything inside it.
  4551. uint32_t paren_count = 0;
  4552. for (auto &c : expr)
  4553. {
  4554. if (c == '(')
  4555. paren_count++;
  4556. else if (c == ')')
  4557. {
  4558. paren_count--;
  4559. // If we hit 0 and this is not the final char, our first and final parens actually don't
  4560. // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
  4561. if (paren_count == 0 && &c != &expr.back())
  4562. return;
  4563. }
  4564. }
  4565. expr.erase(expr.size() - 1, 1);
  4566. expr.erase(begin(expr));
  4567. }
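// Returns true if the expression must be parenthesized to be reused safely:
// either it starts with a unary operator, or it contains a space (i.e. a binary operator)
// outside any parentheses or brackets.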
  4568. bool CompilerGLSL::needs_enclose_expression(const std::string &expr)
  4569. {
  4570. bool need_parens = false;
  4571. // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
  4572. // unary expressions.
  4573. if (!expr.empty())
  4574. {
  4575. auto c = expr.front();
  4576. if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
  4577. need_parens = true;
  4578. }
  4579. if (!need_parens)
  4580. {
  4581. uint32_t paren_count = 0;
  4582. for (auto c : expr)
  4583. {
  4584. if (c == '(' || c == '[')
  4585. paren_count++;
  4586. else if (c == ')' || c == ']')
  4587. {
  4588. assert(paren_count);
  4589. paren_count--;
  4590. }
  4591. else if (c == ' ' && paren_count == 0)
  4592. {
  4593. need_parens = true;
  4594. break;
  4595. }
  4596. }
  4597. assert(paren_count == 0);
  4598. }
  4599. return need_parens;
  4600. }
  4601. string CompilerGLSL::enclose_expression(const string &expr)
  4602. {
  4603. // If this expression contains any spaces which are not enclosed by parentheses,
  4604. // we need to enclose it so we can treat the whole string as an expression.
  4605. // This happens when two expressions have been part of a binary op earlier.
  4606. if (needs_enclose_expression(expr))
  4607. return join('(', expr, ')');
  4608. else
  4609. return expr;
  4610. }
  4611. string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
  4612. {
  4613. // If this expression starts with an address-of operator ('&'), then
  4614. // just return the part after the operator.
  4615. // TODO: Strip parens if unnecessary?
  4616. if (expr.front() == '&')
  4617. return expr.substr(1);
  4618. else if (backend.native_pointers)
  4619. return join('*', expr);
  4620. else if (is_physical_pointer(expr_type) && !is_physical_pointer_to_buffer_block(expr_type))
  4621. return join(enclose_expression(expr), ".value");
  4622. else
  4623. return expr;
  4624. }
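// Inverse of dereference_expression(): strips a leading '*' (or the "(*expr)" pattern),
// otherwise prefixes the enclosed expression with '&'.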
  4625. string CompilerGLSL::address_of_expression(const std::string &expr)
  4626. {
  4627. if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
  4628. {
  4629. // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
  4630. // the first two and last characters. We might have to enclose the expression.
  4631. // This doesn't work for cases like (*foo + 10),
  4632. // but this is an r-value expression which we cannot take the address of anyways.
  4633. return enclose_expression(expr.substr(2, expr.size() - 3));
  4634. }
  4635. else if (expr.front() == '*')
  4636. {
  4637. // If this expression starts with a dereference operator ('*'), then
  4638. // just return the part after the operator.
  4639. return expr.substr(1);
  4640. }
  4641. else
  4642. return join('&', enclose_expression(expr));
  4643. }
  4644. // Just like to_expression except that we enclose the expression inside parentheses if needed.
  4645. string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
  4646. {
  4647. return enclose_expression(to_expression(id, register_expression_read));
  4648. }
  4649. // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
  4650. // need_transpose must be forced to false.
  4651. string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
  4652. {
  4653. return unpack_expression_type(to_expression(id), expression_type(id),
  4654. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  4655. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
  4656. }
  4657. string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
  4658. {
  4659. // If we need to transpose, it will also take care of unpacking rules.
  4660. auto *e = maybe_get<SPIRExpression>(id);
  4661. bool need_transpose = e && e->need_transpose;
  4662. bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  4663. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  4664. if (!need_transpose && (is_remapped || is_packed))
  4665. {
  4666. return unpack_expression_type(to_expression(id, register_expression_read),
  4667. get_pointee_type(expression_type_id(id)),
  4668. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  4669. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
  4670. }
  4671. else
  4672. return to_expression(id, register_expression_read);
  4673. }
  4674. string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
  4675. {
  4676. return enclose_expression(to_unpacked_expression(id, register_expression_read));
  4677. }
  4678. string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
  4679. {
  4680. auto &type = expression_type(id);
  4681. if (is_pointer(type) && should_dereference(id))
  4682. return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
  4683. else
  4684. return to_expression(id, register_expression_read);
  4685. }
  4686. string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
  4687. {
  4688. auto &type = expression_type(id);
  4689. if (is_pointer(type) && expression_is_lvalue(id) && !should_dereference(id))
  4690. return address_of_expression(to_enclosed_expression(id, register_expression_read));
  4691. else
  4692. return to_unpacked_expression(id, register_expression_read);
  4693. }
  4694. string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
  4695. {
  4696. auto &type = expression_type(id);
  4697. if (is_pointer(type) && expression_is_lvalue(id) && !should_dereference(id))
  4698. return address_of_expression(to_enclosed_expression(id, register_expression_read));
  4699. else
  4700. return to_enclosed_unpacked_expression(id, register_expression_read);
  4701. }
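// Extracts a single component from a vector expression, using array indexing for
// physically packed types and swizzle syntax otherwise.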
  4702. string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
  4703. {
  4704. auto expr = to_enclosed_expression(id);
  4705. if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
  4706. return join(expr, "[", index, "]");
  4707. else
  4708. return join(expr, ".", index_to_swizzle(index));
  4709. }
  4710. string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
  4711. const uint32_t *chain, uint32_t length)
  4712. {
  4713. // It is kinda silly if application actually enter this path since they know the constant up front.
  4714. // It is useful here to extract the plain constant directly.
  4715. SPIRConstant tmp;
  4716. tmp.constant_type = result_type;
  4717. auto &composite_type = get<SPIRType>(c.constant_type);
  4718. assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
  4719. assert(!c.specialization);
  4720. if (is_matrix(composite_type))
  4721. {
  4722. if (length == 2)
  4723. {
  4724. tmp.m.c[0].vecsize = 1;
  4725. tmp.m.columns = 1;
  4726. tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
  4727. }
  4728. else
  4729. {
  4730. assert(length == 1);
  4731. tmp.m.c[0].vecsize = composite_type.vecsize;
  4732. tmp.m.columns = 1;
  4733. tmp.m.c[0] = c.m.c[chain[0]];
  4734. }
  4735. }
  4736. else
  4737. {
  4738. assert(length == 1);
  4739. tmp.m.c[0].vecsize = 1;
  4740. tmp.m.columns = 1;
  4741. tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
  4742. }
  4743. return constant_expression(tmp);
  4744. }
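// Re-rolls an array expression into an explicit initializer list, e.g. arr -> { arr[0], arr[1] },
// recursing for multi-dimensional arrays and casting booleans to the remapped struct type when needed.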
  4745. string CompilerGLSL::to_rerolled_array_expression(const SPIRType &parent_type,
  4746. const string &base_expr, const SPIRType &type)
  4747. {
  4748. bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
  4749. type.basetype == SPIRType::Boolean &&
  4750. backend.boolean_in_struct_remapped_type != SPIRType::Boolean;
  4751. SPIRType tmp_type { OpNop };
  4752. if (remapped_boolean)
  4753. {
  4754. tmp_type = get<SPIRType>(type.parent_type);
  4755. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  4756. }
  4757. else if (type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean)
  4758. {
  4759. // It's possible that we have an r-value expression that was OpLoaded from a struct.
  4760. // We have to reroll this and explicitly cast the input to bool, because the r-value is short.
  4761. tmp_type = get<SPIRType>(type.parent_type);
  4762. remapped_boolean = true;
  4763. }
  4764. uint32_t size = to_array_size_literal(type);
  4765. auto &parent = get<SPIRType>(type.parent_type);
  4766. string expr = "{ ";
  4767. for (uint32_t i = 0; i < size; i++)
  4768. {
  4769. auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
  4770. if (!is_array(parent))
  4771. {
  4772. if (remapped_boolean)
  4773. subexpr = join(type_to_glsl(tmp_type), "(", subexpr, ")");
  4774. expr += subexpr;
  4775. }
  4776. else
  4777. expr += to_rerolled_array_expression(parent_type, subexpr, parent);
  4778. if (i + 1 < size)
  4779. expr += ", ";
  4780. }
  4781. expr += " }";
  4782. return expr;
  4783. }
  4784. string CompilerGLSL::to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type)
  4785. {
  4786. auto &type = expression_type(id);
  4787. bool reroll_array = false;
  4788. bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
  4789. type.basetype == SPIRType::Boolean &&
  4790. backend.boolean_in_struct_remapped_type != SPIRType::Boolean;
  4791. if (is_array(type))
  4792. {
  4793. reroll_array = !backend.array_is_value_type ||
  4794. (block_like_type && !backend.array_is_value_type_in_buffer_blocks);
  4795. if (remapped_boolean)
  4796. {
  4797. // Forced to reroll if we have to change bool[] to short[].
  4798. reroll_array = true;
  4799. }
  4800. }
  4801. if (reroll_array)
  4802. {
  4803. // For this case, we need to "re-roll" an array initializer from a temporary.
  4804. // We cannot simply pass the array directly, since it decays to a pointer and it cannot
  4805. // participate in a struct initializer. E.g.
  4806. // float arr[2] = { 1.0, 2.0 };
  4807. // Foo foo = { arr }; must be transformed to
  4808. // Foo foo = { { arr[0], arr[1] } };
  4809. // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
  4810. // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
  4811. // as temporaries anyways.
  4812. return to_rerolled_array_expression(parent_type, to_enclosed_expression(id), type);
  4813. }
  4814. else
  4815. {
  4816. auto expr = to_unpacked_expression(id);
  4817. if (remapped_boolean)
  4818. {
  4819. auto tmp_type = type;
  4820. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  4821. expr = join(type_to_glsl(tmp_type), "(", expr, ")");
  4822. }
  4823. return expr;
  4824. }
  4825. }
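// Like to_expression(), but rewrites the expression through convert_non_uniform_expression()
// if the ID carries the NonUniform decoration.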
  4826. string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
  4827. {
  4828. string expr = to_expression(id);
  4829. if (has_decoration(id, DecorationNonUniform))
  4830. convert_non_uniform_expression(expr, id);
  4831. return expr;
  4832. }
  4833. string CompilerGLSL::to_atomic_ptr_expression(uint32_t id)
  4834. {
  4835. string expr = to_non_uniform_aware_expression(id);
// If we have a naked pointer to POD, we need to dereference to get the proper ".value" resolve.
  4837. if (should_dereference(id))
  4838. expr = dereference_expression(expression_type(id), expr);
  4839. return expr;
  4840. }
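// The main entry point for turning an ID into a GLSL expression string.
// Handles invalidated expressions, forwarded expressions, constants, spec constants and variables.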
  4841. string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
  4842. {
  4843. auto itr = invalid_expressions.find(id);
  4844. if (itr != end(invalid_expressions))
  4845. handle_invalid_expression(id);
  4846. if (ir.ids[id].get_type() == TypeExpression)
  4847. {
  4848. // We might have a more complex chain of dependencies.
  4849. // A possible scenario is that we
  4850. //
  4851. // %1 = OpLoad
// %1 = OpLoad
// %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
  4853. // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
  4854. // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
  4855. // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
  4856. //
  4857. // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
  4858. // and see that we should not forward reads of the original variable.
  4859. auto &expr = get<SPIRExpression>(id);
  4860. for (uint32_t dep : expr.expression_dependencies)
  4861. if (invalid_expressions.find(dep) != end(invalid_expressions))
  4862. handle_invalid_expression(dep);
  4863. }
  4864. if (register_expression_read)
  4865. track_expression_read(id);
  4866. switch (ir.ids[id].get_type())
  4867. {
  4868. case TypeExpression:
  4869. {
  4870. auto &e = get<SPIRExpression>(id);
  4871. if (e.base_expression)
  4872. return to_enclosed_expression(e.base_expression) + e.expression;
  4873. else if (e.need_transpose)
  4874. {
  4875. // This should not be reached for access chains, since we always deal explicitly with transpose state
  4876. // when consuming an access chain expression.
  4877. uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  4878. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  4879. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  4880. return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
  4881. is_packed, relaxed);
  4882. }
  4883. else if (flattened_structs.count(id))
  4884. {
  4885. return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
  4886. }
  4887. else
  4888. {
  4889. if (is_forcing_recompilation())
  4890. {
  4891. // During first compilation phase, certain expression patterns can trigger exponential growth of memory.
  4892. // Avoid this by returning dummy expressions during this phase.
  4893. // Do not use empty expressions here, because those are sentinels for other cases.
  4894. return "_";
  4895. }
  4896. else
  4897. return e.expression;
  4898. }
  4899. }
  4900. case TypeConstant:
  4901. {
  4902. auto &c = get<SPIRConstant>(id);
  4903. auto &type = get<SPIRType>(c.constant_type);
  4904. // WorkGroupSize may be a constant.
  4905. if (has_decoration(c.self, DecorationBuiltIn))
  4906. return builtin_to_glsl(BuiltIn(get_decoration(c.self, DecorationBuiltIn)), StorageClassGeneric);
  4907. else if (c.specialization)
  4908. {
  4909. if (backend.workgroup_size_is_hidden)
  4910. {
  4911. int wg_index = get_constant_mapping_to_workgroup_component(c);
  4912. if (wg_index >= 0)
  4913. {
  4914. auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index));
  4915. if (type.basetype != SPIRType::UInt)
  4916. wg_size = bitcast_expression(type, SPIRType::UInt, wg_size);
  4917. return wg_size;
  4918. }
  4919. }
  4920. if (expression_is_forwarded(id))
  4921. return constant_expression(c);
  4922. return to_name(id);
  4923. }
  4924. else if (c.is_used_as_lut)
  4925. return to_name(id);
  4926. else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
  4927. return to_name(id);
  4928. else if (!type.array.empty() && !backend.can_declare_arrays_inline)
  4929. return to_name(id);
  4930. else
  4931. return constant_expression(c);
  4932. }
  4933. case TypeConstantOp:
  4934. return to_name(id);
  4935. case TypeVariable:
  4936. {
  4937. auto &var = get<SPIRVariable>(id);
// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
// since the variable has not been declared yet.
  4940. if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
  4941. {
  4942. // We might try to load from a loop variable before it has been initialized.
  4943. // Prefer static expression and fallback to initializer.
  4944. if (var.static_expression)
  4945. return to_expression(var.static_expression);
  4946. else if (var.initializer)
  4947. return to_expression(var.initializer);
  4948. else
  4949. {
  4950. // We cannot declare the variable yet, so have to fake it.
  4951. uint32_t undef_id = ir.increase_bound_by(1);
  4952. return emit_uninitialized_temporary_expression(get_variable_data_type_id(var), undef_id).expression;
  4953. }
  4954. }
  4955. else if (var.deferred_declaration)
  4956. {
  4957. var.deferred_declaration = false;
  4958. return variable_decl(var);
  4959. }
  4960. else if (flattened_structs.count(id))
  4961. {
  4962. return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
  4963. }
  4964. else
  4965. {
  4966. auto &dec = ir.meta[var.self].decoration;
  4967. if (dec.builtin)
  4968. return builtin_to_glsl(dec.builtin_type, var.storage);
  4969. else
  4970. return to_name(id);
  4971. }
  4972. }
  4973. case TypeCombinedImageSampler:
// The expression of this type should never be taken directly.
  4975. // The intention is that texture sampling functions will extract the image and samplers
  4976. // separately and take their expressions as needed.
  4977. // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
  4978. // expression ala sampler2D(texture, sampler).
  4979. SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
  4980. case TypeAccessChain:
  4981. // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
  4982. SPIRV_CROSS_THROW("Access chains have no default expression representation.");
  4983. default:
  4984. return to_name(id);
  4985. }
  4986. }
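// The helpers below implement OpSpecConstantOp CompositeInsert by rebuilding the nested
// composite constants: get_composite_constant_ids() returns the element IDs of a composite,
// fill_composite_constant()/set_composite_constant() write a new initializer list back, and
// get_composite_member_type() walks the type hierarchy one level down.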
  4987. SmallVector<ConstantID> CompilerGLSL::get_composite_constant_ids(ConstantID const_id)
  4988. {
  4989. if (auto *constant = maybe_get<SPIRConstant>(const_id))
  4990. {
  4991. const auto &type = get<SPIRType>(constant->constant_type);
  4992. if (is_array(type) || type.basetype == SPIRType::Struct)
  4993. return constant->subconstants;
  4994. if (is_matrix(type))
  4995. return SmallVector<ConstantID>(constant->m.id);
  4996. if (is_vector(type))
  4997. return SmallVector<ConstantID>(constant->m.c[0].id);
  4998. SPIRV_CROSS_THROW("Unexpected scalar constant!");
  4999. }
  5000. if (!const_composite_insert_ids.count(const_id))
  5001. SPIRV_CROSS_THROW("Unimplemented for this OpSpecConstantOp!");
  5002. return const_composite_insert_ids[const_id];
  5003. }
  5004. void CompilerGLSL::fill_composite_constant(SPIRConstant &constant, TypeID type_id,
  5005. const SmallVector<ConstantID> &initializers)
  5006. {
  5007. auto &type = get<SPIRType>(type_id);
  5008. constant.specialization = true;
  5009. if (is_array(type) || type.basetype == SPIRType::Struct)
  5010. {
  5011. constant.subconstants = initializers;
  5012. }
  5013. else if (is_matrix(type))
  5014. {
  5015. constant.m.columns = type.columns;
  5016. for (uint32_t i = 0; i < type.columns; ++i)
  5017. {
  5018. constant.m.id[i] = initializers[i];
  5019. constant.m.c[i].vecsize = type.vecsize;
  5020. }
  5021. }
  5022. else if (is_vector(type))
  5023. {
  5024. constant.m.c[0].vecsize = type.vecsize;
  5025. for (uint32_t i = 0; i < type.vecsize; ++i)
  5026. constant.m.c[0].id[i] = initializers[i];
  5027. }
  5028. else
  5029. SPIRV_CROSS_THROW("Unexpected scalar in SpecConstantOp CompositeInsert!");
  5030. }
  5031. void CompilerGLSL::set_composite_constant(ConstantID const_id, TypeID type_id,
  5032. const SmallVector<ConstantID> &initializers)
  5033. {
  5034. if (maybe_get<SPIRConstantOp>(const_id))
  5035. {
  5036. const_composite_insert_ids[const_id] = initializers;
  5037. return;
  5038. }
  5039. auto &constant = set<SPIRConstant>(const_id, type_id);
  5040. fill_composite_constant(constant, type_id, initializers);
  5041. forwarded_temporaries.insert(const_id);
  5042. }
  5043. TypeID CompilerGLSL::get_composite_member_type(TypeID type_id, uint32_t member_idx)
  5044. {
  5045. auto &type = get<SPIRType>(type_id);
  5046. if (is_array(type))
  5047. return type.parent_type;
  5048. if (type.basetype == SPIRType::Struct)
  5049. return type.member_types[member_idx];
  5050. if (is_matrix(type))
  5051. return type.parent_type;
  5052. if (is_vector(type))
  5053. return type.parent_type;
  5054. SPIRV_CROSS_THROW("Shouldn't reach lower than vector handling OpSpecConstantOp CompositeInsert!");
  5055. }
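// Emits an expression for an OpSpecConstantOp, mapping the opcode to the corresponding
// GLSL operator or constructor and bitcasting operands where signedness or width differs.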
  5056. string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
  5057. {
  5058. auto &type = get<SPIRType>(cop.basetype);
  5059. bool binary = false;
  5060. bool unary = false;
  5061. string op;
  5062. if (is_legacy() && is_unsigned_opcode(cop.opcode))
  5063. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
  5064. // TODO: Find a clean way to reuse emit_instruction.
  5065. switch (cop.opcode)
  5066. {
  5067. case OpSConvert:
  5068. case OpUConvert:
  5069. case OpFConvert:
  5070. op = type_to_glsl_constructor(type);
  5071. break;
  5072. #define GLSL_BOP(opname, x) \
  5073. case Op##opname: \
  5074. binary = true; \
  5075. op = x; \
  5076. break
  5077. #define GLSL_UOP(opname, x) \
  5078. case Op##opname: \
  5079. unary = true; \
  5080. op = x; \
  5081. break
  5082. GLSL_UOP(SNegate, "-");
  5083. GLSL_UOP(Not, "~");
  5084. GLSL_BOP(IAdd, "+");
  5085. GLSL_BOP(ISub, "-");
  5086. GLSL_BOP(IMul, "*");
  5087. GLSL_BOP(SDiv, "/");
  5088. GLSL_BOP(UDiv, "/");
  5089. GLSL_BOP(UMod, "%");
  5090. GLSL_BOP(SMod, "%");
  5091. GLSL_BOP(ShiftRightLogical, ">>");
  5092. GLSL_BOP(ShiftRightArithmetic, ">>");
  5093. GLSL_BOP(ShiftLeftLogical, "<<");
  5094. GLSL_BOP(BitwiseOr, "|");
  5095. GLSL_BOP(BitwiseXor, "^");
  5096. GLSL_BOP(BitwiseAnd, "&");
  5097. GLSL_BOP(LogicalOr, "||");
  5098. GLSL_BOP(LogicalAnd, "&&");
  5099. GLSL_UOP(LogicalNot, "!");
  5100. GLSL_BOP(LogicalEqual, "==");
  5101. GLSL_BOP(LogicalNotEqual, "!=");
  5102. GLSL_BOP(IEqual, "==");
  5103. GLSL_BOP(INotEqual, "!=");
  5104. GLSL_BOP(ULessThan, "<");
  5105. GLSL_BOP(SLessThan, "<");
  5106. GLSL_BOP(ULessThanEqual, "<=");
  5107. GLSL_BOP(SLessThanEqual, "<=");
  5108. GLSL_BOP(UGreaterThan, ">");
  5109. GLSL_BOP(SGreaterThan, ">");
  5110. GLSL_BOP(UGreaterThanEqual, ">=");
  5111. GLSL_BOP(SGreaterThanEqual, ">=");
  5112. case OpSRem:
  5113. {
  5114. uint32_t op0 = cop.arguments[0];
  5115. uint32_t op1 = cop.arguments[1];
  5116. return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
  5117. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
  5118. }
  5119. case OpSelect:
  5120. {
  5121. if (cop.arguments.size() < 3)
  5122. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5123. // This one is pretty annoying. It's triggered from
  5124. // uint(bool), int(bool) from spec constants.
  5125. // In order to preserve its compile-time constness in Vulkan GLSL,
  5126. // we need to reduce the OpSelect expression back to this simplified model.
  5127. // If we cannot, fail.
  5128. if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
  5129. {
  5130. // Implement as a simple cast down below.
  5131. }
  5132. else
  5133. {
  5134. // Implement a ternary and pray the compiler understands it :)
  5135. return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
  5136. }
  5137. break;
  5138. }
  5139. case OpVectorShuffle:
  5140. {
  5141. string expr = type_to_glsl_constructor(type);
  5142. expr += "(";
  5143. uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
  5144. string left_arg = to_enclosed_expression(cop.arguments[0]);
  5145. string right_arg = to_enclosed_expression(cop.arguments[1]);
  5146. for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
  5147. {
  5148. uint32_t index = cop.arguments[i];
  5149. if (index == 0xFFFFFFFF)
  5150. {
  5151. SPIRConstant c;
  5152. c.constant_type = type.parent_type;
  5153. assert(type.parent_type != ID(0));
  5154. expr += constant_expression(c);
  5155. }
  5156. else if (index >= left_components)
  5157. {
  5158. expr += right_arg + "." + "xyzw"[index - left_components];
  5159. }
  5160. else
  5161. {
  5162. expr += left_arg + "." + "xyzw"[index];
  5163. }
  5164. if (i + 1 < uint32_t(cop.arguments.size()))
  5165. expr += ", ";
  5166. }
  5167. expr += ")";
  5168. return expr;
  5169. }
  5170. case OpCompositeExtract:
  5171. {
// For trivial vector extracts (typically of WorkGroupSize),
// punch through to the input spec constant if the composite is used as an array size.
  5174. const auto *c = maybe_get<SPIRConstant>(cop.arguments[0]);
  5175. string expr;
  5176. if (c && cop.arguments.size() == 2 && c->is_used_as_array_length &&
  5177. !backend.supports_spec_constant_array_size &&
  5178. is_vector(get<SPIRType>(c->constant_type)))
  5179. {
  5180. expr = to_expression(c->specialization_constant_id(0, cop.arguments[1]));
  5181. }
  5182. else
  5183. {
  5184. expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
  5185. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  5186. }
  5187. return expr;
  5188. }
  5189. case OpCompositeInsert:
  5190. {
  5191. SmallVector<ConstantID> new_init = get_composite_constant_ids(cop.arguments[1]);
  5192. uint32_t idx;
  5193. uint32_t target_id = cop.self;
  5194. uint32_t target_type_id = cop.basetype;
  5195. // We have to drill down to the part we want to modify, and create new
  5196. // constants for each containing part.
  5197. for (idx = 2; idx < cop.arguments.size() - 1; ++idx)
  5198. {
  5199. uint32_t new_const = ir.increase_bound_by(1);
  5200. uint32_t old_const = new_init[cop.arguments[idx]];
  5201. new_init[cop.arguments[idx]] = new_const;
  5202. set_composite_constant(target_id, target_type_id, new_init);
  5203. new_init = get_composite_constant_ids(old_const);
  5204. target_id = new_const;
  5205. target_type_id = get_composite_member_type(target_type_id, cop.arguments[idx]);
  5206. }
  5207. // Now replace the initializer with the one from this instruction.
  5208. new_init[cop.arguments[idx]] = cop.arguments[0];
  5209. set_composite_constant(target_id, target_type_id, new_init);
  5210. SPIRConstant tmp_const(cop.basetype);
  5211. fill_composite_constant(tmp_const, cop.basetype, const_composite_insert_ids[cop.self]);
  5212. return constant_expression(tmp_const);
  5213. }
  5214. default:
// Some opcodes are unimplemented here; they are currently not possible to test from glslang.
  5216. SPIRV_CROSS_THROW("Unimplemented spec constant op.");
  5217. }
  5218. uint32_t bit_width = 0;
  5219. if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
  5220. bit_width = expression_type(cop.arguments[0]).width;
  5221. SPIRType::BaseType input_type;
  5222. bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
  5223. switch (cop.opcode)
  5224. {
  5225. case OpIEqual:
  5226. case OpINotEqual:
  5227. input_type = to_signed_basetype(bit_width);
  5228. break;
  5229. case OpSLessThan:
  5230. case OpSLessThanEqual:
  5231. case OpSGreaterThan:
  5232. case OpSGreaterThanEqual:
  5233. case OpSMod:
  5234. case OpSDiv:
  5235. case OpShiftRightArithmetic:
  5236. case OpSConvert:
  5237. case OpSNegate:
  5238. input_type = to_signed_basetype(bit_width);
  5239. break;
  5240. case OpULessThan:
  5241. case OpULessThanEqual:
  5242. case OpUGreaterThan:
  5243. case OpUGreaterThanEqual:
  5244. case OpUMod:
  5245. case OpUDiv:
  5246. case OpShiftRightLogical:
  5247. case OpUConvert:
  5248. input_type = to_unsigned_basetype(bit_width);
  5249. break;
  5250. default:
  5251. input_type = type.basetype;
  5252. break;
  5253. }
  5254. #undef GLSL_BOP
  5255. #undef GLSL_UOP
  5256. if (binary)
  5257. {
  5258. if (cop.arguments.size() < 2)
  5259. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5260. string cast_op0;
  5261. string cast_op1;
  5262. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
  5263. cop.arguments[1], skip_cast_if_equal_type);
  5264. if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
  5265. {
  5266. expected_type.basetype = input_type;
  5267. auto expr = bitcast_glsl_op(type, expected_type);
  5268. expr += '(';
  5269. expr += join(cast_op0, " ", op, " ", cast_op1);
  5270. expr += ')';
  5271. return expr;
  5272. }
  5273. else
  5274. return join("(", cast_op0, " ", op, " ", cast_op1, ")");
  5275. }
  5276. else if (unary)
  5277. {
  5278. if (cop.arguments.size() < 1)
  5279. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5280. // Auto-bitcast to result type as needed.
  5281. // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
  5282. return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
  5283. }
  5284. else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
  5285. {
  5286. if (cop.arguments.size() < 1)
  5287. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5288. auto &arg_type = expression_type(cop.arguments[0]);
  5289. if (arg_type.width < type.width && input_type != arg_type.basetype)
  5290. {
  5291. auto expected = arg_type;
  5292. expected.basetype = input_type;
  5293. return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
  5294. }
  5295. else
  5296. return join(op, "(", to_expression(cop.arguments[0]), ")");
  5297. }
  5298. else
  5299. {
  5300. if (cop.arguments.size() < 1)
  5301. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5302. return join(op, "(", to_expression(cop.arguments[0]), ")");
  5303. }
  5304. }
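// Emits a constant expression for a SPIRConstant: null pointer literals, replicated constants,
// arrays/structs as (typed) initializer lists, and scalar/vector/matrix constructors,
// remapping booleans inside struct scopes where the backend requires it.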
  5305. string CompilerGLSL::constant_expression(const SPIRConstant &c,
  5306. bool inside_block_like_struct_scope,
  5307. bool inside_struct_scope)
  5308. {
  5309. auto &type = get<SPIRType>(c.constant_type);
  5310. if (is_pointer(type))
  5311. {
  5312. return backend.null_pointer_literal;
  5313. }
  5314. else if (c.is_null_array_specialized_length && backend.requires_matching_array_initializer)
  5315. {
  5316. require_extension_internal("GL_EXT_null_initializer");
  5317. return backend.constant_null_initializer;
  5318. }
  5319. else if (c.replicated && type.op != OpTypeArray)
  5320. {
  5321. if (type.op == OpTypeMatrix)
  5322. {
  5323. uint32_t num_elements = type.columns;
  5324. // GLSL does not allow the replication constructor for matrices
  5325. // mat4(vec4(0.0)) needs to be manually expanded to mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
  5326. std::string res;
  5327. res += type_to_glsl(type);
  5328. res += "(";
  5329. for (uint32_t i = 0; i < num_elements; i++)
  5330. {
  5331. res += to_expression(c.subconstants[0]);
  5332. if (i < num_elements - 1)
  5333. res += ", ";
  5334. }
  5335. res += ")";
  5336. return res;
  5337. }
  5338. else
  5339. {
  5340. return join(type_to_glsl(type), "(", to_expression(c.subconstants[0]), ")");
  5341. }
  5342. }
  5343. else if (!c.subconstants.empty())
  5344. {
  5345. // Handles Arrays and structures.
  5346. string res;
  5347. // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration.
  5348. // Outside a block-like struct declaration, we can always bind to a constant array with templated type.
  5349. // Should look at ArrayStride here as well, but it's possible to declare a constant struct
  5350. // with Offset = 0, using no ArrayStride on the enclosed array type.
  5351. // A particular CTS test hits this scenario.
  5352. bool array_type_decays = inside_block_like_struct_scope &&
  5353. is_array(type) &&
  5354. !backend.array_is_value_type_in_buffer_blocks;
  5355. // Allow Metal to use the array<T> template to make arrays a value type
bool needs_trailing_bracket = false;
  5357. if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
  5358. !is_array(type))
  5359. {
  5360. res = type_to_glsl_constructor(type) + "{ ";
  5361. }
  5362. else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
  5363. is_array(type) && !array_type_decays)
  5364. {
  5365. const auto *p_type = &type;
  5366. SPIRType tmp_type { OpNop };
  5367. if (inside_struct_scope &&
  5368. backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
  5369. type.basetype == SPIRType::Boolean)
  5370. {
  5371. tmp_type = type;
  5372. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  5373. p_type = &tmp_type;
  5374. }
  5375. res = type_to_glsl_constructor(*p_type) + "({ ";
needs_trailing_bracket = true;
  5377. }
  5378. else if (backend.use_initializer_list)
  5379. {
  5380. res = "{ ";
  5381. }
  5382. else
  5383. {
  5384. res = type_to_glsl_constructor(type) + "(";
  5385. }
  5386. uint32_t subconstant_index = 0;
  5387. size_t num_elements = c.subconstants.size();
  5388. if (c.replicated)
  5389. {
  5390. if (type.array.size() != 1)
  5391. SPIRV_CROSS_THROW("Multidimensional arrays not yet supported as replicated constans");
  5392. num_elements = type.array[0];
  5393. }
  5394. for (size_t i = 0; i < num_elements; i++)
  5395. {
  5396. auto &elem = c.subconstants[c.replicated ? 0 : i];
  5397. if (auto *op = maybe_get<SPIRConstantOp>(elem))
  5398. {
  5399. res += constant_op_expression(*op);
  5400. }
  5401. else if (maybe_get<SPIRUndef>(elem) != nullptr)
  5402. {
  5403. res += to_name(elem);
  5404. }
  5405. else
  5406. {
  5407. auto &subc = get<SPIRConstant>(elem);
  5408. if (subc.specialization && !expression_is_forwarded(elem))
  5409. res += to_name(elem);
  5410. else
  5411. {
  5412. if (!is_array(type) && type.basetype == SPIRType::Struct)
  5413. {
  5414. // When we get down to emitting struct members, override the block-like information.
  5415. // For constants, we can freely mix and match block-like state.
  5416. inside_block_like_struct_scope =
  5417. has_member_decoration(type.self, subconstant_index, DecorationOffset);
  5418. }
  5419. if (type.basetype == SPIRType::Struct)
  5420. inside_struct_scope = true;
  5421. res += constant_expression(subc, inside_block_like_struct_scope, inside_struct_scope);
  5422. }
  5423. }
  5424. if (i != num_elements - 1)
  5425. res += ", ";
  5426. subconstant_index++;
  5427. }
  5428. res += backend.use_initializer_list ? " }" : ")";
if (needs_trailing_bracket)
  5430. res += ")";
  5431. return res;
  5432. }
  5433. else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
  5434. {
  5435. // Metal tessellation likes empty structs which are then constant expressions.
  5436. if (backend.supports_empty_struct)
  5437. return "{ }";
  5438. else if (backend.use_typed_initializer_list)
  5439. return join(type_to_glsl(type), "{ 0 }");
  5440. else if (backend.use_initializer_list)
  5441. return "{ 0 }";
  5442. else
  5443. return join(type_to_glsl(type), "(0)");
  5444. }
  5445. else if (c.columns() == 1 && type.op != OpTypeCooperativeMatrixKHR)
  5446. {
  5447. auto res = constant_expression_vector(c, 0);
  5448. if (inside_struct_scope &&
  5449. backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
  5450. type.basetype == SPIRType::Boolean)
  5451. {
  5452. SPIRType tmp_type = type;
  5453. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  5454. res = join(type_to_glsl(tmp_type), "(", res, ")");
  5455. }
  5456. return res;
  5457. }
  5458. else
  5459. {
  5460. string res = type_to_glsl(type) + "(";
  5461. for (uint32_t col = 0; col < c.columns(); col++)
  5462. {
  5463. if (c.specialization_constant_id(col) != 0)
  5464. res += to_name(c.specialization_constant_id(col));
  5465. else
  5466. res += constant_expression_vector(c, col);
  5467. if (col + 1 < c.columns())
  5468. res += ", ";
  5469. }
  5470. res += ")";
  5471. if (inside_struct_scope &&
  5472. backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
  5473. type.basetype == SPIRType::Boolean)
  5474. {
  5475. SPIRType tmp_type = type;
  5476. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  5477. res = join(type_to_glsl(tmp_type), "(", res, ")");
  5478. }
  5479. return res;
  5480. }
  5481. }
  5482. #ifdef _MSC_VER
  5483. // snprintf does not exist or is buggy on older MSVC versions, some of them
  5484. // being used by MinGW. Use sprintf instead and disable corresponding warning.
  5485. #pragma warning(push)
  5486. #pragma warning(disable : 4996)
  5487. #endif
  5488. string CompilerGLSL::convert_floate4m3_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5489. {
  5490. string res;
  5491. float float_value = c.scalar_floate4m3(col, row);
  5492. // There is no infinity in e4m3.
  5493. if (std::isnan(float_value))
  5494. {
  5495. SPIRType type { OpTypeFloat };
  5496. type.basetype = SPIRType::Half;
  5497. type.vecsize = 1;
  5498. type.columns = 1;
  5499. res = join(type_to_glsl(type), "(0.0 / 0.0)");
  5500. }
  5501. else
  5502. {
  5503. SPIRType type { OpTypeFloat };
  5504. type.basetype = SPIRType::FloatE4M3;
  5505. type.vecsize = 1;
  5506. type.columns = 1;
  5507. res = join(type_to_glsl(type), "(", format_float(float_value), ")");
  5508. }
  5509. return res;
  5510. }
  5511. string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5512. {
  5513. string res;
  5514. bool is_bfloat8 = get<SPIRType>(c.constant_type).basetype == SPIRType::FloatE5M2;
  5515. float float_value = is_bfloat8 ? c.scalar_bf8(col, row) : c.scalar_f16(col, row);
  5516. // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
  5517. // of complicated workarounds, just value-cast to the half type always.
  5518. if (std::isnan(float_value) || std::isinf(float_value))
  5519. {
  5520. SPIRType type { OpTypeFloat };
  5521. type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half;
  5522. type.vecsize = 1;
  5523. type.columns = 1;
  5524. if (float_value == numeric_limits<float>::infinity())
  5525. res = join(type_to_glsl(type), "(1.0 / 0.0)");
  5526. else if (float_value == -numeric_limits<float>::infinity())
  5527. res = join(type_to_glsl(type), "(-1.0 / 0.0)");
  5528. else if (std::isnan(float_value))
  5529. res = join(type_to_glsl(type), "(0.0 / 0.0)");
  5530. else
  5531. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  5532. }
  5533. else
  5534. {
  5535. SPIRType type { OpTypeFloat };
  5536. type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half;
  5537. type.vecsize = 1;
  5538. type.columns = 1;
  5539. res = join(type_to_glsl(type), "(", format_float(float_value), ")");
  5540. }
  5541. return res;
  5542. }
  5543. string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5544. {
  5545. string res;
  5546. bool is_bfloat16 = get<SPIRType>(c.constant_type).basetype == SPIRType::BFloat16;
  5547. float float_value = is_bfloat16 ? c.scalar_bf16(col, row) : c.scalar_f32(col, row);
  5548. if (std::isnan(float_value) || std::isinf(float_value))
  5549. {
  5550. // Use special representation.
  5551. if (!is_legacy())
  5552. {
  5553. SPIRType out_type { OpTypeFloat };
  5554. SPIRType in_type { OpTypeInt };
  5555. out_type.basetype = SPIRType::Float;
  5556. in_type.basetype = SPIRType::UInt;
  5557. out_type.vecsize = 1;
  5558. in_type.vecsize = 1;
  5559. out_type.width = 32;
  5560. in_type.width = 32;
  5561. char print_buffer[32];
  5562. #ifdef _WIN32
  5563. sprintf(print_buffer, "0x%xu", c.scalar(col, row));
  5564. #else
  5565. snprintf(print_buffer, sizeof(print_buffer), "0x%xu", c.scalar(col, row));
  5566. #endif
  5567. const char *comment = "inf";
  5568. if (float_value == -numeric_limits<float>::infinity())
  5569. comment = "-inf";
  5570. else if (std::isnan(float_value))
  5571. comment = "nan";
  5572. res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
  5573. }
  5574. else
  5575. {
  5576. if (float_value == numeric_limits<float>::infinity())
  5577. {
  5578. if (backend.float_literal_suffix)
  5579. res = "(1.0f / 0.0f)";
  5580. else
  5581. res = "(1.0 / 0.0)";
  5582. }
  5583. else if (float_value == -numeric_limits<float>::infinity())
  5584. {
  5585. if (backend.float_literal_suffix)
  5586. res = "(-1.0f / 0.0f)";
  5587. else
  5588. res = "(-1.0 / 0.0)";
  5589. }
  5590. else if (std::isnan(float_value))
  5591. {
  5592. if (backend.float_literal_suffix)
  5593. res = "(0.0f / 0.0f)";
  5594. else
  5595. res = "(0.0 / 0.0)";
  5596. }
  5597. else
  5598. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  5599. }
  5600. }
  5601. else
  5602. {
  5603. res = format_float(float_value);
  5604. if (backend.float_literal_suffix)
  5605. res += "f";
  5606. }
  5607. if (is_bfloat16)
  5608. res = join("bfloat16_t(", res, ")");
  5609. return res;
  5610. }
  5611. std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5612. {
  5613. string res;
  5614. double double_value = c.scalar_f64(col, row);
  5615. if (std::isnan(double_value) || std::isinf(double_value))
  5616. {
  5617. // Use special representation.
  5618. if (!is_legacy())
  5619. {
  5620. SPIRType out_type { OpTypeFloat };
  5621. SPIRType in_type { OpTypeInt };
  5622. out_type.basetype = SPIRType::Double;
  5623. in_type.basetype = SPIRType::UInt64;
  5624. out_type.vecsize = 1;
  5625. in_type.vecsize = 1;
  5626. out_type.width = 64;
  5627. in_type.width = 64;
  5628. uint64_t u64_value = c.scalar_u64(col, row);
  5629. if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
  5630. SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
  5631. require_extension_internal("GL_ARB_gpu_shader_int64");
  5632. char print_buffer[64];
  5633. #ifdef _WIN32
  5634. sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
  5635. backend.long_long_literal_suffix ? "ull" : "ul");
  5636. #else
  5637. snprintf(print_buffer, sizeof(print_buffer), "0x%llx%s", static_cast<unsigned long long>(u64_value),
  5638. backend.long_long_literal_suffix ? "ull" : "ul");
  5639. #endif
  5640. const char *comment = "inf";
  5641. if (double_value == -numeric_limits<double>::infinity())
  5642. comment = "-inf";
  5643. else if (std::isnan(double_value))
  5644. comment = "nan";
  5645. res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
  5646. }
  5647. else
  5648. {
  5649. if (options.es)
  5650. SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
  5651. if (options.version < 400)
  5652. require_extension_internal("GL_ARB_gpu_shader_fp64");
  5653. if (double_value == numeric_limits<double>::infinity())
  5654. {
  5655. if (backend.double_literal_suffix)
  5656. res = "(1.0lf / 0.0lf)";
  5657. else
  5658. res = "(1.0 / 0.0)";
  5659. }
  5660. else if (double_value == -numeric_limits<double>::infinity())
  5661. {
  5662. if (backend.double_literal_suffix)
  5663. res = "(-1.0lf / 0.0lf)";
  5664. else
  5665. res = "(-1.0 / 0.0)";
  5666. }
  5667. else if (std::isnan(double_value))
  5668. {
  5669. if (backend.double_literal_suffix)
  5670. res = "(0.0lf / 0.0lf)";
  5671. else
  5672. res = "(0.0 / 0.0)";
  5673. }
  5674. else
  5675. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  5676. }
  5677. }
  5678. else
  5679. {
  5680. res = format_double(double_value);
  5681. if (backend.double_literal_suffix)
  5682. res += "lf";
  5683. }
  5684. return res;
  5685. }
  5686. #ifdef _MSC_VER
  5687. #pragma warning(pop)
  5688. #endif
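// Emits a single column of a constant as a vector (or scalar) expression.
// Splatting / swizzle-splatting is used when the backend supports it and all components
// are identical and not specialization constants.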
  5689. string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
  5690. {
  5691. auto type = get<SPIRType>(c.constant_type);
  5692. type.columns = 1;
  5693. auto scalar_type = type;
  5694. scalar_type.vecsize = 1;
  5695. string res;
  5696. bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
  5697. bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
  5698. if (!type_is_floating_point(type))
  5699. {
  5700. // Cannot swizzle literal integers as a special case.
  5701. swizzle_splat = false;
  5702. }
  5703. if (splat || swizzle_splat)
  5704. {
  5705. // Cannot use constant splatting if we have specialization constants somewhere in the vector.
  5706. for (uint32_t i = 0; i < c.vector_size(); i++)
  5707. {
  5708. if (c.specialization_constant_id(vector, i) != 0)
  5709. {
  5710. splat = false;
  5711. swizzle_splat = false;
  5712. break;
  5713. }
  5714. }
  5715. }
  5716. if (splat || swizzle_splat)
  5717. {
  5718. if (type.width == 64)
  5719. {
  5720. uint64_t ident = c.scalar_u64(vector, 0);
  5721. for (uint32_t i = 1; i < c.vector_size(); i++)
  5722. {
  5723. if (ident != c.scalar_u64(vector, i))
  5724. {
  5725. splat = false;
  5726. swizzle_splat = false;
  5727. break;
  5728. }
  5729. }
  5730. }
  5731. else
  5732. {
  5733. uint32_t ident = c.scalar(vector, 0);
  5734. for (uint32_t i = 1; i < c.vector_size(); i++)
  5735. {
  5736. if (ident != c.scalar(vector, i))
  5737. {
  5738. splat = false;
  5739. swizzle_splat = false;
  5740. }
  5741. }
  5742. }
  5743. }
  5744. if (c.vector_size() > 1 && !swizzle_splat)
  5745. res += type_to_glsl(type) + "(";
  5746. switch (type.basetype)
  5747. {
  5748. case SPIRType::FloatE4M3:
  5749. if (splat || swizzle_splat)
  5750. {
  5751. res += convert_floate4m3_to_string(c, vector, 0);
  5752. if (swizzle_splat)
  5753. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5754. }
  5755. else
  5756. {
  5757. for (uint32_t i = 0; i < c.vector_size(); i++)
  5758. {
  5759. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5760. res += to_expression(c.specialization_constant_id(vector, i));
  5761. else
  5762. res += convert_floate4m3_to_string(c, vector, i);
  5763. if (i + 1 < c.vector_size())
  5764. res += ", ";
  5765. }
  5766. }
  5767. break;
  5768. case SPIRType::FloatE5M2:
  5769. case SPIRType::Half:
  5770. if (splat || swizzle_splat)
  5771. {
  5772. res += convert_half_to_string(c, vector, 0);
  5773. if (swizzle_splat)
  5774. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5775. }
  5776. else
  5777. {
  5778. for (uint32_t i = 0; i < c.vector_size(); i++)
  5779. {
  5780. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5781. res += to_expression(c.specialization_constant_id(vector, i));
  5782. else
  5783. res += convert_half_to_string(c, vector, i);
  5784. if (i + 1 < c.vector_size())
  5785. res += ", ";
  5786. }
  5787. }
  5788. break;
  5789. case SPIRType::BFloat16:
  5790. case SPIRType::Float:
  5791. if (splat || swizzle_splat)
  5792. {
  5793. res += convert_float_to_string(c, vector, 0);
  5794. if (swizzle_splat)
  5795. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5796. }
  5797. else
  5798. {
  5799. for (uint32_t i = 0; i < c.vector_size(); i++)
  5800. {
  5801. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5802. res += to_expression(c.specialization_constant_id(vector, i));
  5803. else
  5804. res += convert_float_to_string(c, vector, i);
  5805. if (i + 1 < c.vector_size())
  5806. res += ", ";
  5807. }
  5808. }
  5809. break;
  5810. case SPIRType::Double:
  5811. if (splat || swizzle_splat)
  5812. {
  5813. res += convert_double_to_string(c, vector, 0);
  5814. if (swizzle_splat)
  5815. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5816. }
  5817. else
  5818. {
  5819. for (uint32_t i = 0; i < c.vector_size(); i++)
  5820. {
  5821. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5822. res += to_expression(c.specialization_constant_id(vector, i));
  5823. else
  5824. res += convert_double_to_string(c, vector, i);
  5825. if (i + 1 < c.vector_size())
  5826. res += ", ";
  5827. }
  5828. }
  5829. break;
  5830. case SPIRType::Int64:
  5831. {
  5832. auto tmp = type;
  5833. tmp.vecsize = 1;
  5834. tmp.columns = 1;
  5835. auto int64_type = type_to_glsl(tmp);
  5836. if (splat)
  5837. {
  5838. res += convert_to_string(c.scalar_i64(vector, 0), int64_type, backend.long_long_literal_suffix);
  5839. }
  5840. else
  5841. {
  5842. for (uint32_t i = 0; i < c.vector_size(); i++)
  5843. {
  5844. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5845. res += to_expression(c.specialization_constant_id(vector, i));
  5846. else
  5847. res += convert_to_string(c.scalar_i64(vector, i), int64_type, backend.long_long_literal_suffix);
  5848. if (i + 1 < c.vector_size())
  5849. res += ", ";
  5850. }
  5851. }
  5852. break;
  5853. }
  5854. case SPIRType::UInt64:
  5855. if (splat)
  5856. {
  5857. res += convert_to_string(c.scalar_u64(vector, 0));
  5858. if (backend.long_long_literal_suffix)
  5859. res += "ull";
  5860. else
  5861. res += "ul";
  5862. }
  5863. else
  5864. {
  5865. for (uint32_t i = 0; i < c.vector_size(); i++)
  5866. {
  5867. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5868. res += to_expression(c.specialization_constant_id(vector, i));
  5869. else
  5870. {
  5871. res += convert_to_string(c.scalar_u64(vector, i));
  5872. if (backend.long_long_literal_suffix)
  5873. res += "ull";
  5874. else
  5875. res += "ul";
  5876. }
  5877. if (i + 1 < c.vector_size())
  5878. res += ", ";
  5879. }
  5880. }
  5881. break;
  5882. case SPIRType::UInt:
  5883. if (splat)
  5884. {
  5885. res += convert_to_string(c.scalar(vector, 0));
  5886. if (is_legacy() && !has_extension("GL_EXT_gpu_shader4"))
  5887. {
  5888. // Fake unsigned constant literals with signed ones if possible.
  5889. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
  5890. if (c.scalar_i32(vector, 0) < 0)
  5891. SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
  5892. }
  5893. else if (backend.uint32_t_literal_suffix)
  5894. res += "u";
  5895. }
  5896. else
  5897. {
  5898. for (uint32_t i = 0; i < c.vector_size(); i++)
  5899. {
  5900. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5901. res += to_expression(c.specialization_constant_id(vector, i));
  5902. else
  5903. {
  5904. res += convert_to_string(c.scalar(vector, i));
  5905. if (is_legacy() && !has_extension("GL_EXT_gpu_shader4"))
  5906. {
  5907. // Fake unsigned constant literals with signed ones if possible.
  5908. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
  5909. if (c.scalar_i32(vector, i) < 0)
  5910. SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
  5911. "the literal negative.");
  5912. }
  5913. else if (backend.uint32_t_literal_suffix)
  5914. res += "u";
  5915. }
  5916. if (i + 1 < c.vector_size())
  5917. res += ", ";
  5918. }
  5919. }
  5920. break;
  5921. case SPIRType::Int:
  5922. if (splat)
  5923. res += convert_to_string(c.scalar_i32(vector, 0));
  5924. else
  5925. {
  5926. for (uint32_t i = 0; i < c.vector_size(); i++)
  5927. {
  5928. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5929. res += to_expression(c.specialization_constant_id(vector, i));
  5930. else
  5931. res += convert_to_string(c.scalar_i32(vector, i));
  5932. if (i + 1 < c.vector_size())
  5933. res += ", ";
  5934. }
  5935. }
  5936. break;
  5937. case SPIRType::UShort:
  5938. if (splat)
  5939. {
  5940. res += convert_to_string(c.scalar(vector, 0));
  5941. }
  5942. else
  5943. {
  5944. for (uint32_t i = 0; i < c.vector_size(); i++)
  5945. {
  5946. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5947. res += to_expression(c.specialization_constant_id(vector, i));
  5948. else
  5949. {
  5950. if (*backend.uint16_t_literal_suffix)
  5951. {
  5952. res += convert_to_string(c.scalar_u16(vector, i));
  5953. res += backend.uint16_t_literal_suffix;
  5954. }
  5955. else
  5956. {
  5957. // If backend doesn't have a literal suffix, we need to value cast.
  5958. res += type_to_glsl(scalar_type);
  5959. res += "(";
  5960. res += convert_to_string(c.scalar_u16(vector, i));
  5961. res += ")";
  5962. }
  5963. }
  5964. if (i + 1 < c.vector_size())
  5965. res += ", ";
  5966. }
  5967. }
  5968. break;
  5969. case SPIRType::Short:
  5970. if (splat)
  5971. {
  5972. res += convert_to_string(c.scalar_i16(vector, 0));
  5973. }
  5974. else
  5975. {
  5976. for (uint32_t i = 0; i < c.vector_size(); i++)
  5977. {
  5978. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5979. res += to_expression(c.specialization_constant_id(vector, i));
  5980. else
  5981. {
  5982. if (*backend.int16_t_literal_suffix)
  5983. {
  5984. res += convert_to_string(c.scalar_i16(vector, i));
  5985. res += backend.int16_t_literal_suffix;
  5986. }
  5987. else
  5988. {
  5989. // If backend doesn't have a literal suffix, we need to value cast.
  5990. res += type_to_glsl(scalar_type);
  5991. res += "(";
  5992. res += convert_to_string(c.scalar_i16(vector, i));
  5993. res += ")";
  5994. }
  5995. }
  5996. if (i + 1 < c.vector_size())
  5997. res += ", ";
  5998. }
  5999. }
  6000. break;
  6001. case SPIRType::UByte:
  6002. if (splat)
  6003. {
  6004. res += convert_to_string(c.scalar_u8(vector, 0));
  6005. }
  6006. else
  6007. {
  6008. for (uint32_t i = 0; i < c.vector_size(); i++)
  6009. {
  6010. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  6011. res += to_expression(c.specialization_constant_id(vector, i));
  6012. else
  6013. {
  6014. res += type_to_glsl(scalar_type);
  6015. res += "(";
  6016. res += convert_to_string(c.scalar_u8(vector, i));
  6017. res += ")";
  6018. }
  6019. if (i + 1 < c.vector_size())
  6020. res += ", ";
  6021. }
  6022. }
  6023. break;
  6024. case SPIRType::SByte:
  6025. if (splat)
  6026. {
  6027. res += convert_to_string(c.scalar_i8(vector, 0));
  6028. }
  6029. else
  6030. {
  6031. for (uint32_t i = 0; i < c.vector_size(); i++)
  6032. {
  6033. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  6034. res += to_expression(c.specialization_constant_id(vector, i));
  6035. else
  6036. {
  6037. res += type_to_glsl(scalar_type);
  6038. res += "(";
  6039. res += convert_to_string(c.scalar_i8(vector, i));
  6040. res += ")";
  6041. }
  6042. if (i + 1 < c.vector_size())
  6043. res += ", ";
  6044. }
  6045. }
  6046. break;
  6047. case SPIRType::Boolean:
  6048. if (splat)
  6049. res += c.scalar(vector, 0) ? "true" : "false";
  6050. else
  6051. {
  6052. for (uint32_t i = 0; i < c.vector_size(); i++)
  6053. {
  6054. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  6055. res += to_expression(c.specialization_constant_id(vector, i));
  6056. else
  6057. res += c.scalar(vector, i) ? "true" : "false";
  6058. if (i + 1 < c.vector_size())
  6059. res += ", ";
  6060. }
  6061. }
  6062. break;
  6063. default:
  6064. SPIRV_CROSS_THROW("Invalid constant expression basetype.");
  6065. }
  6066. if (c.vector_size() > 1 && !swizzle_splat)
  6067. res += ")";
  6068. return res;
  6069. }
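// Declares an uninitialized temporary for this ID and registers it as a forced temporary
// so later uses read the declared variable instead of a forwarded expression.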
  6070. SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
  6071. {
  6072. forced_temporaries.insert(id);
  6073. emit_uninitialized_temporary(type, id);
  6074. return set<SPIRExpression>(id, to_name(id), type, true);
  6075. }
  6076. void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
  6077. {
  6078. // If we're declaring temporaries inside continue blocks,
  6079. // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
  6080. if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
  6081. {
  6082. auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
  6083. if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
  6084. [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
  6085. return tmp.first == result_type && tmp.second == result_id;
  6086. }) == end(header.declare_temporary))
  6087. {
  6088. header.declare_temporary.emplace_back(result_type, result_id);
  6089. hoisted_temporaries.insert(result_id);
  6090. force_recompile();
  6091. }
  6092. }
  6093. else if (hoisted_temporaries.count(result_id) == 0)
  6094. {
  6095. auto &type = get<SPIRType>(result_type);
  6096. auto &flags = get_decoration_bitset(result_id);
  6097. // The result_id has not been made into an expression yet, so use flags interface.
  6098. add_local_variable_name(result_id);
  6099. string initializer;
  6100. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  6101. initializer = join(" = ", to_zero_initialized_expression(result_type));
  6102. statement(flags_to_qualifiers_glsl(type, result_id, flags), variable_decl(type, to_name(result_id)), initializer, ";");
  6103. }
  6104. }
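// A temporary can be declared inline only if it does not have to be hoisted into a loop
// header (continue-block case) and has not already been hoisted.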
  6105. bool CompilerGLSL::can_declare_inline_temporary(uint32_t id) const
  6106. {
  6107. if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(id))
  6108. return false;
  6109. if (hoisted_temporaries.count(id))
  6110. return false;
  6111. return true;
  6112. }
  6113. string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
  6114. {
  6115. auto &type = get<SPIRType>(result_type);
  6116. // If we're declaring temporaries inside continue blocks,
  6117. // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
  6118. if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
  6119. {
  6120. auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
  6121. if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
  6122. [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
  6123. return tmp.first == result_type && tmp.second == result_id;
  6124. }) == end(header.declare_temporary))
  6125. {
  6126. header.declare_temporary.emplace_back(result_type, result_id);
  6127. hoisted_temporaries.insert(result_id);
  6128. force_recompile_guarantee_forward_progress();
  6129. }
  6130. return join(to_name(result_id), " = ");
  6131. }
  6132. else if (hoisted_temporaries.count(result_id))
  6133. {
  6134. // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
  6135. return join(to_name(result_id), " = ");
  6136. }
  6137. else
  6138. {
  6139. // The result_id has not been made into an expression yet, so use flags interface.
  6140. add_local_variable_name(result_id);
  6141. auto &flags = get_decoration_bitset(result_id);
  6142. return join(flags_to_qualifiers_glsl(type, result_id, flags), variable_decl(type, to_name(result_id)), " = ");
  6143. }
  6144. }
  6145. bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
  6146. {
  6147. return forwarded_temporaries.count(id) != 0;
  6148. }
  6149. bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
  6150. {
  6151. return suppressed_usage_tracking.count(id) != 0;
  6152. }
  6153. bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
  6154. {
  6155. auto *expr = maybe_get<SPIRExpression>(id);
  6156. if (!expr)
  6157. return false;
  6158. // If we're emitting code at a deeper loop level than when we emitted the expression,
  6159. // we're probably reading the same expression over and over.
  6160. return current_loop_level > expr->emitted_loop_level;
  6161. }
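// Core expression emitter: either forwards the RHS as a pure expression or flushes it
// to a declared temporary, depending on forwarding rules and forced temporaries.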
  6162. SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
  6163. bool suppress_usage_tracking)
  6164. {
  6165. if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
  6166. {
  6167. // Just forward it without temporary.
  6168. // If the forward is trivial, we do not force flushing to temporary for this expression.
  6169. forwarded_temporaries.insert(result_id);
  6170. if (suppress_usage_tracking)
  6171. suppressed_usage_tracking.insert(result_id);
  6172. return set<SPIRExpression>(result_id, rhs, result_type, true);
  6173. }
  6174. else
  6175. {
  6176. // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
  6177. statement(declare_temporary(result_type, result_id), rhs, ";");
  6178. return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
  6179. }
  6180. }
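// Variant of emit_op for expressions that are already transposed; if a temporary is needed,
// it is declared with a transposed type and the expression is marked with need_transpose.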
  6181. void CompilerGLSL::emit_transposed_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding)
  6182. {
  6183. if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
  6184. {
  6185. // Just forward it without temporary.
  6186. // If the forward is trivial, we do not force flushing to temporary for this expression.
  6187. forwarded_temporaries.insert(result_id);
  6188. auto &e = set<SPIRExpression>(result_id, rhs, result_type, true);
  6189. e.need_transpose = true;
  6190. }
  6191. else if (can_declare_inline_temporary(result_id))
  6192. {
  6193. // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
  6194. // Since the expression is transposed, we have to ensure the temporary is the transposed type.
  6195. auto &transposed_type_id = extra_sub_expressions[result_id];
  6196. if (!transposed_type_id)
  6197. {
  6198. auto dummy_type = get<SPIRType>(result_type);
  6199. std::swap(dummy_type.columns, dummy_type.vecsize);
  6200. transposed_type_id = ir.increase_bound_by(1);
  6201. set<SPIRType>(transposed_type_id, dummy_type);
  6202. }
  6203. statement(declare_temporary(transposed_type_id, result_id), rhs, ";");
  6204. auto &e = set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
  6205. e.need_transpose = true;
  6206. }
  6207. else
  6208. {
  6209. // If we cannot declare the temporary because it's already been hoisted, we don't have the
  6210. // chance to override the temporary type ourselves. Just transpose() the expression.
  6211. emit_op(result_type, result_id, join("transpose(", rhs, ")"), forwarding);
  6212. }
  6213. }
  6214. void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  6215. {
  6216. bool forward = should_forward(op0);
  6217. emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
  6218. inherit_expression_dependencies(result_id, op0);
  6219. }
  6220. void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  6221. {
  6222. auto &type = get<SPIRType>(result_type);
  6223. bool forward = should_forward(op0);
  6224. emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), forward);
  6225. inherit_expression_dependencies(result_id, op0);
  6226. }
  6227. void CompilerGLSL::emit_mesh_tasks(SPIRBlock &block)
  6228. {
  6229. statement("EmitMeshTasksEXT(",
  6230. to_unpacked_expression(block.mesh.groups[0]), ", ",
  6231. to_unpacked_expression(block.mesh.groups[1]), ", ",
  6232. to_unpacked_expression(block.mesh.groups[2]), ");");
  6233. }
  6234. void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
  6235. {
  6236. // Various FP arithmetic opcodes such as add, sub, mul will hit this.
  6237. bool force_temporary_precise = backend.support_precise_qualifier &&
  6238. has_legacy_nocontract(result_type, result_id) &&
  6239. type_is_floating_point(get<SPIRType>(result_type));
  6240. bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;
  6241. emit_op(result_type, result_id,
  6242. join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
  6243. inherit_expression_dependencies(result_id, op0);
  6244. inherit_expression_dependencies(result_id, op1);
  6245. }
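// Applies a unary operator per component and reassembles the result with a constructor,
// for operators GLSL cannot apply to a whole vector at once.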
  6246. void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
  6247. {
  6248. auto &type = get<SPIRType>(result_type);
  6249. auto expr = type_to_glsl_constructor(type);
  6250. expr += '(';
  6251. for (uint32_t i = 0; i < type.vecsize; i++)
  6252. {
  6253. // Make sure to call to_expression multiple times to ensure
  6254. // that these expressions are properly flushed to temporaries if needed.
  6255. expr += op;
  6256. expr += to_extract_component_expression(operand, i);
  6257. if (i + 1 < type.vecsize)
  6258. expr += ", ";
  6259. }
  6260. expr += ')';
  6261. emit_op(result_type, result_id, expr, should_forward(operand));
  6262. inherit_expression_dependencies(result_id, operand);
  6263. }
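// Component-wise expansion of a binary operator, with optional per-component bitcasts to
// an expected input type and optional logical negation of each component.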
  6264. void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6265. const char *op, bool negate, SPIRType::BaseType expected_type)
  6266. {
  6267. auto &type0 = expression_type(op0);
  6268. auto &type1 = expression_type(op1);
  6269. SPIRType target_type0 = type0;
  6270. SPIRType target_type1 = type1;
  6271. target_type0.basetype = expected_type;
  6272. target_type1.basetype = expected_type;
  6273. target_type0.vecsize = 1;
  6274. target_type1.vecsize = 1;
  6275. auto &type = get<SPIRType>(result_type);
  6276. auto expr = type_to_glsl_constructor(type);
  6277. expr += '(';
  6278. for (uint32_t i = 0; i < type.vecsize; i++)
  6279. {
  6280. // Make sure to call to_expression multiple times to ensure
  6281. // that these expressions are properly flushed to temporaries if needed.
  6282. if (negate)
  6283. expr += "!(";
  6284. if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
  6285. expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
  6286. else
  6287. expr += to_extract_component_expression(op0, i);
  6288. expr += ' ';
  6289. expr += op;
  6290. expr += ' ';
  6291. if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
  6292. expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
  6293. else
  6294. expr += to_extract_component_expression(op1, i);
  6295. if (negate)
  6296. expr += ")";
  6297. if (i + 1 < type.vecsize)
  6298. expr += ", ";
  6299. }
  6300. expr += ')';
  6301. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  6302. inherit_expression_dependencies(result_id, op0);
  6303. inherit_expression_dependencies(result_id, op1);
  6304. }
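// Determines whether both operands must be bitcast to a common input type and produces the
// casted operand strings; returns the synthesized type used for the cast.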
  6305. SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
  6306. uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
  6307. {
  6308. auto &type0 = expression_type(op0);
  6309. auto &type1 = expression_type(op1);
  6310. // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
  6311. // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
  6312. // since equality test is exactly the same.
  6313. bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
  6314. // Create a fake type so we can bitcast to it.
   6315. // We only deal with regular arithmetic types here like ints, uints and so on.
  6316. SPIRType expected_type{type0.op};
  6317. expected_type.basetype = input_type;
  6318. expected_type.vecsize = type0.vecsize;
  6319. expected_type.columns = type0.columns;
  6320. expected_type.width = type0.width;
  6321. if (cast)
  6322. {
  6323. cast_op0 = bitcast_glsl(expected_type, op0);
  6324. cast_op1 = bitcast_glsl(expected_type, op1);
  6325. }
  6326. else
  6327. {
  6328. // If we don't cast, our actual input type is that of the first (or second) argument.
  6329. cast_op0 = to_enclosed_unpacked_expression(op0);
  6330. cast_op1 = to_enclosed_unpacked_expression(op1);
  6331. input_type = type0.basetype;
  6332. }
  6333. return expected_type;
  6334. }
  6335. bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
  6336. {
  6337. // Some bitcasts may require complex casting sequences, and are implemented here.
   6338. // Otherwise a simple unary function will do with bitcast_glsl_op.
  6339. auto &output_type = get<SPIRType>(result_type);
  6340. auto &input_type = expression_type(op0);
  6341. string expr;
  6342. if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
  6343. expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
  6344. else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
  6345. input_type.vecsize == 2)
  6346. expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
  6347. else
  6348. return false;
  6349. emit_op(result_type, id, expr, should_forward(op0));
  6350. return true;
  6351. }
  6352. void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6353. const char *op, SPIRType::BaseType input_type,
  6354. bool skip_cast_if_equal_type,
  6355. bool implicit_integer_promotion)
  6356. {
  6357. string cast_op0, cast_op1;
  6358. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
  6359. auto &out_type = get<SPIRType>(result_type);
  6360. // We might have casted away from the result type, so bitcast again.
  6361. // For example, arithmetic right shift with uint inputs.
  6362. // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
  6363. auto bitop = join(cast_op0, " ", op, " ", cast_op1);
  6364. string expr;
  6365. if (implicit_integer_promotion)
  6366. {
  6367. // Simple value cast.
  6368. expr = join(type_to_glsl(out_type), '(', bitop, ')');
  6369. }
  6370. else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
  6371. {
  6372. expected_type.basetype = input_type;
  6373. expr = join(bitcast_glsl_op(out_type, expected_type), '(', bitop, ')');
  6374. }
  6375. else
  6376. {
  6377. expr = std::move(bitop);
  6378. }
  6379. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  6380. inherit_expression_dependencies(result_id, op0);
  6381. inherit_expression_dependencies(result_id, op1);
  6382. }
  6383. void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  6384. {
  6385. bool forward = should_forward(op0);
  6386. emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
  6387. inherit_expression_dependencies(result_id, op0);
  6388. }
  6389. void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6390. const char *op)
  6391. {
  6392. // Opaque types (e.g. OpTypeSampledImage) must always be forwarded in GLSL
  6393. const auto &type = get_type(result_type);
  6394. bool must_forward = type_is_opaque_value(type);
  6395. bool forward = must_forward || (should_forward(op0) && should_forward(op1));
  6396. emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
  6397. forward);
  6398. inherit_expression_dependencies(result_id, op0);
  6399. inherit_expression_dependencies(result_id, op1);
  6400. }
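// Atomic results are never forwarded: the call is flushed to a forced temporary and all
// atomic-capable variables are invalidated. Float and 64-bit atomics also require extensions.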
  6401. void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6402. const char *op)
  6403. {
  6404. auto &type = get<SPIRType>(result_type);
  6405. if (type_is_floating_point(type))
  6406. {
  6407. if (!options.vulkan_semantics)
  6408. SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics.");
  6409. if (options.es)
  6410. SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL.");
  6411. require_extension_internal("GL_EXT_shader_atomic_float");
  6412. }
  6413. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  6414. require_extension_internal("GL_EXT_shader_atomic_int64");
  6415. forced_temporaries.insert(result_id);
  6416. emit_op(result_type, result_id,
  6417. join(op, "(", to_atomic_ptr_expression(op0), ", ",
  6418. to_unpacked_expression(op1), ")"), false);
  6419. flush_all_atomic_capable_variables();
  6420. }
  6421. void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
  6422. uint32_t op0, uint32_t op1, uint32_t op2,
  6423. const char *op)
  6424. {
  6425. forced_temporaries.insert(result_id);
  6426. emit_op(result_type, result_id,
  6427. join(op, "(", to_non_uniform_aware_expression(op0), ", ",
  6428. to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false);
  6429. flush_all_atomic_capable_variables();
  6430. }
  6431. void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
  6432. SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
  6433. {
  6434. auto &out_type = get<SPIRType>(result_type);
  6435. auto &expr_type = expression_type(op0);
  6436. auto expected_type = out_type;
  6437. // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
  6438. expected_type.basetype = input_type;
  6439. expected_type.width = expr_type.width;
  6440. string cast_op;
  6441. if (expr_type.basetype != input_type)
  6442. {
  6443. if (expr_type.basetype == SPIRType::Boolean)
  6444. cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")");
  6445. else
  6446. cast_op = bitcast_glsl(expected_type, op0);
  6447. }
  6448. else
  6449. cast_op = to_unpacked_expression(op0);
  6450. string expr;
  6451. if (out_type.basetype != expected_result_type)
  6452. {
  6453. expected_type.basetype = expected_result_type;
  6454. expected_type.width = out_type.width;
  6455. if (out_type.basetype == SPIRType::Boolean)
  6456. expr = type_to_glsl(out_type);
  6457. else
  6458. expr = bitcast_glsl_op(out_type, expected_type);
  6459. expr += '(';
  6460. expr += join(op, "(", cast_op, ")");
  6461. expr += ')';
  6462. }
  6463. else
  6464. {
  6465. expr += join(op, "(", cast_op, ")");
  6466. }
  6467. emit_op(result_type, result_id, expr, should_forward(op0));
  6468. inherit_expression_dependencies(result_id, op0);
  6469. }
  6470. // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
  6471. // and different vector sizes all at once. Need a special purpose method here.
  6472. void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6473. uint32_t op2, const char *op,
  6474. SPIRType::BaseType expected_result_type,
  6475. SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
  6476. SPIRType::BaseType input_type2)
  6477. {
  6478. auto &out_type = get<SPIRType>(result_type);
  6479. auto expected_type = out_type;
  6480. expected_type.basetype = input_type0;
  6481. string cast_op0 =
  6482. expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  6483. auto op1_expr = to_unpacked_expression(op1);
  6484. auto op2_expr = to_unpacked_expression(op2);
  6485. // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
  6486. expected_type.basetype = input_type1;
  6487. expected_type.vecsize = 1;
  6488. string cast_op1 = expression_type(op1).basetype != input_type1 ?
  6489. join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
  6490. op1_expr;
  6491. expected_type.basetype = input_type2;
  6492. expected_type.vecsize = 1;
  6493. string cast_op2 = expression_type(op2).basetype != input_type2 ?
  6494. join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
  6495. op2_expr;
  6496. string expr;
  6497. if (out_type.basetype != expected_result_type)
  6498. {
  6499. expected_type.vecsize = out_type.vecsize;
  6500. expected_type.basetype = expected_result_type;
  6501. expr = bitcast_glsl_op(out_type, expected_type);
  6502. expr += '(';
  6503. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6504. expr += ')';
  6505. }
  6506. else
  6507. {
  6508. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6509. }
  6510. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
  6511. inherit_expression_dependencies(result_id, op0);
  6512. inherit_expression_dependencies(result_id, op1);
  6513. inherit_expression_dependencies(result_id, op2);
  6514. }
  6515. void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6516. uint32_t op2, const char *op, SPIRType::BaseType input_type)
  6517. {
  6518. auto &out_type = get<SPIRType>(result_type);
  6519. auto expected_type = out_type;
  6520. expected_type.basetype = input_type;
  6521. string cast_op0 =
  6522. expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  6523. string cast_op1 =
  6524. expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
  6525. string cast_op2 =
  6526. expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
  6527. string expr;
  6528. if (out_type.basetype != input_type)
  6529. {
  6530. expr = bitcast_glsl_op(out_type, expected_type);
  6531. expr += '(';
  6532. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6533. expr += ')';
  6534. }
  6535. else
  6536. {
  6537. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6538. }
  6539. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
  6540. inherit_expression_dependencies(result_id, op0);
  6541. inherit_expression_dependencies(result_id, op1);
  6542. inherit_expression_dependencies(result_id, op2);
  6543. }
  6544. void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
  6545. uint32_t op1, const char *op, SPIRType::BaseType input_type)
  6546. {
  6547. // Special purpose method for implementing clustered subgroup opcodes.
   6548. // The main difference is that op1 does not participate in any casting; it needs to be a literal.
  6549. auto &out_type = get<SPIRType>(result_type);
  6550. auto expected_type = out_type;
  6551. expected_type.basetype = input_type;
  6552. string cast_op0 =
  6553. expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  6554. string expr;
  6555. if (out_type.basetype != input_type)
  6556. {
  6557. expr = bitcast_glsl_op(out_type, expected_type);
  6558. expr += '(';
  6559. expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
  6560. expr += ')';
  6561. }
  6562. else
  6563. {
  6564. expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
  6565. }
  6566. emit_op(result_type, result_id, expr, should_forward(op0));
  6567. inherit_expression_dependencies(result_id, op0);
  6568. }
  6569. void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6570. const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
  6571. {
  6572. string cast_op0, cast_op1;
  6573. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
  6574. auto &out_type = get<SPIRType>(result_type);
  6575. // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
  6576. string expr;
  6577. if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
  6578. {
  6579. expected_type.basetype = input_type;
  6580. expr = bitcast_glsl_op(out_type, expected_type);
  6581. expr += '(';
  6582. expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
  6583. expr += ')';
  6584. }
  6585. else
  6586. {
  6587. expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
  6588. }
  6589. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  6590. inherit_expression_dependencies(result_id, op0);
  6591. inherit_expression_dependencies(result_id, op1);
  6592. }
  6593. void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6594. uint32_t op2, const char *op)
  6595. {
  6596. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
  6597. emit_op(result_type, result_id,
  6598. join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
  6599. to_unpacked_expression(op2), ")"),
  6600. forward);
  6601. inherit_expression_dependencies(result_id, op0);
  6602. inherit_expression_dependencies(result_id, op1);
  6603. inherit_expression_dependencies(result_id, op2);
  6604. }
  6605. void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6606. uint32_t op2, uint32_t op3, const char *op)
  6607. {
  6608. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
  6609. emit_op(result_type, result_id,
  6610. join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
  6611. to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
  6612. forward);
  6613. inherit_expression_dependencies(result_id, op0);
  6614. inherit_expression_dependencies(result_id, op1);
  6615. inherit_expression_dependencies(result_id, op2);
  6616. inherit_expression_dependencies(result_id, op3);
  6617. }
  6618. void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6619. uint32_t op2, uint32_t op3, const char *op,
  6620. SPIRType::BaseType offset_count_type)
  6621. {
  6622. // Only need to cast offset/count arguments. Types of base/insert must be same as result type,
  6623. // and bitfieldInsert is sign invariant.
  6624. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
  6625. auto op0_expr = to_unpacked_expression(op0);
  6626. auto op1_expr = to_unpacked_expression(op1);
  6627. auto op2_expr = to_unpacked_expression(op2);
  6628. auto op3_expr = to_unpacked_expression(op3);
  6629. assert(offset_count_type == SPIRType::UInt || offset_count_type == SPIRType::Int);
  6630. SPIRType target_type { OpTypeInt };
  6631. target_type.width = 32;
  6632. target_type.vecsize = 1;
  6633. target_type.basetype = offset_count_type;
  6634. if (expression_type(op2).basetype != offset_count_type)
  6635. {
  6636. // Value-cast here. Input might be 16-bit. GLSL requires int.
  6637. op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
  6638. }
  6639. if (expression_type(op3).basetype != offset_count_type)
  6640. {
  6641. // Value-cast here. Input might be 16-bit. GLSL requires int.
  6642. op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
  6643. }
  6644. emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
  6645. forward);
  6646. inherit_expression_dependencies(result_id, op0);
  6647. inherit_expression_dependencies(result_id, op1);
  6648. inherit_expression_dependencies(result_id, op2);
  6649. inherit_expression_dependencies(result_id, op3);
  6650. }
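// Maps a modern texture builtin onto its legacy GLSL/ESSL name (texture2D, shadow2DEXT,
// texture2DLodEXT, ...) based on image dimensionality, enabling any required extensions.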
  6651. string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
  6652. {
  6653. const char *type;
  6654. switch (imgtype.image.dim)
  6655. {
  6656. case Dim1D:
  6657. // Force 2D path for ES.
  6658. if (options.es)
  6659. type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
  6660. else
  6661. type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
  6662. break;
  6663. case Dim2D:
  6664. type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
  6665. break;
  6666. case Dim3D:
  6667. type = "3D";
  6668. break;
  6669. case DimCube:
  6670. type = "Cube";
  6671. break;
  6672. case DimRect:
  6673. type = "2DRect";
  6674. break;
  6675. case DimBuffer:
  6676. type = "Buffer";
  6677. break;
  6678. case DimSubpassData:
  6679. type = "2D";
  6680. break;
  6681. default:
  6682. type = "";
  6683. break;
  6684. }
  6685. // In legacy GLSL, an extension is required for textureLod in the fragment
  6686. // shader or textureGrad anywhere.
  6687. bool legacy_lod_ext = false;
  6688. auto &execution = get_entry_point();
  6689. if (op == "textureGrad" || op == "textureProjGrad" ||
  6690. ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
  6691. {
  6692. if (is_legacy_es())
  6693. {
  6694. legacy_lod_ext = true;
  6695. require_extension_internal("GL_EXT_shader_texture_lod");
  6696. }
  6697. else if (is_legacy_desktop())
  6698. require_extension_internal("GL_ARB_shader_texture_lod");
  6699. }
  6700. if (op == "textureLodOffset" || op == "textureProjLodOffset")
  6701. {
  6702. if (is_legacy_es())
  6703. SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
  6704. require_extension_internal("GL_EXT_gpu_shader4");
  6705. }
  6706. // GLES has very limited support for shadow samplers.
  6707. // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers,
   6708. // everything else just throws.
  6709. bool is_comparison = is_depth_image(imgtype, tex);
  6710. if (is_comparison && is_legacy_es())
  6711. {
  6712. if (op == "texture" || op == "textureProj")
  6713. require_extension_internal("GL_EXT_shadow_samplers");
  6714. else
  6715. SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
  6716. if (imgtype.image.dim == DimCube)
  6717. return "shadowCubeNV";
  6718. }
  6719. if (op == "textureSize")
  6720. {
  6721. if (is_legacy_es())
  6722. SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
  6723. if (is_comparison)
  6724. SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
  6725. require_extension_internal("GL_EXT_gpu_shader4");
  6726. }
  6727. if (op == "texelFetch" && is_legacy_es())
  6728. SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
  6729. bool is_es_and_depth = is_legacy_es() && is_comparison;
  6730. std::string type_prefix = is_comparison ? "shadow" : "texture";
  6731. if (op == "texture")
  6732. return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
  6733. else if (op == "textureLod")
  6734. return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
  6735. else if (op == "textureProj")
  6736. return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
  6737. else if (op == "textureGrad")
  6738. return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
  6739. else if (op == "textureProjLod")
  6740. return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
  6741. else if (op == "textureLodOffset")
  6742. return join(type_prefix, type, "LodOffset");
  6743. else if (op == "textureProjGrad")
  6744. return join(type_prefix, type,
  6745. is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
  6746. else if (op == "textureProjLodOffset")
  6747. return join(type_prefix, type, "ProjLodOffset");
  6748. else if (op == "textureSize")
  6749. return join("textureSize", type);
  6750. else if (op == "texelFetch")
  6751. return join("texelFetch", type);
  6752. else
  6753. {
  6754. SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
  6755. }
  6756. }
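// Detects OpSelect between constant 0 and 1 so the mix can be emitted as a simple
// constructor cast from bool instead of mix() or a ternary chain.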
  6757. bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
  6758. {
  6759. auto *cleft = maybe_get<SPIRConstant>(left);
  6760. auto *cright = maybe_get<SPIRConstant>(right);
  6761. auto &lerptype = expression_type(lerp);
  6762. // If our targets aren't constants, we cannot use construction.
  6763. if (!cleft || !cright)
  6764. return false;
  6765. // If our targets are spec constants, we cannot use construction.
  6766. if (cleft->specialization || cright->specialization)
  6767. return false;
  6768. auto &value_type = get<SPIRType>(cleft->constant_type);
  6769. if (lerptype.basetype != SPIRType::Boolean)
  6770. return false;
  6771. if (value_type.basetype == SPIRType::Struct || is_array(value_type))
  6772. return false;
  6773. if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
  6774. return false;
  6775. // Only valid way in SPIR-V 1.4 to use matrices in select is a scalar select.
   6776. // matrix(scalar) constructor fills in diagonals, so it gets messy very quickly.
  6777. // Just avoid this case.
  6778. if (value_type.columns > 1)
  6779. return false;
  6780. // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor.
  6781. bool ret = true;
  6782. for (uint32_t row = 0; ret && row < value_type.vecsize; row++)
  6783. {
  6784. switch (type.basetype)
  6785. {
  6786. case SPIRType::Short:
  6787. case SPIRType::UShort:
  6788. ret = cleft->scalar_u16(0, row) == 0 && cright->scalar_u16(0, row) == 1;
  6789. break;
  6790. case SPIRType::Int:
  6791. case SPIRType::UInt:
  6792. ret = cleft->scalar(0, row) == 0 && cright->scalar(0, row) == 1;
  6793. break;
  6794. case SPIRType::Half:
  6795. ret = cleft->scalar_f16(0, row) == 0.0f && cright->scalar_f16(0, row) == 1.0f;
  6796. break;
  6797. case SPIRType::Float:
  6798. ret = cleft->scalar_f32(0, row) == 0.0f && cright->scalar_f32(0, row) == 1.0f;
  6799. break;
  6800. case SPIRType::Double:
  6801. ret = cleft->scalar_f64(0, row) == 0.0 && cright->scalar_f64(0, row) == 1.0;
  6802. break;
  6803. case SPIRType::Int64:
  6804. case SPIRType::UInt64:
  6805. ret = cleft->scalar_u64(0, row) == 0 && cright->scalar_u64(0, row) == 1;
  6806. break;
  6807. default:
  6808. ret = false;
  6809. break;
  6810. }
  6811. }
  6812. if (ret)
  6813. op = type_to_glsl_constructor(type);
  6814. return ret;
  6815. }
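// Builds a ?: expression, expanding it component by component when the selector is a
// vector, since GLSL's ternary operator only takes a scalar condition.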
  6816. string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
  6817. uint32_t false_value)
  6818. {
  6819. string expr;
  6820. auto &lerptype = expression_type(select);
  6821. if (lerptype.vecsize == 1)
  6822. expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
  6823. to_enclosed_pointer_expression(false_value));
  6824. else
  6825. {
  6826. auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
  6827. expr = type_to_glsl_constructor(restype);
  6828. expr += "(";
  6829. for (uint32_t i = 0; i < restype.vecsize; i++)
  6830. {
  6831. expr += swiz(select, i);
  6832. expr += " ? ";
  6833. expr += swiz(true_value, i);
  6834. expr += " : ";
  6835. expr += swiz(false_value, i);
  6836. if (i + 1 < restype.vecsize)
  6837. expr += ", ";
  6838. }
  6839. expr += ")";
  6840. }
  6841. return expr;
  6842. }
  6843. void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
  6844. {
  6845. auto &lerptype = expression_type(lerp);
  6846. auto &restype = get<SPIRType>(result_type);
  6847. // If this results in a variable pointer, assume it may be written through.
  6848. if (restype.pointer)
  6849. {
  6850. register_write(left);
  6851. register_write(right);
  6852. }
  6853. string mix_op;
  6854. bool has_boolean_mix = *backend.boolean_mix_function &&
  6855. ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
  6856. bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
  6857. // Cannot use boolean mix when the lerp argument is just one boolean,
  6858. // fall back to regular trinary statements.
  6859. if (lerptype.vecsize == 1)
  6860. has_boolean_mix = false;
  6861. // If we can reduce the mix to a simple cast, do so.
  6862. // This helps for cases like int(bool), uint(bool) which is implemented with
  6863. // OpSelect bool 1 0.
  6864. if (trivial_mix)
  6865. {
  6866. emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
  6867. }
  6868. else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
  6869. {
  6870. // Boolean mix not supported on desktop without extension.
  6871. // Was added in OpenGL 4.5 with ES 3.1 compat.
  6872. //
  6873. // Could use GL_EXT_shader_integer_mix on desktop at least,
  6874. // but Apple doesn't support it. :(
  6875. // Just implement it as ternary expressions.
  6876. auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
  6877. emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
  6878. inherit_expression_dependencies(id, left);
  6879. inherit_expression_dependencies(id, right);
  6880. inherit_expression_dependencies(id, lerp);
  6881. }
  6882. else if (lerptype.basetype == SPIRType::Boolean)
  6883. emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
  6884. else
  6885. emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
  6886. }
  6887. string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
  6888. {
  6889. // Keep track of the array indices we have used to load the image.
  6890. // We'll need to use the same array index into the combined image sampler array.
  6891. auto image_expr = to_non_uniform_aware_expression(image_id);
  6892. string array_expr;
  6893. auto array_index = image_expr.find_first_of('[');
  6894. if (array_index != string::npos)
  6895. array_expr = image_expr.substr(array_index, string::npos);
  6896. auto &args = current_function->arguments;
  6897. // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect
  6898. // all possible combinations into new sampler2D uniforms.
  6899. auto *image = maybe_get_backing_variable(image_id);
  6900. auto *samp = maybe_get_backing_variable(samp_id);
  6901. if (image)
  6902. image_id = image->self;
  6903. if (samp)
  6904. samp_id = samp->self;
  6905. auto image_itr = find_if(begin(args), end(args),
  6906. [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
  6907. auto sampler_itr = find_if(begin(args), end(args),
  6908. [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
  6909. if (image_itr != end(args) || sampler_itr != end(args))
  6910. {
   6911. // If the image or sampler originates from a function parameter, we will find it in our argument list.
  6912. bool global_image = image_itr == end(args);
  6913. bool global_sampler = sampler_itr == end(args);
  6914. VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
  6915. VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
  6916. auto &combined = current_function->combined_parameters;
  6917. auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
  6918. return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
  6919. p.sampler_id == sid;
  6920. });
  6921. if (itr != end(combined))
  6922. return to_expression(itr->id) + array_expr;
  6923. else
  6924. {
  6925. SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
  6926. "build_combined_image_samplers() used "
  6927. "before compile() was called?");
  6928. }
  6929. }
  6930. else
  6931. {
  6932. // For global sampler2D, look directly at the global remapping table.
  6933. auto &mapping = combined_image_samplers;
  6934. auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
  6935. return combined.image_id == image_id && combined.sampler_id == samp_id;
  6936. });
  6937. if (itr != end(combined_image_samplers))
  6938. return to_expression(itr->combined_id) + array_expr;
  6939. else
  6940. {
  6941. SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
  6942. "before compile() was called?");
  6943. }
  6944. }
  6945. }
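// Whitelist of subgroup operations the plain OpenGL (non-Vulkan) path knows how to emit.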
  6946. bool CompilerGLSL::is_supported_subgroup_op_in_opengl(Op op, const uint32_t *ops)
  6947. {
  6948. switch (op)
  6949. {
  6950. case OpGroupNonUniformElect:
  6951. case OpGroupNonUniformBallot:
  6952. case OpGroupNonUniformBallotFindLSB:
  6953. case OpGroupNonUniformBallotFindMSB:
  6954. case OpGroupNonUniformBroadcast:
  6955. case OpGroupNonUniformBroadcastFirst:
  6956. case OpGroupNonUniformAll:
  6957. case OpGroupNonUniformAny:
  6958. case OpGroupNonUniformAllEqual:
  6959. case OpControlBarrier:
  6960. case OpMemoryBarrier:
  6961. case OpGroupNonUniformBallotBitCount:
  6962. case OpGroupNonUniformBallotBitExtract:
  6963. case OpGroupNonUniformInverseBallot:
  6964. return true;
  6965. case OpGroupNonUniformIAdd:
  6966. case OpGroupNonUniformFAdd:
  6967. case OpGroupNonUniformIMul:
  6968. case OpGroupNonUniformFMul:
  6969. {
  6970. const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
  6971. if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan ||
  6972. operation == GroupOperationExclusiveScan)
  6973. {
  6974. return true;
  6975. }
  6976. else
  6977. {
  6978. return false;
  6979. }
  6980. }
  6981. default:
  6982. return false;
  6983. }
  6984. }
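// OpSampledImage: with Vulkan semantics this is emitted as a combined sampler constructor,
// otherwise it resolves to the remapped combined image sampler.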
  6985. void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
  6986. {
  6987. if (options.vulkan_semantics && combined_image_samplers.empty())
  6988. {
  6989. emit_binary_func_op(result_type, result_id, image_id, samp_id,
  6990. type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
  6991. }
  6992. else
  6993. {
  6994. // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
  6995. emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
  6996. }
  6997. // Make sure to suppress usage tracking and any expression invalidation.
  6998. // It is illegal to create temporaries of opaque types.
  6999. forwarded_temporaries.erase(result_id);
  7000. }
  7001. static inline bool image_opcode_is_sample_no_dref(Op op)
  7002. {
  7003. switch (op)
  7004. {
  7005. case OpImageSampleExplicitLod:
  7006. case OpImageSampleImplicitLod:
  7007. case OpImageSampleProjExplicitLod:
  7008. case OpImageSampleProjImplicitLod:
  7009. case OpImageFetch:
  7010. case OpImageRead:
  7011. case OpImageSparseSampleExplicitLod:
  7012. case OpImageSparseSampleImplicitLod:
  7013. case OpImageSparseSampleProjExplicitLod:
  7014. case OpImageSparseSampleProjImplicitLod:
  7015. case OpImageSparseFetch:
  7016. case OpImageSparseRead:
  7017. return true;
  7018. default:
  7019. return false;
  7020. }
  7021. }
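// Sparse image ops return a (residency code, texel) pair; allocate two extra temporaries so
// the sparse builtin can write the texel while the residency code is captured separately.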
  7022. void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
  7023. uint32_t &texel_id)
  7024. {
  7025. // Need to allocate two temporaries.
  7026. if (options.es)
  7027. SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
  7028. require_extension_internal("GL_ARB_sparse_texture2");
  7029. auto &temps = extra_sub_expressions[id];
  7030. if (temps == 0)
  7031. temps = ir.increase_bound_by(2);
  7032. feedback_id = temps + 0;
  7033. texel_id = temps + 1;
  7034. auto &return_type = get<SPIRType>(result_type_id);
  7035. if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
  7036. SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
  7037. emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
  7038. emit_uninitialized_temporary(return_type.member_types[1], texel_id);
  7039. }
  7040. uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
  7041. {
  7042. auto itr = extra_sub_expressions.find(id);
  7043. if (itr == extra_sub_expressions.end())
  7044. return 0;
  7045. else
  7046. return itr->second + 1;
  7047. }
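// Emits an OpImageSample*/OpImageFetch style instruction; sparse variants are lowered through
// the feedback temporaries and repacked into the expected struct result.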
  7048. void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
  7049. {
  7050. auto *ops = stream(i);
  7051. auto op = static_cast<Op>(i.op);
  7052. SmallVector<uint32_t> inherited_expressions;
  7053. uint32_t result_type_id = ops[0];
  7054. uint32_t id = ops[1];
  7055. auto &return_type = get<SPIRType>(result_type_id);
  7056. uint32_t sparse_code_id = 0;
  7057. uint32_t sparse_texel_id = 0;
  7058. if (sparse)
  7059. emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);
  7060. bool forward = false;
  7061. string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
  7062. if (sparse)
  7063. {
  7064. statement(to_expression(sparse_code_id), " = ", expr, ";");
  7065. expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
  7066. ")");
  7067. forward = true;
  7068. inherited_expressions.clear();
  7069. }
  7070. emit_op(result_type_id, id, expr, forward);
  7071. for (auto &inherit : inherited_expressions)
  7072. inherit_expression_dependencies(id, inherit);
  7073. // Do not register sparse ops as control dependent as they are always lowered to a temporary.
  7074. switch (op)
  7075. {
  7076. case OpImageSampleDrefImplicitLod:
  7077. case OpImageSampleImplicitLod:
  7078. case OpImageSampleProjImplicitLod:
  7079. case OpImageSampleProjDrefImplicitLod:
  7080. register_control_dependent_expression(id);
  7081. break;
  7082. default:
  7083. break;
  7084. }
  7085. }
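// Builds the expression string for a texture sampling instruction: decodes the opcode-specific operands
// (Dref, gather component, projection, fetch), the optional image-operand mask (bias, LOD, gradients,
// offsets, sample index, min LOD), and then concatenates to_function_name() and to_function_args().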
  7086. std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
  7087. SmallVector<uint32_t> &inherited_expressions)
  7088. {
  7089. auto *ops = stream(i);
  7090. auto op = static_cast<Op>(i.op);
  7091. uint32_t length = i.length;
  7092. uint32_t result_type_id = ops[0];
  7093. VariableID img = ops[2];
  7094. uint32_t coord = ops[3];
  7095. uint32_t dref = 0;
  7096. uint32_t comp = 0;
  7097. bool gather = false;
  7098. bool proj = false;
  7099. bool fetch = false;
  7100. bool nonuniform_expression = false;
  7101. const uint32_t *opt = nullptr;
  7102. auto &result_type = get<SPIRType>(result_type_id);
  7103. inherited_expressions.push_back(coord);
  7104. if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img))
  7105. nonuniform_expression = true;
  7106. switch (op)
  7107. {
  7108. case OpImageSampleDrefImplicitLod:
  7109. case OpImageSampleDrefExplicitLod:
  7110. case OpImageSparseSampleDrefImplicitLod:
  7111. case OpImageSparseSampleDrefExplicitLod:
  7112. dref = ops[4];
  7113. opt = &ops[5];
  7114. length -= 5;
  7115. break;
  7116. case OpImageSampleProjDrefImplicitLod:
  7117. case OpImageSampleProjDrefExplicitLod:
  7118. case OpImageSparseSampleProjDrefImplicitLod:
  7119. case OpImageSparseSampleProjDrefExplicitLod:
  7120. dref = ops[4];
  7121. opt = &ops[5];
  7122. length -= 5;
  7123. proj = true;
  7124. break;
  7125. case OpImageDrefGather:
  7126. case OpImageSparseDrefGather:
  7127. dref = ops[4];
  7128. opt = &ops[5];
  7129. length -= 5;
  7130. gather = true;
  7131. if (options.es && options.version < 310)
  7132. SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
  7133. else if (!options.es && options.version < 400)
  7134. SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
  7135. break;
  7136. case OpImageGather:
  7137. case OpImageSparseGather:
  7138. comp = ops[4];
  7139. opt = &ops[5];
  7140. length -= 5;
  7141. gather = true;
  7142. if (options.es && options.version < 310)
  7143. SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
  7144. else if (!options.es && options.version < 400)
  7145. {
  7146. if (!expression_is_constant_null(comp))
  7147. SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
  7148. require_extension_internal("GL_ARB_texture_gather");
  7149. }
  7150. break;
  7151. case OpImageFetch:
  7152. case OpImageSparseFetch:
  7153. case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
  7154. opt = &ops[4];
  7155. length -= 4;
  7156. fetch = true;
  7157. break;
  7158. case OpImageSampleProjImplicitLod:
  7159. case OpImageSampleProjExplicitLod:
  7160. case OpImageSparseSampleProjImplicitLod:
  7161. case OpImageSparseSampleProjExplicitLod:
  7162. opt = &ops[4];
  7163. length -= 4;
  7164. proj = true;
  7165. break;
  7166. default:
  7167. opt = &ops[4];
  7168. length -= 4;
  7169. break;
  7170. }
  7171. // Bypass pointers because we need the real image struct
  7172. auto &type = expression_type(img);
  7173. auto &imgtype = get<SPIRType>(type.self);
  7174. uint32_t coord_components = 0;
  7175. switch (imgtype.image.dim)
  7176. {
  7177. case Dim1D:
  7178. coord_components = 1;
  7179. break;
  7180. case Dim2D:
  7181. coord_components = 2;
  7182. break;
  7183. case Dim3D:
  7184. coord_components = 3;
  7185. break;
  7186. case DimCube:
  7187. coord_components = 3;
  7188. break;
  7189. case DimBuffer:
  7190. coord_components = 1;
  7191. break;
  7192. default:
  7193. coord_components = 2;
  7194. break;
  7195. }
  7196. if (dref)
  7197. inherited_expressions.push_back(dref);
  7198. if (proj)
  7199. coord_components++;
  7200. if (imgtype.image.arrayed)
  7201. coord_components++;
  7202. uint32_t bias = 0;
  7203. uint32_t lod = 0;
  7204. uint32_t grad_x = 0;
  7205. uint32_t grad_y = 0;
  7206. uint32_t coffset = 0;
  7207. uint32_t offset = 0;
  7208. uint32_t coffsets = 0;
  7209. uint32_t sample = 0;
  7210. uint32_t minlod = 0;
  7211. uint32_t flags = 0;
  7212. if (length)
  7213. {
  7214. flags = *opt++;
  7215. length--;
  7216. }
  7217. auto test = [&](uint32_t &v, uint32_t flag) {
  7218. if (length && (flags & flag))
  7219. {
  7220. v = *opt++;
  7221. inherited_expressions.push_back(v);
  7222. length--;
  7223. }
  7224. };
  7225. test(bias, ImageOperandsBiasMask);
  7226. test(lod, ImageOperandsLodMask);
  7227. test(grad_x, ImageOperandsGradMask);
  7228. test(grad_y, ImageOperandsGradMask);
  7229. test(coffset, ImageOperandsConstOffsetMask);
  7230. test(offset, ImageOperandsOffsetMask);
  7231. test(coffsets, ImageOperandsConstOffsetsMask);
  7232. test(sample, ImageOperandsSampleMask);
  7233. test(minlod, ImageOperandsMinLodMask);
  7234. TextureFunctionBaseArguments base_args = {};
  7235. base_args.img = img;
  7236. base_args.imgtype = &imgtype;
  7237. base_args.is_fetch = fetch != 0;
  7238. base_args.is_gather = gather != 0;
  7239. base_args.is_proj = proj != 0;
  7240. string expr;
  7241. TextureFunctionNameArguments name_args = {};
  7242. name_args.base = base_args;
  7243. name_args.has_array_offsets = coffsets != 0;
  7244. name_args.has_offset = coffset != 0 || offset != 0;
  7245. name_args.has_grad = grad_x != 0 || grad_y != 0;
  7246. name_args.has_dref = dref != 0;
  7247. name_args.is_sparse_feedback = sparse;
  7248. name_args.has_min_lod = minlod != 0;
  7249. name_args.lod = lod;
  7250. expr += to_function_name(name_args);
  7251. expr += "(";
  7252. uint32_t sparse_texel_id = 0;
  7253. if (sparse)
  7254. sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
  7255. TextureFunctionArguments args = {};
  7256. args.base = base_args;
  7257. args.coord = coord;
  7258. args.coord_components = coord_components;
  7259. args.dref = dref;
  7260. args.grad_x = grad_x;
  7261. args.grad_y = grad_y;
  7262. args.lod = lod;
  7263. args.has_array_offsets = coffsets != 0;
  7264. if (coffsets)
  7265. args.offset = coffsets;
  7266. else if (coffset)
  7267. args.offset = coffset;
  7268. else
  7269. args.offset = offset;
  7270. args.bias = bias;
  7271. args.component = comp;
  7272. args.sample = sample;
  7273. args.sparse_texel = sparse_texel_id;
  7274. args.min_lod = minlod;
  7275. args.nonuniform_expression = nonuniform_expression;
  7276. expr += to_function_args(args, forward);
  7277. expr += ")";
  7278. // texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here.
  7279. if (is_legacy() && !options.es && is_depth_image(imgtype, img))
  7280. expr += ".r";
// Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
// Remap back to 4 components as sampling opcodes expect.
  7283. if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
  7284. {
  7285. bool image_is_depth = false;
  7286. const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
  7287. VariableID image_id = combined ? combined->image : img;
  7288. if (combined && is_depth_image(imgtype, combined->image))
  7289. image_is_depth = true;
  7290. else if (is_depth_image(imgtype, img))
  7291. image_is_depth = true;
  7292. // We must also check the backing variable for the image.
  7293. // We might have loaded an OpImage, and used that handle for two different purposes.
  7294. // Once with comparison, once without.
  7295. auto *image_variable = maybe_get_backing_variable(image_id);
  7296. if (image_variable && is_depth_image(get<SPIRType>(image_variable->basetype), image_variable->self))
  7297. image_is_depth = true;
  7298. if (image_is_depth)
  7299. expr = remap_swizzle(result_type, 1, expr);
  7300. }
  7301. if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
  7302. {
  7303. // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
  7304. // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
  7305. expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
  7306. }
  7307. // Deals with reads from MSL. We might need to downconvert to fewer components.
  7308. if (op == OpImageRead)
  7309. expr = remap_swizzle(result_type, 4, expr);
  7310. return expr;
  7311. }
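// Returns true if the ID refers to a constant whose value is null (all zeroes).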
  7312. bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
  7313. {
  7314. auto *c = maybe_get<SPIRConstant>(id);
  7315. if (!c)
  7316. return false;
  7317. return c->constant_is_null();
  7318. }
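// Returns true if the pointed-to array cannot be treated as a plain value type in the target language,
// e.g. when the backend does not support value-type arrays at all, or when the array lives inside a
// buffer block and the backend cannot copy such arrays by value.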
  7319. bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
  7320. {
  7321. auto &type = expression_type(ptr);
  7322. if (!is_array(get_pointee_type(type)))
  7323. return false;
  7324. if (!backend.array_is_value_type)
  7325. return true;
  7326. auto *var = maybe_get_backing_variable(ptr);
  7327. if (!var)
  7328. return false;
  7329. auto &backed_type = get<SPIRType>(var->basetype);
  7330. return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct &&
  7331. has_member_decoration(backed_type.self, 0, DecorationOffset);
  7332. }
  7333. // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
  7334. // For some subclasses, the function is a method on the specified image.
  7335. string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
  7336. {
  7337. if (args.has_min_lod)
  7338. {
  7339. if (options.es)
  7340. SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
  7341. require_extension_internal("GL_ARB_sparse_texture_clamp");
  7342. }
  7343. string fname;
  7344. auto &imgtype = *args.base.imgtype;
  7345. VariableID tex = args.base.img;
  7346. // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
  7347. // To emulate this, we will have to use textureGrad with a constant gradient of 0.
  7348. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
  7349. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
  7350. bool workaround_lod_array_shadow_as_grad = false;
  7351. if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
  7352. is_depth_image(imgtype, tex) && args.lod && !args.base.is_fetch)
  7353. {
  7354. if (!expression_is_constant_null(args.lod))
  7355. {
  7356. SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
  7357. "expressed in GLSL.");
  7358. }
  7359. workaround_lod_array_shadow_as_grad = true;
  7360. }
  7361. if (args.is_sparse_feedback)
  7362. fname += "sparse";
  7363. if (args.base.is_fetch)
  7364. fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
  7365. else
  7366. {
  7367. fname += args.is_sparse_feedback ? "Texture" : "texture";
  7368. if (args.base.is_gather)
  7369. fname += "Gather";
  7370. if (args.has_array_offsets)
  7371. fname += "Offsets";
  7372. if (args.base.is_proj)
  7373. fname += "Proj";
  7374. if (args.has_grad || workaround_lod_array_shadow_as_grad)
  7375. fname += "Grad";
  7376. if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
  7377. fname += "Lod";
  7378. }
  7379. if (args.has_offset)
  7380. fname += "Offset";
  7381. if (args.has_min_lod)
  7382. fname += "Clamp";
  7383. if (args.is_sparse_feedback || args.has_min_lod)
  7384. fname += "ARB";
  7385. return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
  7386. }
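// Converts a separate (non-combined) image expression into something texelFetch and friends can consume.
// In Vulkan GLSL this either combines the image with an internal dummy sampler or relies on
// GL_EXT_samplerless_texture_functions; in plain GLSL a dummy sampler must already have been created.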
  7387. std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
  7388. {
  7389. auto *var = maybe_get_backing_variable(id);
  7390. // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
  7391. // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
  7392. if (var)
  7393. {
  7394. auto &type = get<SPIRType>(var->basetype);
  7395. if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
  7396. {
  7397. if (options.vulkan_semantics)
  7398. {
  7399. if (dummy_sampler_id)
  7400. {
  7401. // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
  7402. auto sampled_type = type;
  7403. sampled_type.basetype = SPIRType::SampledImage;
  7404. return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ",
  7405. to_expression(dummy_sampler_id), ")");
  7406. }
  7407. else
  7408. {
  7409. // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
  7410. require_extension_internal("GL_EXT_samplerless_texture_functions");
  7411. }
  7412. }
  7413. else
  7414. {
  7415. if (!dummy_sampler_id)
  7416. SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
  7417. "build_dummy_sampler_for_combined_images() called?");
  7418. return to_combined_image_sampler(id, dummy_sampler_id);
  7419. }
  7420. }
  7421. }
  7422. return to_non_uniform_aware_expression(id);
  7423. }
  7424. // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
  7425. string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
  7426. {
  7427. VariableID img = args.base.img;
  7428. auto &imgtype = *args.base.imgtype;
  7429. string farg_str;
  7430. if (args.base.is_fetch)
  7431. farg_str = convert_separate_image_to_expression(img);
  7432. else
  7433. farg_str = to_non_uniform_aware_expression(img);
  7434. if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
  7435. {
  7436. // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
  7437. farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
  7438. }
  7439. bool swizz_func = backend.swizzle_is_function;
  7440. auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
  7441. if (comps == in_comps)
  7442. return "";
  7443. switch (comps)
  7444. {
  7445. case 1:
  7446. return ".x";
  7447. case 2:
  7448. return swizz_func ? ".xy()" : ".xy";
  7449. case 3:
  7450. return swizz_func ? ".xyz()" : ".xyz";
  7451. default:
  7452. return "";
  7453. }
  7454. };
  7455. bool forward = should_forward(args.coord);
  7456. // The IR can give us more components than we need, so chop them off as needed.
  7457. auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
  7458. // Only enclose the UV expression if needed.
  7459. auto coord_expr =
  7460. (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
  7461. // texelFetch only takes int, not uint.
  7462. auto &coord_type = expression_type(args.coord);
  7463. if (coord_type.basetype == SPIRType::UInt)
  7464. {
  7465. auto expected_type = coord_type;
  7466. expected_type.vecsize = args.coord_components;
  7467. expected_type.basetype = SPIRType::Int;
  7468. coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
  7469. }
  7470. // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
  7471. // To emulate this, we will have to use textureGrad with a constant gradient of 0.
  7472. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
  7473. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
  7474. bool workaround_lod_array_shadow_as_grad =
  7475. ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
  7476. is_depth_image(imgtype, img) && args.lod != 0 && !args.base.is_fetch;
  7477. if (args.dref)
  7478. {
  7479. forward = forward && should_forward(args.dref);
  7480. // SPIR-V splits dref and coordinate.
  7481. if (args.base.is_gather ||
  7482. args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
  7483. {
  7484. farg_str += ", ";
  7485. farg_str += to_expression(args.coord);
  7486. farg_str += ", ";
  7487. farg_str += to_expression(args.dref);
  7488. }
  7489. else if (args.base.is_proj)
  7490. {
  7491. // Have to reshuffle so we get vec4(coord, dref, proj), special case.
// Other shading languages split up the arguments for coord and compare value, like SPIR-V does.
  7493. // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
  7494. farg_str += ", vec4(";
  7495. if (imgtype.image.dim == Dim1D)
  7496. {
  7497. // Could reuse coord_expr, but we will mess up the temporary usage checking.
  7498. farg_str += to_enclosed_expression(args.coord) + ".x";
  7499. farg_str += ", ";
  7500. farg_str += "0.0, ";
  7501. farg_str += to_expression(args.dref);
  7502. farg_str += ", ";
  7503. farg_str += to_enclosed_expression(args.coord) + ".y)";
  7504. }
  7505. else if (imgtype.image.dim == Dim2D)
  7506. {
  7507. // Could reuse coord_expr, but we will mess up the temporary usage checking.
  7508. farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
  7509. farg_str += ", ";
  7510. farg_str += to_expression(args.dref);
  7511. farg_str += ", ";
  7512. farg_str += to_enclosed_expression(args.coord) + ".z)";
  7513. }
  7514. else
  7515. SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
  7516. }
  7517. else
  7518. {
  7519. // Create a composite which merges coord/dref into a single vector.
  7520. auto type = expression_type(args.coord);
  7521. type.vecsize = args.coord_components + 1;
  7522. if (imgtype.image.dim == Dim1D && options.es)
  7523. type.vecsize++;
  7524. farg_str += ", ";
  7525. farg_str += type_to_glsl_constructor(type);
  7526. farg_str += "(";
  7527. if (imgtype.image.dim == Dim1D && options.es)
  7528. {
  7529. if (imgtype.image.arrayed)
  7530. {
  7531. farg_str += enclose_expression(coord_expr) + ".x";
  7532. farg_str += ", 0.0, ";
  7533. farg_str += enclose_expression(coord_expr) + ".y";
  7534. }
  7535. else
  7536. {
  7537. farg_str += coord_expr;
  7538. farg_str += ", 0.0";
  7539. }
  7540. }
  7541. else
  7542. farg_str += coord_expr;
  7543. farg_str += ", ";
  7544. farg_str += to_expression(args.dref);
  7545. farg_str += ")";
  7546. }
  7547. }
  7548. else
  7549. {
  7550. if (imgtype.image.dim == Dim1D && options.es)
  7551. {
  7552. // Have to fake a second coordinate.
  7553. if (type_is_floating_point(coord_type))
  7554. {
  7555. // Cannot mix proj and array.
  7556. if (imgtype.image.arrayed || args.base.is_proj)
  7557. {
  7558. coord_expr = join("vec3(", enclose_expression(coord_expr), ".x, 0.0, ",
  7559. enclose_expression(coord_expr), ".y)");
  7560. }
  7561. else
  7562. coord_expr = join("vec2(", coord_expr, ", 0.0)");
  7563. }
  7564. else
  7565. {
  7566. if (imgtype.image.arrayed)
  7567. {
  7568. coord_expr = join("ivec3(", enclose_expression(coord_expr),
  7569. ".x, 0, ",
  7570. enclose_expression(coord_expr), ".y)");
  7571. }
  7572. else
  7573. coord_expr = join("ivec2(", coord_expr, ", 0)");
  7574. }
  7575. }
  7576. farg_str += ", ";
  7577. farg_str += coord_expr;
  7578. }
  7579. if (args.grad_x || args.grad_y)
  7580. {
  7581. forward = forward && should_forward(args.grad_x);
  7582. forward = forward && should_forward(args.grad_y);
  7583. farg_str += ", ";
  7584. farg_str += to_expression(args.grad_x);
  7585. farg_str += ", ";
  7586. farg_str += to_expression(args.grad_y);
  7587. }
  7588. if (args.lod)
  7589. {
  7590. if (workaround_lod_array_shadow_as_grad)
  7591. {
  7592. // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
  7593. // Implementing this as plain texture() is not safe on some implementations.
  7594. if (imgtype.image.dim == Dim2D)
  7595. farg_str += ", vec2(0.0), vec2(0.0)";
  7596. else if (imgtype.image.dim == DimCube)
  7597. farg_str += ", vec3(0.0), vec3(0.0)";
  7598. }
  7599. else
  7600. {
  7601. forward = forward && should_forward(args.lod);
  7602. farg_str += ", ";
  7603. // Lod expression for TexelFetch in GLSL must be int, and only int.
  7604. if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
  7605. farg_str += bitcast_expression(SPIRType::Int, args.lod);
  7606. else
  7607. farg_str += to_expression(args.lod);
  7608. }
  7609. }
  7610. else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
  7611. {
// The Lod argument is optional in OpImageFetch, but texelFetch() requires one here, so pick 0 as the default.
  7613. farg_str += ", 0";
  7614. }
  7615. if (args.offset)
  7616. {
  7617. forward = forward && should_forward(args.offset);
  7618. farg_str += ", ";
  7619. farg_str += bitcast_expression(SPIRType::Int, args.offset);
  7620. }
  7621. if (args.sample)
  7622. {
  7623. farg_str += ", ";
  7624. farg_str += bitcast_expression(SPIRType::Int, args.sample);
  7625. }
  7626. if (args.min_lod)
  7627. {
  7628. farg_str += ", ";
  7629. farg_str += to_expression(args.min_lod);
  7630. }
  7631. if (args.sparse_texel)
  7632. {
// The sparse texel output parameter comes after everything else, except that it goes before the optional component/bias arguments.
  7634. farg_str += ", ";
  7635. farg_str += to_expression(args.sparse_texel);
  7636. }
  7637. if (args.bias)
  7638. {
  7639. forward = forward && should_forward(args.bias);
  7640. farg_str += ", ";
  7641. farg_str += to_expression(args.bias);
  7642. }
  7643. if (args.component && !expression_is_constant_null(args.component))
  7644. {
  7645. forward = forward && should_forward(args.component);
  7646. farg_str += ", ";
  7647. farg_str += bitcast_expression(SPIRType::Int, args.component);
  7648. }
  7649. *p_forward = forward;
  7650. return farg_str;
  7651. }
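// With relax_nan_checks enabled, remaps unordered floating-point comparison opcodes to their ordered
// counterparts (and FOrdNotEqual to FUnordNotEqual), so NaN-precise behavior is not required of the output.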
  7652. Op CompilerGLSL::get_remapped_spirv_op(Op op) const
  7653. {
  7654. if (options.relax_nan_checks)
  7655. {
  7656. switch (op)
  7657. {
  7658. case OpFUnordLessThan:
  7659. op = OpFOrdLessThan;
  7660. break;
  7661. case OpFUnordLessThanEqual:
  7662. op = OpFOrdLessThanEqual;
  7663. break;
  7664. case OpFUnordGreaterThan:
  7665. op = OpFOrdGreaterThan;
  7666. break;
  7667. case OpFUnordGreaterThanEqual:
  7668. op = OpFOrdGreaterThanEqual;
  7669. break;
  7670. case OpFUnordEqual:
  7671. op = OpFOrdEqual;
  7672. break;
  7673. case OpFOrdNotEqual:
  7674. op = OpFUnordNotEqual;
  7675. break;
  7676. default:
  7677. break;
  7678. }
  7679. }
  7680. return op;
  7681. }
  7682. GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const
  7683. {
  7684. // Relax to non-NaN aware opcodes.
  7685. if (options.relax_nan_checks)
  7686. {
  7687. switch (std450_op)
  7688. {
  7689. case GLSLstd450NClamp:
  7690. std450_op = GLSLstd450FClamp;
  7691. break;
  7692. case GLSLstd450NMin:
  7693. std450_op = GLSLstd450FMin;
  7694. break;
  7695. case GLSLstd450NMax:
  7696. std450_op = GLSLstd450FMax;
  7697. break;
  7698. default:
  7699. break;
  7700. }
  7701. }
  7702. return std450_op;
  7703. }
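// Emits a GLSL.std.450 extended instruction. Most ops map 1:1 to GLSL built-ins; legacy targets get
// hand-rolled emulation for round/trunc/modf and the hyperbolic functions, and the NaN-aware
// NMin/NMax/NClamp ops are lowered via polyfills or an isnan()-based mix() sequence.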
  7704. void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
  7705. {
  7706. auto op = static_cast<GLSLstd450>(eop);
  7707. if (is_legacy() && is_unsigned_glsl_opcode(op))
  7708. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
  7709. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  7710. uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
  7711. auto int_type = to_signed_basetype(integer_width);
  7712. auto uint_type = to_unsigned_basetype(integer_width);
  7713. op = get_remapped_glsl_op(op);
  7714. switch (op)
  7715. {
  7716. // FP fiddling
  7717. case GLSLstd450Round:
  7718. if (!is_legacy())
  7719. emit_unary_func_op(result_type, id, args[0], "round");
  7720. else
  7721. {
  7722. auto op0 = to_enclosed_expression(args[0]);
  7723. auto &op0_type = expression_type(args[0]);
  7724. auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
  7725. bool forward = should_forward(args[0]);
  7726. emit_op(result_type, id, expr, forward);
  7727. inherit_expression_dependencies(id, args[0]);
  7728. }
  7729. break;
  7730. case GLSLstd450RoundEven:
  7731. if (!is_legacy())
  7732. emit_unary_func_op(result_type, id, args[0], "roundEven");
  7733. else if (!options.es)
  7734. {
  7735. // This extension provides round() with round-to-even semantics.
  7736. require_extension_internal("GL_EXT_gpu_shader4");
  7737. emit_unary_func_op(result_type, id, args[0], "round");
  7738. }
  7739. else
  7740. SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
  7741. break;
  7742. case GLSLstd450Trunc:
  7743. if (!is_legacy())
  7744. emit_unary_func_op(result_type, id, args[0], "trunc");
  7745. else
  7746. {
  7747. // Implement by value-casting to int and back.
  7748. bool forward = should_forward(args[0]);
  7749. auto op0 = to_unpacked_expression(args[0]);
  7750. auto &op0_type = expression_type(args[0]);
  7751. auto via_type = op0_type;
  7752. via_type.basetype = SPIRType::Int;
  7753. auto expr = join(type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(", op0, "))");
  7754. emit_op(result_type, id, expr, forward);
  7755. inherit_expression_dependencies(id, args[0]);
  7756. }
  7757. break;
  7758. case GLSLstd450SAbs:
  7759. emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
  7760. break;
  7761. case GLSLstd450FAbs:
  7762. emit_unary_func_op(result_type, id, args[0], "abs");
  7763. break;
  7764. case GLSLstd450SSign:
  7765. emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
  7766. break;
  7767. case GLSLstd450FSign:
  7768. emit_unary_func_op(result_type, id, args[0], "sign");
  7769. break;
  7770. case GLSLstd450Floor:
  7771. emit_unary_func_op(result_type, id, args[0], "floor");
  7772. break;
  7773. case GLSLstd450Ceil:
  7774. emit_unary_func_op(result_type, id, args[0], "ceil");
  7775. break;
  7776. case GLSLstd450Fract:
  7777. emit_unary_func_op(result_type, id, args[0], "fract");
  7778. break;
  7779. case GLSLstd450Radians:
  7780. emit_unary_func_op(result_type, id, args[0], "radians");
  7781. break;
  7782. case GLSLstd450Degrees:
  7783. emit_unary_func_op(result_type, id, args[0], "degrees");
  7784. break;
  7785. case GLSLstd450Fma:
  7786. if ((!options.es && options.version < 400) || (options.es && options.version < 320))
  7787. {
  7788. auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
  7789. to_enclosed_expression(args[2]));
  7790. emit_op(result_type, id, expr,
  7791. should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
  7792. for (uint32_t i = 0; i < 3; i++)
  7793. inherit_expression_dependencies(id, args[i]);
  7794. }
  7795. else
  7796. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
  7797. break;
  7798. case GLSLstd450Modf:
  7799. register_call_out_argument(args[1]);
  7800. if (!is_legacy())
  7801. {
  7802. forced_temporaries.insert(id);
  7803. emit_binary_func_op(result_type, id, args[0], args[1], "modf");
  7804. }
  7805. else
  7806. {
  7807. //NB. legacy GLSL doesn't have trunc() either, so we do a value cast
  7808. auto &op1_type = expression_type(args[1]);
  7809. auto via_type = op1_type;
  7810. via_type.basetype = SPIRType::Int;
  7811. statement(to_expression(args[1]), " = ",
  7812. type_to_glsl(op1_type), "(", type_to_glsl(via_type),
  7813. "(", to_expression(args[0]), "));");
  7814. emit_binary_op(result_type, id, args[0], args[1], "-");
  7815. }
  7816. break;
  7817. case GLSLstd450ModfStruct:
  7818. {
  7819. auto &type = get<SPIRType>(result_type);
  7820. emit_uninitialized_temporary_expression(result_type, id);
  7821. if (!is_legacy())
  7822. {
  7823. statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
  7824. to_expression(id), ".", to_member_name(type, 1), ");");
  7825. }
  7826. else
  7827. {
  7828. //NB. legacy GLSL doesn't have trunc() either, so we do a value cast
  7829. auto &op0_type = expression_type(args[0]);
  7830. auto via_type = op0_type;
  7831. via_type.basetype = SPIRType::Int;
  7832. statement(to_expression(id), ".", to_member_name(type, 1), " = ", type_to_glsl(op0_type),
  7833. "(", type_to_glsl(via_type), "(", to_expression(args[0]), "));");
  7834. statement(to_expression(id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(args[0]), " - ",
  7835. to_expression(id), ".", to_member_name(type, 1), ";");
  7836. }
  7837. break;
  7838. }
  7839. // Minmax
  7840. case GLSLstd450UMin:
  7841. emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
  7842. break;
  7843. case GLSLstd450SMin:
  7844. emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
  7845. break;
  7846. case GLSLstd450FMin:
  7847. emit_binary_func_op(result_type, id, args[0], args[1], "min");
  7848. break;
  7849. case GLSLstd450FMax:
  7850. emit_binary_func_op(result_type, id, args[0], args[1], "max");
  7851. break;
  7852. case GLSLstd450UMax:
  7853. emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
  7854. break;
  7855. case GLSLstd450SMax:
  7856. emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
  7857. break;
  7858. case GLSLstd450FClamp:
  7859. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
  7860. break;
  7861. case GLSLstd450UClamp:
  7862. emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
  7863. break;
  7864. case GLSLstd450SClamp:
  7865. emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
  7866. break;
  7867. // Trig
  7868. case GLSLstd450Sin:
  7869. emit_unary_func_op(result_type, id, args[0], "sin");
  7870. break;
  7871. case GLSLstd450Cos:
  7872. emit_unary_func_op(result_type, id, args[0], "cos");
  7873. break;
  7874. case GLSLstd450Tan:
  7875. emit_unary_func_op(result_type, id, args[0], "tan");
  7876. break;
  7877. case GLSLstd450Asin:
  7878. emit_unary_func_op(result_type, id, args[0], "asin");
  7879. break;
  7880. case GLSLstd450Acos:
  7881. emit_unary_func_op(result_type, id, args[0], "acos");
  7882. break;
  7883. case GLSLstd450Atan:
  7884. emit_unary_func_op(result_type, id, args[0], "atan");
  7885. break;
  7886. case GLSLstd450Sinh:
  7887. if (!is_legacy())
  7888. emit_unary_func_op(result_type, id, args[0], "sinh");
  7889. else
  7890. {
  7891. bool forward = should_forward(args[0]);
  7892. auto expr = join("(exp(", to_expression(args[0]), ") - exp(-", to_enclosed_expression(args[0]), ")) * 0.5");
  7893. emit_op(result_type, id, expr, forward);
  7894. inherit_expression_dependencies(id, args[0]);
  7895. }
  7896. break;
  7897. case GLSLstd450Cosh:
  7898. if (!is_legacy())
  7899. emit_unary_func_op(result_type, id, args[0], "cosh");
  7900. else
  7901. {
  7902. bool forward = should_forward(args[0]);
  7903. auto expr = join("(exp(", to_expression(args[0]), ") + exp(-", to_enclosed_expression(args[0]), ")) * 0.5");
  7904. emit_op(result_type, id, expr, forward);
  7905. inherit_expression_dependencies(id, args[0]);
  7906. }
  7907. break;
  7908. case GLSLstd450Tanh:
  7909. if (!is_legacy())
  7910. emit_unary_func_op(result_type, id, args[0], "tanh");
  7911. else
  7912. {
  7913. // Create temporaries to store the result of exp(arg) and exp(-arg).
  7914. uint32_t &ids = extra_sub_expressions[id];
  7915. if (!ids)
  7916. {
  7917. ids = ir.increase_bound_by(2);
  7918. // Inherit precision qualifier (legacy has no NoContraction).
  7919. if (has_decoration(id, DecorationRelaxedPrecision))
  7920. {
  7921. set_decoration(ids, DecorationRelaxedPrecision);
  7922. set_decoration(ids + 1, DecorationRelaxedPrecision);
  7923. }
  7924. }
  7925. uint32_t epos_id = ids;
  7926. uint32_t eneg_id = ids + 1;
  7927. emit_op(result_type, epos_id, join("exp(", to_expression(args[0]), ")"), false);
  7928. emit_op(result_type, eneg_id, join("exp(-", to_enclosed_expression(args[0]), ")"), false);
  7929. inherit_expression_dependencies(epos_id, args[0]);
  7930. inherit_expression_dependencies(eneg_id, args[0]);
  7931. auto expr = join("(", to_enclosed_expression(epos_id), " - ", to_enclosed_expression(eneg_id), ") / "
  7932. "(", to_enclosed_expression(epos_id), " + ", to_enclosed_expression(eneg_id), ")");
  7933. emit_op(result_type, id, expr, true);
  7934. inherit_expression_dependencies(id, epos_id);
  7935. inherit_expression_dependencies(id, eneg_id);
  7936. }
  7937. break;
  7938. case GLSLstd450Asinh:
  7939. if (!is_legacy())
  7940. emit_unary_func_op(result_type, id, args[0], "asinh");
  7941. else
  7942. emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Asinh);
  7943. break;
  7944. case GLSLstd450Acosh:
  7945. if (!is_legacy())
  7946. emit_unary_func_op(result_type, id, args[0], "acosh");
  7947. else
  7948. emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Acosh);
  7949. break;
  7950. case GLSLstd450Atanh:
  7951. if (!is_legacy())
  7952. emit_unary_func_op(result_type, id, args[0], "atanh");
  7953. else
  7954. emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Atanh);
  7955. break;
  7956. case GLSLstd450Atan2:
  7957. emit_binary_func_op(result_type, id, args[0], args[1], "atan");
  7958. break;
  7959. // Exponentials
  7960. case GLSLstd450Pow:
  7961. emit_binary_func_op(result_type, id, args[0], args[1], "pow");
  7962. break;
  7963. case GLSLstd450Exp:
  7964. emit_unary_func_op(result_type, id, args[0], "exp");
  7965. break;
  7966. case GLSLstd450Log:
  7967. emit_unary_func_op(result_type, id, args[0], "log");
  7968. break;
  7969. case GLSLstd450Exp2:
  7970. emit_unary_func_op(result_type, id, args[0], "exp2");
  7971. break;
  7972. case GLSLstd450Log2:
  7973. emit_unary_func_op(result_type, id, args[0], "log2");
  7974. break;
  7975. case GLSLstd450Sqrt:
  7976. emit_unary_func_op(result_type, id, args[0], "sqrt");
  7977. break;
  7978. case GLSLstd450InverseSqrt:
  7979. emit_unary_func_op(result_type, id, args[0], "inversesqrt");
  7980. break;
  7981. // Matrix math
  7982. case GLSLstd450Determinant:
  7983. {
  7984. // No need to transpose - it doesn't affect the determinant
  7985. auto *e = maybe_get<SPIRExpression>(args[0]);
  7986. bool old_transpose = e && e->need_transpose;
  7987. if (old_transpose)
  7988. e->need_transpose = false;
  7989. if (options.version < 150) // also matches ES 100
  7990. {
  7991. auto &type = expression_type(args[0]);
  7992. assert(type.vecsize >= 2 && type.vecsize <= 4);
  7993. assert(type.vecsize == type.columns);
  7994. // ARB_gpu_shader_fp64 needs GLSL 150, other types are not valid
  7995. if (type.basetype != SPIRType::Float)
  7996. SPIRV_CROSS_THROW("Unsupported type for matrix determinant");
  7997. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  7998. require_polyfill(static_cast<Polyfill>(PolyfillDeterminant2x2 << (type.vecsize - 2)),
  7999. relaxed);
  8000. emit_unary_func_op(result_type, id, args[0],
  8001. (options.es && relaxed) ? "spvDeterminantMP" : "spvDeterminant");
  8002. }
  8003. else
  8004. emit_unary_func_op(result_type, id, args[0], "determinant");
  8005. if (old_transpose)
  8006. e->need_transpose = true;
  8007. break;
  8008. }
  8009. case GLSLstd450MatrixInverse:
  8010. {
  8011. // The inverse of the transpose is the same as the transpose of
  8012. // the inverse, so we can just flip need_transpose of the result.
  8013. auto *a = maybe_get<SPIRExpression>(args[0]);
  8014. bool old_transpose = a && a->need_transpose;
  8015. if (old_transpose)
  8016. a->need_transpose = false;
  8017. const char *func = "inverse";
  8018. if (options.version < 140) // also matches ES 100
  8019. {
  8020. auto &type = get<SPIRType>(result_type);
  8021. assert(type.vecsize >= 2 && type.vecsize <= 4);
  8022. assert(type.vecsize == type.columns);
  8023. // ARB_gpu_shader_fp64 needs GLSL 150, other types are invalid
  8024. if (type.basetype != SPIRType::Float)
  8025. SPIRV_CROSS_THROW("Unsupported type for matrix inverse");
  8026. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  8027. require_polyfill(static_cast<Polyfill>(PolyfillMatrixInverse2x2 << (type.vecsize - 2)),
  8028. relaxed);
  8029. func = (options.es && relaxed) ? "spvInverseMP" : "spvInverse";
  8030. }
  8031. bool forward = should_forward(args[0]);
  8032. auto &e = emit_op(result_type, id, join(func, "(", to_unpacked_expression(args[0]), ")"), forward);
  8033. inherit_expression_dependencies(id, args[0]);
  8034. if (old_transpose)
  8035. {
  8036. e.need_transpose = true;
  8037. a->need_transpose = true;
  8038. }
  8039. break;
  8040. }
  8041. // Lerping
  8042. case GLSLstd450FMix:
  8043. case GLSLstd450IMix:
  8044. {
  8045. emit_mix_op(result_type, id, args[0], args[1], args[2]);
  8046. break;
  8047. }
  8048. case GLSLstd450Step:
  8049. emit_binary_func_op(result_type, id, args[0], args[1], "step");
  8050. break;
  8051. case GLSLstd450SmoothStep:
  8052. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
  8053. break;
  8054. // Packing
  8055. case GLSLstd450Frexp:
  8056. register_call_out_argument(args[1]);
  8057. forced_temporaries.insert(id);
  8058. emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
  8059. break;
  8060. case GLSLstd450FrexpStruct:
  8061. {
  8062. auto &type = get<SPIRType>(result_type);
  8063. emit_uninitialized_temporary_expression(result_type, id);
  8064. statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
  8065. to_expression(id), ".", to_member_name(type, 1), ");");
  8066. break;
  8067. }
  8068. case GLSLstd450Ldexp:
  8069. {
  8070. bool forward = should_forward(args[0]) && should_forward(args[1]);
  8071. auto op0 = to_unpacked_expression(args[0]);
  8072. auto op1 = to_unpacked_expression(args[1]);
  8073. auto &op1_type = expression_type(args[1]);
  8074. if (op1_type.basetype != SPIRType::Int)
  8075. {
  8076. // Need a value cast here.
  8077. auto target_type = op1_type;
  8078. target_type.basetype = SPIRType::Int;
  8079. op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
  8080. }
  8081. auto expr = join("ldexp(", op0, ", ", op1, ")");
  8082. emit_op(result_type, id, expr, forward);
  8083. inherit_expression_dependencies(id, args[0]);
  8084. inherit_expression_dependencies(id, args[1]);
  8085. break;
  8086. }
  8087. case GLSLstd450PackSnorm4x8:
  8088. emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
  8089. break;
  8090. case GLSLstd450PackUnorm4x8:
  8091. emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
  8092. break;
  8093. case GLSLstd450PackSnorm2x16:
  8094. emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
  8095. break;
  8096. case GLSLstd450PackUnorm2x16:
  8097. emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
  8098. break;
  8099. case GLSLstd450PackHalf2x16:
  8100. emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
  8101. break;
  8102. case GLSLstd450UnpackSnorm4x8:
  8103. emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
  8104. break;
  8105. case GLSLstd450UnpackUnorm4x8:
  8106. emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
  8107. break;
  8108. case GLSLstd450UnpackSnorm2x16:
  8109. emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
  8110. break;
  8111. case GLSLstd450UnpackUnorm2x16:
  8112. emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
  8113. break;
  8114. case GLSLstd450UnpackHalf2x16:
  8115. emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
  8116. break;
  8117. case GLSLstd450PackDouble2x32:
  8118. emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
  8119. break;
  8120. case GLSLstd450UnpackDouble2x32:
  8121. emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
  8122. break;
  8123. // Vector math
  8124. case GLSLstd450Length:
  8125. emit_unary_func_op(result_type, id, args[0], "length");
  8126. break;
  8127. case GLSLstd450Distance:
  8128. emit_binary_func_op(result_type, id, args[0], args[1], "distance");
  8129. break;
  8130. case GLSLstd450Cross:
  8131. emit_binary_func_op(result_type, id, args[0], args[1], "cross");
  8132. break;
  8133. case GLSLstd450Normalize:
  8134. emit_unary_func_op(result_type, id, args[0], "normalize");
  8135. break;
  8136. case GLSLstd450FaceForward:
  8137. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
  8138. break;
  8139. case GLSLstd450Reflect:
  8140. emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
  8141. break;
  8142. case GLSLstd450Refract:
  8143. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
  8144. break;
  8145. // Bit-fiddling
  8146. case GLSLstd450FindILsb:
  8147. // findLSB always returns int.
  8148. emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
  8149. break;
  8150. case GLSLstd450FindSMsb:
  8151. emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
  8152. break;
  8153. case GLSLstd450FindUMsb:
  8154. emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
  8155. int_type); // findMSB always returns int.
  8156. break;
  8157. // Multisampled varying
  8158. case GLSLstd450InterpolateAtCentroid:
  8159. emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
  8160. break;
  8161. case GLSLstd450InterpolateAtSample:
  8162. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
  8163. break;
  8164. case GLSLstd450InterpolateAtOffset:
  8165. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
  8166. break;
  8167. case GLSLstd450NMin:
  8168. case GLSLstd450NMax:
  8169. {
  8170. if (options.vulkan_semantics)
  8171. {
  8172. require_extension_internal("GL_EXT_spirv_intrinsics");
  8173. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  8174. Polyfill poly = {};
  8175. switch (get<SPIRType>(result_type).width)
  8176. {
  8177. case 16:
  8178. poly = op == GLSLstd450NMin ? PolyfillNMin16 : PolyfillNMax16;
  8179. break;
  8180. case 32:
  8181. poly = op == GLSLstd450NMin ? PolyfillNMin32 : PolyfillNMax32;
  8182. break;
  8183. case 64:
  8184. poly = op == GLSLstd450NMin ? PolyfillNMin64 : PolyfillNMax64;
  8185. break;
  8186. default:
  8187. SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax.");
  8188. }
  8189. require_polyfill(poly, relaxed);
  8190. // Function return decorations are broken, so need to do double polyfill.
  8191. if (relaxed)
  8192. require_polyfill(poly, false);
  8193. const char *op_str;
  8194. if (relaxed)
  8195. op_str = op == GLSLstd450NMin ? "spvNMinRelaxed" : "spvNMaxRelaxed";
  8196. else
  8197. op_str = op == GLSLstd450NMin ? "spvNMin" : "spvNMax";
  8198. emit_binary_func_op(result_type, id, args[0], args[1], op_str);
  8199. }
  8200. else
  8201. {
  8202. emit_nminmax_op(result_type, id, args[0], args[1], op);
  8203. }
  8204. break;
  8205. }
  8206. case GLSLstd450NClamp:
  8207. {
  8208. if (options.vulkan_semantics)
  8209. {
  8210. require_extension_internal("GL_EXT_spirv_intrinsics");
  8211. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  8212. Polyfill poly = {};
  8213. switch (get<SPIRType>(result_type).width)
  8214. {
  8215. case 16:
  8216. poly = PolyfillNClamp16;
  8217. break;
  8218. case 32:
  8219. poly = PolyfillNClamp32;
  8220. break;
  8221. case 64:
  8222. poly = PolyfillNClamp64;
  8223. break;
  8224. default:
  8225. SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax.");
  8226. }
  8227. require_polyfill(poly, relaxed);
  8228. // Function return decorations are broken, so need to do double polyfill.
  8229. if (relaxed)
  8230. require_polyfill(poly, false);
  8231. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], relaxed ? "spvNClampRelaxed" : "spvNClamp");
  8232. }
  8233. else
  8234. {
  8235. // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
  8236. // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
  8237. uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
  8238. if (!max_id)
  8239. max_id = ir.increase_bound_by(1);
  8240. // Inherit precision qualifiers.
  8241. ir.meta[max_id] = ir.meta[id];
  8242. emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
  8243. emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
  8244. }
  8245. break;
  8246. }
  8247. default:
  8248. statement("// unimplemented GLSL op ", eop);
  8249. break;
  8250. }
  8251. }
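// Emulates the NaN-aware NMin/NMax semantics (return the other operand when one is NaN) without native
// support. For NMin the emitted GLSL is effectively:
//     tmp    = min(a, b);
//     result = mix(mix(tmp, b, isnan(a)), a, isnan(b));
// On legacy targets, isnan(x) is replaced by the self-inequality test (x != x).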
  8252. void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
  8253. {
  8254. // Need to emulate this call.
  8255. uint32_t &ids = extra_sub_expressions[id];
  8256. if (!ids)
  8257. {
  8258. ids = ir.increase_bound_by(5);
  8259. auto btype = get<SPIRType>(result_type);
  8260. btype.basetype = SPIRType::Boolean;
  8261. set<SPIRType>(ids, btype);
  8262. }
  8263. uint32_t btype_id = ids + 0;
  8264. uint32_t left_nan_id = ids + 1;
  8265. uint32_t right_nan_id = ids + 2;
  8266. uint32_t tmp_id = ids + 3;
  8267. uint32_t mixed_first_id = ids + 4;
  8268. // Inherit precision qualifiers.
  8269. ir.meta[tmp_id] = ir.meta[id];
  8270. ir.meta[mixed_first_id] = ir.meta[id];
  8271. if (!is_legacy())
  8272. {
  8273. emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
  8274. emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
  8275. }
  8276. else if (expression_type(op0).vecsize > 1)
  8277. {
  8278. // If the number doesn't equal itself, it must be NaN
  8279. emit_binary_func_op(btype_id, left_nan_id, op0, op0, "notEqual");
  8280. emit_binary_func_op(btype_id, right_nan_id, op1, op1, "notEqual");
  8281. }
  8282. else
  8283. {
  8284. emit_binary_op(btype_id, left_nan_id, op0, op0, "!=");
  8285. emit_binary_op(btype_id, right_nan_id, op1, op1, "!=");
  8286. }
  8287. emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
  8288. emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
  8289. emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
  8290. }
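// Emulates the inverse hyperbolic functions on targets without them, using the standard log identities:
//     asinh(x) = log(x + sqrt(x * x + 1))
//     acosh(x) = log(x + sqrt(x * x - 1))
//     atanh(x) = log((1 + x) / (1 - x)) * 0.5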
  8291. void CompilerGLSL::emit_emulated_ahyper_op(uint32_t result_type, uint32_t id, uint32_t op0, GLSLstd450 op)
  8292. {
  8293. const char *one = backend.float_literal_suffix ? "1.0f" : "1.0";
  8294. std::string expr;
  8295. bool forward = should_forward(op0);
  8296. switch (op)
  8297. {
case GLSLstd450Asinh:
expr = join("log(", to_enclosed_expression(op0), " + sqrt(",
to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " + ", one, "))");
// The result is emitted once by the shared emit_op() after this switch; emitting it here as well
// would register the expression for this ID twice.
break;
  8303. case GLSLstd450Acosh:
  8304. expr = join("log(", to_enclosed_expression(op0), " + sqrt(",
  8305. to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " - ", one, "))");
  8306. break;
  8307. case GLSLstd450Atanh:
  8308. expr = join("log((", one, " + ", to_enclosed_expression(op0), ") / "
  8309. "(", one, " - ", to_enclosed_expression(op0), ")) * 0.5",
  8310. backend.float_literal_suffix ? "f" : "");
  8311. break;
  8312. default:
  8313. SPIRV_CROSS_THROW("Invalid op.");
  8314. }
  8315. emit_op(result_type, id, expr, forward);
  8316. inherit_expression_dependencies(id, op0);
  8317. }
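// SPV_AMD_shader_ballot extended instructions; each maps directly onto its GL_AMD_shader_ballot builtin
// and is registered as control-dependent, since the result depends on which invocations are active.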
  8318. void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
  8319. uint32_t)
  8320. {
  8321. require_extension_internal("GL_AMD_shader_ballot");
  8322. enum AMDShaderBallot
  8323. {
  8324. SwizzleInvocationsAMD = 1,
  8325. SwizzleInvocationsMaskedAMD = 2,
  8326. WriteInvocationAMD = 3,
  8327. MbcntAMD = 4
  8328. };
  8329. auto op = static_cast<AMDShaderBallot>(eop);
  8330. switch (op)
  8331. {
  8332. case SwizzleInvocationsAMD:
  8333. emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
  8334. register_control_dependent_expression(id);
  8335. break;
  8336. case SwizzleInvocationsMaskedAMD:
  8337. emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
  8338. register_control_dependent_expression(id);
  8339. break;
  8340. case WriteInvocationAMD:
  8341. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
  8342. register_control_dependent_expression(id);
  8343. break;
  8344. case MbcntAMD:
  8345. emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
  8346. register_control_dependent_expression(id);
  8347. break;
  8348. default:
  8349. statement("// unimplemented SPV AMD shader ballot op ", eop);
  8350. break;
  8351. }
  8352. }
  8353. void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
  8354. const uint32_t *args, uint32_t)
  8355. {
  8356. require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
  8357. enum AMDShaderExplicitVertexParameter
  8358. {
  8359. InterpolateAtVertexAMD = 1
  8360. };
  8361. auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
  8362. switch (op)
  8363. {
  8364. case InterpolateAtVertexAMD:
  8365. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
  8366. break;
  8367. default:
  8368. statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
  8369. break;
  8370. }
  8371. }
  8372. void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
  8373. const uint32_t *args, uint32_t)
  8374. {
  8375. require_extension_internal("GL_AMD_shader_trinary_minmax");
  8376. enum AMDShaderTrinaryMinMax
  8377. {
  8378. FMin3AMD = 1,
  8379. UMin3AMD = 2,
  8380. SMin3AMD = 3,
  8381. FMax3AMD = 4,
  8382. UMax3AMD = 5,
  8383. SMax3AMD = 6,
  8384. FMid3AMD = 7,
  8385. UMid3AMD = 8,
  8386. SMid3AMD = 9
  8387. };
  8388. auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
  8389. switch (op)
  8390. {
  8391. case FMin3AMD:
  8392. case UMin3AMD:
  8393. case SMin3AMD:
  8394. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
  8395. break;
  8396. case FMax3AMD:
  8397. case UMax3AMD:
  8398. case SMax3AMD:
  8399. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
  8400. break;
  8401. case FMid3AMD:
  8402. case UMid3AMD:
  8403. case SMid3AMD:
  8404. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
  8405. break;
  8406. default:
  8407. statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
  8408. break;
  8409. }
  8410. }
  8411. void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
  8412. uint32_t)
  8413. {
  8414. require_extension_internal("GL_AMD_gcn_shader");
  8415. enum AMDGCNShader
  8416. {
  8417. CubeFaceIndexAMD = 1,
  8418. CubeFaceCoordAMD = 2,
  8419. TimeAMD = 3
  8420. };
  8421. auto op = static_cast<AMDGCNShader>(eop);
  8422. switch (op)
  8423. {
  8424. case CubeFaceIndexAMD:
  8425. emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
  8426. break;
  8427. case CubeFaceCoordAMD:
  8428. emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
  8429. break;
  8430. case TimeAMD:
  8431. {
  8432. string expr = "timeAMD()";
  8433. emit_op(result_type, id, expr, true);
  8434. register_control_dependent_expression(id);
  8435. break;
  8436. }
  8437. default:
  8438. statement("// unimplemented SPV AMD gcn shader op ", eop);
  8439. break;
  8440. }
  8441. }
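// Emits GroupNonUniform* (subgroup) operations. Extended result types pull in the matching
// GL_EXT_shader_subgroup_extended_types_* extension under Vulkan semantics; OpenGL GLSL only supports a
// subset of these ops, with capabilities requested through the ShaderSubgroupSupportHelper features below.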
  8442. void CompilerGLSL::emit_subgroup_op(const Instruction &i)
  8443. {
  8444. const uint32_t *ops = stream(i);
  8445. auto op = static_cast<Op>(i.op);
  8446. if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops))
  8447. SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
  8448. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  8449. uint32_t integer_width = get_integer_width_for_instruction(i);
  8450. auto int_type = to_signed_basetype(integer_width);
  8451. auto uint_type = to_unsigned_basetype(integer_width);
  8452. if (options.vulkan_semantics)
  8453. {
  8454. auto &return_type = get<SPIRType>(ops[0]);
  8455. switch (return_type.basetype)
  8456. {
  8457. case SPIRType::SByte:
  8458. case SPIRType::UByte:
  8459. require_extension_internal("GL_EXT_shader_subgroup_extended_types_int8");
  8460. break;
  8461. case SPIRType::Short:
  8462. case SPIRType::UShort:
  8463. require_extension_internal("GL_EXT_shader_subgroup_extended_types_int16");
  8464. break;
  8465. case SPIRType::Half:
  8466. require_extension_internal("GL_EXT_shader_subgroup_extended_types_float16");
  8467. break;
  8468. case SPIRType::Int64:
  8469. case SPIRType::UInt64:
  8470. require_extension_internal("GL_EXT_shader_subgroup_extended_types_int64");
  8471. break;
  8472. default:
  8473. break;
  8474. }
  8475. }
  8476. switch (op)
  8477. {
  8478. case OpGroupNonUniformElect:
  8479. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
  8480. break;
  8481. case OpGroupNonUniformBallotBitCount:
  8482. {
  8483. const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
  8484. if (operation == GroupOperationReduce)
  8485. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
  8486. else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
  8487. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
  8488. }
  8489. break;
  8490. case OpGroupNonUniformBallotBitExtract:
  8491. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
  8492. break;
  8493. case OpGroupNonUniformInverseBallot:
  8494. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
  8495. break;
  8496. case OpGroupNonUniformBallot:
  8497. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
  8498. break;
  8499. case OpGroupNonUniformBallotFindLSB:
  8500. case OpGroupNonUniformBallotFindMSB:
  8501. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
  8502. break;
  8503. case OpGroupNonUniformBroadcast:
  8504. case OpGroupNonUniformBroadcastFirst:
  8505. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
  8506. break;
  8507. case OpGroupNonUniformShuffle:
  8508. case OpGroupNonUniformShuffleXor:
  8509. require_extension_internal("GL_KHR_shader_subgroup_shuffle");
  8510. break;
  8511. case OpGroupNonUniformShuffleUp:
  8512. case OpGroupNonUniformShuffleDown:
  8513. require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
  8514. break;
  8515. case OpGroupNonUniformRotateKHR:
  8516. require_extension_internal("GL_KHR_shader_subgroup_rotate");
  8517. break;
  8518. case OpGroupNonUniformAll:
  8519. case OpGroupNonUniformAny:
  8520. case OpGroupNonUniformAllEqual:
  8521. {
  8522. const SPIRType &type = expression_type(ops[3]);
  8523. if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
  8524. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
  8525. else
  8526. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
  8527. }
  8528. break;
  8529. // clang-format off
  8530. #define GLSL_GROUP_OP(OP)\
  8531. case OpGroupNonUniform##OP:\
  8532. {\
  8533. auto operation = static_cast<GroupOperation>(ops[3]);\
  8534. if (operation == GroupOperationClusteredReduce)\
  8535. require_extension_internal("GL_KHR_shader_subgroup_clustered");\
  8536. else if (operation == GroupOperationReduce)\
  8537. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\
  8538. else if (operation == GroupOperationExclusiveScan)\
  8539. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\
  8540. else if (operation == GroupOperationInclusiveScan)\
  8541. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\
  8542. else\
  8543. SPIRV_CROSS_THROW("Invalid group operation.");\
  8544. break;\
  8545. }
  8546. GLSL_GROUP_OP(IAdd)
  8547. GLSL_GROUP_OP(FAdd)
  8548. GLSL_GROUP_OP(IMul)
  8549. GLSL_GROUP_OP(FMul)
  8550. #undef GLSL_GROUP_OP
  8551. // clang-format on
  8552. case OpGroupNonUniformFMin:
  8553. case OpGroupNonUniformFMax:
  8554. case OpGroupNonUniformSMin:
  8555. case OpGroupNonUniformSMax:
  8556. case OpGroupNonUniformUMin:
  8557. case OpGroupNonUniformUMax:
  8558. case OpGroupNonUniformBitwiseAnd:
  8559. case OpGroupNonUniformBitwiseOr:
  8560. case OpGroupNonUniformBitwiseXor:
  8561. case OpGroupNonUniformLogicalAnd:
  8562. case OpGroupNonUniformLogicalOr:
  8563. case OpGroupNonUniformLogicalXor:
  8564. {
  8565. auto operation = static_cast<GroupOperation>(ops[3]);
  8566. if (operation == GroupOperationClusteredReduce)
  8567. {
  8568. require_extension_internal("GL_KHR_shader_subgroup_clustered");
  8569. }
  8570. else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
  8571. operation == GroupOperationReduce)
  8572. {
  8573. require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
  8574. }
  8575. else
  8576. SPIRV_CROSS_THROW("Invalid group operation.");
  8577. break;
  8578. }
  8579. case OpGroupNonUniformQuadSwap:
  8580. case OpGroupNonUniformQuadBroadcast:
  8581. require_extension_internal("GL_KHR_shader_subgroup_quad");
  8582. break;
  8583. case OpGroupNonUniformQuadAllKHR:
  8584. case OpGroupNonUniformQuadAnyKHR:
  8585. // Require both extensions to be enabled.
  8586. require_extension_internal("GL_KHR_shader_subgroup_vote");
  8587. require_extension_internal("GL_EXT_shader_quad_control");
  8588. break;
  8589. default:
  8590. SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
  8591. }
  8592. uint32_t result_type = ops[0];
  8593. uint32_t id = ops[1];
  8594. // These quad ops do not have a scope parameter.
  8595. if (op != OpGroupNonUniformQuadAllKHR && op != OpGroupNonUniformQuadAnyKHR)
  8596. {
  8597. auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
  8598. if (scope != ScopeSubgroup)
  8599. SPIRV_CROSS_THROW("Only subgroup scope is supported.");
  8600. }
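// Second pass: emit the actual subgroup*() built-in call for this opcode.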
  8601. switch (op)
  8602. {
  8603. case OpGroupNonUniformElect:
  8604. emit_op(result_type, id, "subgroupElect()", true);
  8605. break;
  8606. case OpGroupNonUniformBroadcast:
  8607. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
  8608. break;
  8609. case OpGroupNonUniformBroadcastFirst:
  8610. emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
  8611. break;
  8612. case OpGroupNonUniformBallot:
  8613. emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
  8614. break;
  8615. case OpGroupNonUniformInverseBallot:
  8616. emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
  8617. break;
  8618. case OpGroupNonUniformBallotBitExtract:
  8619. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
  8620. break;
  8621. case OpGroupNonUniformBallotFindLSB:
  8622. emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
  8623. break;
  8624. case OpGroupNonUniformBallotFindMSB:
  8625. emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
  8626. break;
  8627. case OpGroupNonUniformBallotBitCount:
  8628. {
  8629. auto operation = static_cast<GroupOperation>(ops[3]);
  8630. if (operation == GroupOperationReduce)
  8631. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
  8632. else if (operation == GroupOperationInclusiveScan)
  8633. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
  8634. else if (operation == GroupOperationExclusiveScan)
  8635. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
  8636. else
  8637. SPIRV_CROSS_THROW("Invalid BitCount operation.");
  8638. break;
  8639. }
  8640. case OpGroupNonUniformShuffle:
  8641. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
  8642. break;
  8643. case OpGroupNonUniformShuffleXor:
  8644. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
  8645. break;
  8646. case OpGroupNonUniformShuffleUp:
  8647. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
  8648. break;
  8649. case OpGroupNonUniformShuffleDown:
  8650. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
  8651. break;
  8652. case OpGroupNonUniformRotateKHR:
  8653. if (i.length > 5)
  8654. emit_trinary_func_op(result_type, id, ops[3], ops[4], ops[5], "subgroupClusteredRotate");
  8655. else
  8656. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupRotate");
  8657. break;
  8658. case OpGroupNonUniformAll:
  8659. emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
  8660. break;
  8661. case OpGroupNonUniformAny:
  8662. emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
  8663. break;
  8664. case OpGroupNonUniformAllEqual:
  8665. emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
  8666. break;
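// These macros expand an emission case per arithmetic/bitwise/logical group op, choosing between
// subgroup*, subgroupInclusive*, subgroupExclusive* and subgroupClustered* based on the
// GroupOperation. The _CAST variant forwards an explicit int/uint type so signed and unsigned
// min/max bind to the intended overload.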
  8667. // clang-format off
  8668. #define GLSL_GROUP_OP(op, glsl_op) \
  8669. case OpGroupNonUniform##op: \
  8670. { \
  8671. auto operation = static_cast<GroupOperation>(ops[3]); \
  8672. if (operation == GroupOperationReduce) \
  8673. emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
  8674. else if (operation == GroupOperationInclusiveScan) \
  8675. emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
  8676. else if (operation == GroupOperationExclusiveScan) \
  8677. emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
  8678. else if (operation == GroupOperationClusteredReduce) \
  8679. emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
  8680. else \
  8681. SPIRV_CROSS_THROW("Invalid group operation."); \
  8682. break; \
  8683. }
  8684. #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
  8685. case OpGroupNonUniform##op: \
  8686. { \
  8687. auto operation = static_cast<GroupOperation>(ops[3]); \
  8688. if (operation == GroupOperationReduce) \
  8689. emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
  8690. else if (operation == GroupOperationInclusiveScan) \
  8691. emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
  8692. else if (operation == GroupOperationExclusiveScan) \
  8693. emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
  8694. else if (operation == GroupOperationClusteredReduce) \
  8695. emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
  8696. else \
  8697. SPIRV_CROSS_THROW("Invalid group operation."); \
  8698. break; \
  8699. }
  8700. GLSL_GROUP_OP(FAdd, Add)
  8701. GLSL_GROUP_OP(FMul, Mul)
  8702. GLSL_GROUP_OP(FMin, Min)
  8703. GLSL_GROUP_OP(FMax, Max)
  8704. GLSL_GROUP_OP(IAdd, Add)
  8705. GLSL_GROUP_OP(IMul, Mul)
  8706. GLSL_GROUP_OP_CAST(SMin, Min, int_type)
  8707. GLSL_GROUP_OP_CAST(SMax, Max, int_type)
  8708. GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
  8709. GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
  8710. GLSL_GROUP_OP(BitwiseAnd, And)
  8711. GLSL_GROUP_OP(BitwiseOr, Or)
  8712. GLSL_GROUP_OP(BitwiseXor, Xor)
  8713. GLSL_GROUP_OP(LogicalAnd, And)
  8714. GLSL_GROUP_OP(LogicalOr, Or)
  8715. GLSL_GROUP_OP(LogicalXor, Xor)
  8716. #undef GLSL_GROUP_OP
  8717. #undef GLSL_GROUP_OP_CAST
  8718. // clang-format on
  8719. case OpGroupNonUniformQuadSwap:
  8720. {
  8721. uint32_t direction = evaluate_constant_u32(ops[4]);
  8722. if (direction == 0)
  8723. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
  8724. else if (direction == 1)
  8725. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
  8726. else if (direction == 2)
  8727. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
  8728. else
  8729. SPIRV_CROSS_THROW("Invalid quad swap direction.");
  8730. break;
  8731. }
  8732. case OpGroupNonUniformQuadBroadcast:
  8733. {
  8734. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
  8735. break;
  8736. }
  8737. case OpGroupNonUniformQuadAllKHR:
  8738. emit_unary_func_op(result_type, id, ops[2], "subgroupQuadAll");
  8739. break;
  8740. case OpGroupNonUniformQuadAnyKHR:
  8741. emit_unary_func_op(result_type, id, ops[2], "subgroupQuadAny");
  8742. break;
  8743. default:
  8744. SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
  8745. }
  8746. register_control_dependent_expression(id);
  8747. }
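// Picks the GLSL construct used to reinterpret in_type as out_type: a constructor-style cast for
// same-width integer casts, the *BitsTo* family for float <-> int reinterpretation, and the
// pack*/unpack* helpers for width-changing casts. Returns an empty string when no conversion is needed.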
  8748. string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
  8749. {
  8750. // OpBitcast can deal with pointers.
  8751. if (out_type.pointer || in_type.pointer)
  8752. {
  8753. if (out_type.vecsize == 2 || in_type.vecsize == 2)
  8754. require_extension_internal("GL_EXT_buffer_reference_uvec2");
  8755. return type_to_glsl(out_type);
  8756. }
  8757. if (out_type.basetype == in_type.basetype)
  8758. return "";
  8759. assert(out_type.basetype != SPIRType::Boolean);
  8760. assert(in_type.basetype != SPIRType::Boolean);
  8761. bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
  8762. bool same_size_cast = out_type.width == in_type.width;
  8763. // Trivial bitcast case, casts between integers.
  8764. if (integral_cast && same_size_cast)
  8765. return type_to_glsl(out_type);
  8766. // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
  8767. if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
  8768. return "unpack8";
  8769. else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
  8770. return "pack16";
  8771. else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
  8772. return "pack32";
  8773. // Floating <-> Integer special casts. Just have to enumerate all cases. :(
  8774. // 16-bit, 32-bit and 64-bit floats.
  8775. if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
  8776. {
  8777. if (is_legacy_es())
  8778. SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
  8779. else if (!options.es && options.version < 330)
  8780. require_extension_internal("GL_ARB_shader_bit_encoding");
  8781. return "floatBitsToUint";
  8782. }
  8783. else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
  8784. {
  8785. if (is_legacy_es())
  8786. SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
  8787. else if (!options.es && options.version < 330)
  8788. require_extension_internal("GL_ARB_shader_bit_encoding");
  8789. return "floatBitsToInt";
  8790. }
  8791. else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
  8792. {
  8793. if (is_legacy_es())
  8794. SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
  8795. else if (!options.es && options.version < 330)
  8796. require_extension_internal("GL_ARB_shader_bit_encoding");
  8797. return "uintBitsToFloat";
  8798. }
  8799. else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
  8800. {
  8801. if (is_legacy_es())
  8802. SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
  8803. else if (!options.es && options.version < 330)
  8804. require_extension_internal("GL_ARB_shader_bit_encoding");
  8805. return "intBitsToFloat";
  8806. }
  8807. else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
  8808. return "doubleBitsToInt64";
  8809. else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
  8810. return "doubleBitsToUint64";
  8811. else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
  8812. return "int64BitsToDouble";
  8813. else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
  8814. return "uint64BitsToDouble";
  8815. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
  8816. return "float16BitsToInt16";
  8817. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
  8818. return "float16BitsToUint16";
  8819. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
  8820. return "int16BitsToFloat16";
  8821. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
  8822. return "uint16BitsToFloat16";
  8823. // And finally, some even more special purpose casts.
  8824. if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
  8825. return "packUint2x32";
  8826. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
  8827. return "unpackUint2x32";
  8828. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
  8829. return "unpackFloat2x16";
  8830. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
  8831. return "packFloat2x16";
  8832. else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
  8833. return "packInt2x16";
  8834. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
  8835. return "unpackInt2x16";
  8836. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
  8837. return "packUint2x16";
  8838. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
  8839. return "unpackUint2x16";
  8840. else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
  8841. return "packInt4x16";
  8842. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
  8843. return "unpackInt4x16";
  8844. else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
  8845. return "packUint4x16";
  8846. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
  8847. return "unpackUint4x16";
  8848. else if (out_type.basetype == SPIRType::BFloat16 && in_type.basetype == SPIRType::UShort)
  8849. return "uintBitsToBFloat16EXT";
  8850. else if (out_type.basetype == SPIRType::BFloat16 && in_type.basetype == SPIRType::Short)
  8851. return "intBitsToBFloat16EXT";
  8852. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::BFloat16)
  8853. return "bfloat16BitsToUintEXT";
  8854. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::BFloat16)
  8855. return "bfloat16BitsToIntEXT";
  8856. else if (out_type.basetype == SPIRType::FloatE4M3 && in_type.basetype == SPIRType::UByte)
  8857. return "uintBitsToFloate4m3EXT";
  8858. else if (out_type.basetype == SPIRType::FloatE4M3 && in_type.basetype == SPIRType::SByte)
  8859. return "intBitsToFloate4m3EXT";
  8860. else if (out_type.basetype == SPIRType::UByte && in_type.basetype == SPIRType::FloatE4M3)
  8861. return "floate4m3BitsToUintEXT";
  8862. else if (out_type.basetype == SPIRType::SByte && in_type.basetype == SPIRType::FloatE4M3)
  8863. return "floate4m3BitsToIntEXT";
  8864. else if (out_type.basetype == SPIRType::FloatE5M2 && in_type.basetype == SPIRType::UByte)
  8865. return "uintBitsToFloate5m2EXT";
  8866. else if (out_type.basetype == SPIRType::FloatE5M2 && in_type.basetype == SPIRType::SByte)
  8867. return "intBitsToFloate5m2EXT";
  8868. else if (out_type.basetype == SPIRType::UByte && in_type.basetype == SPIRType::FloatE5M2)
  8869. return "floate5m2BitsToUintEXT";
  8870. else if (out_type.basetype == SPIRType::SByte && in_type.basetype == SPIRType::FloatE5M2)
  8871. return "floate5m2BitsToIntEXT";
  8872. return "";
  8873. }
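// Wraps a single argument in the bitcast chosen by bitcast_glsl_op, or returns the
// argument expression unchanged when the types already match.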
  8874. string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
  8875. {
  8876. auto op = bitcast_glsl_op(result_type, expression_type(argument));
  8877. if (op.empty())
  8878. return to_enclosed_unpacked_expression(argument);
  8879. else
  8880. return join(op, "(", to_unpacked_expression(argument), ")");
  8881. }
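// Bitcasts the expression for 'arg' to target_type, keeping its width and vector size;
// the wrapping call is only emitted when the base types actually differ.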
  8882. std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
  8883. {
  8884. auto expr = to_expression(arg);
  8885. auto &src_type = expression_type(arg);
  8886. if (src_type.basetype != target_type)
  8887. {
  8888. auto target = src_type;
  8889. target.basetype = target_type;
  8890. expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
  8891. }
  8892. return expr;
  8893. }
  8894. std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
  8895. const std::string &expr)
  8896. {
  8897. if (target_type.basetype == expr_type)
  8898. return expr;
  8899. auto src_type = target_type;
  8900. src_type.basetype = expr_type;
  8901. return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
  8902. }
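// Translates a SPIR-V BuiltIn (plus storage class, which disambiguates cases like
// gl_SampleMask vs. gl_SampleMaskIn) into the GLSL variable name, requesting whatever
// extensions the targeted version needs. Unknown builtins fall back to a gl_BuiltIn_<value> placeholder.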
  8903. string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
  8904. {
  8905. switch (builtin)
  8906. {
  8907. case BuiltInPosition:
  8908. return "gl_Position";
  8909. case BuiltInPointSize:
  8910. return "gl_PointSize";
  8911. case BuiltInClipDistance:
  8912. {
  8913. if (options.es)
  8914. require_extension_internal("GL_EXT_clip_cull_distance");
  8915. return "gl_ClipDistance";
  8916. }
  8917. case BuiltInCullDistance:
  8918. {
  8919. if (options.es)
  8920. require_extension_internal("GL_EXT_clip_cull_distance");
  8921. return "gl_CullDistance";
  8922. }
  8923. case BuiltInVertexId:
  8924. if (options.vulkan_semantics)
  8925. SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
  8926. "with GL semantics.");
  8927. return "gl_VertexID";
  8928. case BuiltInInstanceId:
  8929. if (options.vulkan_semantics)
  8930. {
  8931. auto model = get_entry_point().model;
  8932. switch (model)
  8933. {
  8934. case ExecutionModelIntersectionKHR:
  8935. case ExecutionModelAnyHitKHR:
  8936. case ExecutionModelClosestHitKHR:
  8937. // gl_InstanceID is allowed in these shaders.
  8938. break;
  8939. default:
  8940. SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
  8941. "created with GL semantics.");
  8942. }
  8943. }
  8944. if (!options.es && options.version < 140)
  8945. {
  8946. require_extension_internal("GL_ARB_draw_instanced");
  8947. }
  8948. return "gl_InstanceID";
  8949. case BuiltInVertexIndex:
  8950. if (options.vulkan_semantics)
  8951. return "gl_VertexIndex";
  8952. else
  8953. return "gl_VertexID"; // gl_VertexID already has the base offset applied.
  8954. case BuiltInInstanceIndex:
  8955. if (options.vulkan_semantics)
  8956. return "gl_InstanceIndex";
  8957. if (!options.es && options.version < 140)
  8958. {
  8959. require_extension_internal("GL_ARB_draw_instanced");
  8960. }
  8961. if (options.vertex.support_nonzero_base_instance)
  8962. {
  8963. if (!options.vulkan_semantics)
  8964. {
  8965. // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
  8966. require_extension_internal("GL_ARB_shader_draw_parameters");
  8967. }
  8968. return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
  8969. }
  8970. else
  8971. return "gl_InstanceID";
  8972. case BuiltInPrimitiveId:
  8973. if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
  8974. return "gl_PrimitiveIDIn";
  8975. else
  8976. return "gl_PrimitiveID";
  8977. case BuiltInInvocationId:
  8978. return "gl_InvocationID";
  8979. case BuiltInLayer:
  8980. {
  8981. auto model = get_execution_model();
  8982. if (model == ExecutionModelVertex || model == ExecutionModelTessellationEvaluation)
  8983. {
  8984. if (options.es)
  8985. require_extension_internal("GL_NV_viewport_array2");
  8986. else
  8987. require_extension_internal("GL_ARB_shader_viewport_layer_array");
  8988. }
  8989. return "gl_Layer";
  8990. }
  8991. case BuiltInViewportIndex:
  8992. return "gl_ViewportIndex";
  8993. case BuiltInTessLevelOuter:
  8994. return "gl_TessLevelOuter";
  8995. case BuiltInTessLevelInner:
  8996. return "gl_TessLevelInner";
  8997. case BuiltInTessCoord:
  8998. return "gl_TessCoord";
  8999. case BuiltInPatchVertices:
  9000. return "gl_PatchVerticesIn";
  9001. case BuiltInFragCoord:
  9002. return "gl_FragCoord";
  9003. case BuiltInPointCoord:
  9004. return "gl_PointCoord";
  9005. case BuiltInFrontFacing:
  9006. return "gl_FrontFacing";
  9007. case BuiltInFragDepth:
  9008. return "gl_FragDepth";
  9009. case BuiltInNumWorkgroups:
  9010. return "gl_NumWorkGroups";
  9011. case BuiltInWorkgroupSize:
  9012. return "gl_WorkGroupSize";
  9013. case BuiltInWorkgroupId:
  9014. return "gl_WorkGroupID";
  9015. case BuiltInLocalInvocationId:
  9016. return "gl_LocalInvocationID";
  9017. case BuiltInGlobalInvocationId:
  9018. return "gl_GlobalInvocationID";
  9019. case BuiltInLocalInvocationIndex:
  9020. return "gl_LocalInvocationIndex";
  9021. case BuiltInHelperInvocation:
  9022. return "gl_HelperInvocation";
  9023. case BuiltInBaseVertex:
  9024. if (options.es)
  9025. SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
  9026. if (options.vulkan_semantics)
  9027. {
  9028. if (options.version < 460)
  9029. {
  9030. require_extension_internal("GL_ARB_shader_draw_parameters");
  9031. return "gl_BaseVertexARB";
  9032. }
  9033. return "gl_BaseVertex";
  9034. }
  9035. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  9036. require_extension_internal("GL_ARB_shader_draw_parameters");
  9037. return "SPIRV_Cross_BaseVertex";
  9038. case BuiltInBaseInstance:
  9039. if (options.es)
  9040. SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
  9041. if (options.vulkan_semantics)
  9042. {
  9043. if (options.version < 460)
  9044. {
  9045. require_extension_internal("GL_ARB_shader_draw_parameters");
  9046. return "gl_BaseInstanceARB";
  9047. }
  9048. return "gl_BaseInstance";
  9049. }
  9050. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  9051. require_extension_internal("GL_ARB_shader_draw_parameters");
  9052. return "SPIRV_Cross_BaseInstance";
  9053. case BuiltInDrawIndex:
  9054. if (options.es)
  9055. SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
  9056. if (options.vulkan_semantics)
  9057. {
  9058. if (options.version < 460)
  9059. {
  9060. require_extension_internal("GL_ARB_shader_draw_parameters");
  9061. return "gl_DrawIDARB";
  9062. }
  9063. return "gl_DrawID";
  9064. }
  9065. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  9066. require_extension_internal("GL_ARB_shader_draw_parameters");
  9067. return "gl_DrawIDARB";
  9068. case BuiltInSampleId:
  9069. if (is_legacy())
  9070. SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
  9071. else if (options.es && options.version < 320)
  9072. require_extension_internal("GL_OES_sample_variables");
  9073. else if (!options.es && options.version < 400)
  9074. require_extension_internal("GL_ARB_sample_shading");
  9075. return "gl_SampleID";
  9076. case BuiltInSampleMask:
  9077. if (is_legacy())
  9078. SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
  9079. else if (options.es && options.version < 320)
  9080. require_extension_internal("GL_OES_sample_variables");
  9081. else if (!options.es && options.version < 400)
  9082. require_extension_internal("GL_ARB_sample_shading");
  9083. if (storage == StorageClassInput)
  9084. return "gl_SampleMaskIn";
  9085. else
  9086. return "gl_SampleMask";
  9087. case BuiltInSamplePosition:
  9088. if (is_legacy())
  9089. SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
  9090. else if (options.es && options.version < 320)
  9091. require_extension_internal("GL_OES_sample_variables");
  9092. else if (!options.es && options.version < 400)
  9093. require_extension_internal("GL_ARB_sample_shading");
  9094. return "gl_SamplePosition";
  9095. case BuiltInViewIndex:
  9096. if (options.vulkan_semantics)
  9097. return "gl_ViewIndex";
  9098. else
  9099. return "gl_ViewID_OVR";
  9100. case BuiltInNumSubgroups:
  9101. request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
  9102. return "gl_NumSubgroups";
  9103. case BuiltInSubgroupId:
  9104. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
  9105. return "gl_SubgroupID";
  9106. case BuiltInSubgroupSize:
  9107. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
  9108. return "gl_SubgroupSize";
  9109. case BuiltInSubgroupLocalInvocationId:
  9110. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
  9111. return "gl_SubgroupInvocationID";
  9112. case BuiltInSubgroupEqMask:
  9113. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  9114. return "gl_SubgroupEqMask";
  9115. case BuiltInSubgroupGeMask:
  9116. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  9117. return "gl_SubgroupGeMask";
  9118. case BuiltInSubgroupGtMask:
  9119. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  9120. return "gl_SubgroupGtMask";
  9121. case BuiltInSubgroupLeMask:
  9122. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  9123. return "gl_SubgroupLeMask";
  9124. case BuiltInSubgroupLtMask:
  9125. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  9126. return "gl_SubgroupLtMask";
  9127. case BuiltInLaunchIdKHR:
  9128. return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
  9129. case BuiltInLaunchSizeKHR:
  9130. return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
  9131. case BuiltInWorldRayOriginKHR:
  9132. return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
  9133. case BuiltInWorldRayDirectionKHR:
  9134. return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
  9135. case BuiltInObjectRayOriginKHR:
  9136. return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
  9137. case BuiltInObjectRayDirectionKHR:
  9138. return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
  9139. case BuiltInRayTminKHR:
  9140. return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
  9141. case BuiltInRayTmaxKHR:
  9142. return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
  9143. case BuiltInInstanceCustomIndexKHR:
  9144. return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
  9145. case BuiltInObjectToWorldKHR:
  9146. return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
  9147. case BuiltInWorldToObjectKHR:
  9148. return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
  9149. case BuiltInHitTNV:
  9150. // gl_HitTEXT is an alias of RayTMax in KHR.
  9151. return "gl_HitTNV";
  9152. case BuiltInHitKindKHR:
  9153. return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
  9154. case BuiltInIncomingRayFlagsKHR:
  9155. return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
  9156. case BuiltInBaryCoordKHR:
  9157. {
  9158. if (options.es && options.version < 320)
  9159. SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320.");
  9160. else if (!options.es && options.version < 450)
  9161. SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450.");
  9162. if (barycentric_is_nv)
  9163. {
  9164. require_extension_internal("GL_NV_fragment_shader_barycentric");
  9165. return "gl_BaryCoordNV";
  9166. }
  9167. else
  9168. {
  9169. require_extension_internal("GL_EXT_fragment_shader_barycentric");
  9170. return "gl_BaryCoordEXT";
  9171. }
  9172. }
  9173. case BuiltInBaryCoordNoPerspNV:
  9174. {
  9175. if (options.es && options.version < 320)
  9176. SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320.");
  9177. else if (!options.es && options.version < 450)
  9178. SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450.");
  9179. if (barycentric_is_nv)
  9180. {
  9181. require_extension_internal("GL_NV_fragment_shader_barycentric");
  9182. return "gl_BaryCoordNoPerspNV";
  9183. }
  9184. else
  9185. {
  9186. require_extension_internal("GL_EXT_fragment_shader_barycentric");
  9187. return "gl_BaryCoordNoPerspEXT";
  9188. }
  9189. }
  9190. case BuiltInFragStencilRefEXT:
  9191. {
  9192. if (!options.es)
  9193. {
  9194. require_extension_internal("GL_ARB_shader_stencil_export");
  9195. return "gl_FragStencilRefARB";
  9196. }
  9197. else
  9198. SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
  9199. }
  9200. case BuiltInPrimitiveShadingRateKHR:
  9201. {
  9202. if (!options.vulkan_semantics)
  9203. SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
  9204. require_extension_internal("GL_EXT_fragment_shading_rate");
  9205. return "gl_PrimitiveShadingRateEXT";
  9206. }
  9207. case BuiltInShadingRateKHR:
  9208. {
  9209. if (!options.vulkan_semantics)
  9210. SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
  9211. require_extension_internal("GL_EXT_fragment_shading_rate");
  9212. return "gl_ShadingRateEXT";
  9213. }
  9214. case BuiltInDeviceIndex:
  9215. if (!options.vulkan_semantics)
  9216. SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
  9217. require_extension_internal("GL_EXT_device_group");
  9218. return "gl_DeviceIndex";
  9219. case BuiltInFullyCoveredEXT:
  9220. if (!options.es)
  9221. require_extension_internal("GL_NV_conservative_raster_underestimation");
  9222. else
  9223. SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
  9224. return "gl_FragFullyCoveredNV";
  9225. case BuiltInPrimitiveTriangleIndicesEXT:
  9226. return "gl_PrimitiveTriangleIndicesEXT";
  9227. case BuiltInPrimitiveLineIndicesEXT:
  9228. return "gl_PrimitiveLineIndicesEXT";
  9229. case BuiltInPrimitivePointIndicesEXT:
  9230. return "gl_PrimitivePointIndicesEXT";
  9231. case BuiltInCullPrimitiveEXT:
  9232. return "gl_CullPrimitiveEXT";
  9233. case BuiltInHitTriangleVertexPositionsKHR:
  9234. {
  9235. if (!options.vulkan_semantics)
  9236. SPIRV_CROSS_THROW("Need Vulkan semantics for EXT_ray_tracing_position_fetch.");
  9237. require_extension_internal("GL_EXT_ray_tracing_position_fetch");
  9238. return "gl_HitTriangleVertexPositionsEXT";
  9239. }
  9240. case BuiltInClusterIDNV:
  9241. {
  9242. if (!options.vulkan_semantics)
  9243. SPIRV_CROSS_THROW("Can only use ClusterIDNV in Vulkan GLSL.");
  9244. require_extension_internal("GL_NV_cluster_acceleration_structure");
  9245. return "gl_ClusterIDNV";
  9246. }
  9247. default:
  9248. return join("gl_BuiltIn_", convert_to_string(builtin));
  9249. }
  9250. }
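// Maps component index 0..3 to the x/y/z/w swizzle. Out-of-range indices deliberately
// return "x" rather than asserting; such accesses are undefined behavior per the spec anyway.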
  9251. const char *CompilerGLSL::index_to_swizzle(uint32_t index)
  9252. {
  9253. switch (index)
  9254. {
  9255. case 0:
  9256. return "x";
  9257. case 1:
  9258. return "y";
  9259. case 2:
  9260. return "z";
  9261. case 3:
  9262. return "w";
  9263. default:
  9264. return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec.
  9265. }
  9266. }
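// Appends one [index] subscript to an access-chain expression. For OpPtrAccessChain where the
// base already ends in [n], the index is folded into the existing subscript as an offset instead.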
  9267. void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
  9268. AccessChainFlags flags, bool &access_chain_is_arrayed,
  9269. uint32_t index)
  9270. {
  9271. bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
  9272. bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
  9273. bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
  9274. string idx_expr = index_is_literal ? convert_to_string(index) : to_unpacked_expression(index, register_expression_read);
  9275. // For the case where the base of an OpPtrAccessChain already ends in [n],
9276. // we need to use the index as an offset to the existing index; otherwise,
  9277. // we can just use the index directly.
  9278. if (ptr_chain && access_chain_is_arrayed)
  9279. {
  9280. size_t split_pos = expr.find_last_of(']');
  9281. size_t enclose_split = expr.find_last_of(')');
  9282. // If we have already enclosed the expression, don't try to be clever, it will break.
  9283. if (split_pos > enclose_split || enclose_split == string::npos)
  9284. {
  9285. string expr_front = expr.substr(0, split_pos);
  9286. string expr_back = expr.substr(split_pos);
  9287. expr = expr_front + " + " + enclose_expression(idx_expr) + expr_back;
  9288. return;
  9289. }
  9290. }
  9291. expr += "[";
  9292. expr += idx_expr;
  9293. expr += "]";
  9294. }
  9295. bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
  9296. {
  9297. return true;
  9298. }
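// Core access-chain builder. Starting from the (possibly dereferenced) base expression, each index
// peels one level off the type hierarchy - pointer element, array element, struct member, matrix
// column, vector component - while appending the matching subscript, member reference or swizzle.
// Row-major transpose state, packing, invariance, relaxed precision and builtin redirection are
// tracked along the way and reported back through *meta.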
  9299. string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
  9300. AccessChainFlags flags, AccessChainMeta *meta)
  9301. {
  9302. string expr;
  9303. bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
  9304. bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
  9305. bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
  9306. bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
  9307. bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
  9308. bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
  9309. if (!chain_only)
  9310. {
  9311. // We handle transpose explicitly, so don't resolve that here.
  9312. auto *e = maybe_get<SPIRExpression>(base);
  9313. bool old_transpose = e && e->need_transpose;
  9314. if (e)
  9315. e->need_transpose = false;
  9316. expr = to_enclosed_expression(base, register_expression_read);
  9317. if (e)
  9318. e->need_transpose = old_transpose;
  9319. }
  9320. // Start traversing type hierarchy at the proper non-pointer types,
  9321. // but keep type_id referencing the original pointer for use below.
  9322. uint32_t type_id = expression_type_id(base);
  9323. const auto *type = &get_pointee_type(type_id);
  9324. if (!backend.native_pointers)
  9325. {
  9326. if (ptr_chain)
  9327. SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
  9328. // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
  9329. // continuing the access chain.
  9330. if (should_dereference(base))
  9331. expr = dereference_expression(get<SPIRType>(type_id), expr);
  9332. }
  9333. else if (should_dereference(base) && type->basetype != SPIRType::Struct && !ptr_chain)
  9334. expr = join("(", dereference_expression(*type, expr), ")");
  9335. bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
  9336. bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
  9337. bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
  9338. uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
  9339. bool is_invariant = has_decoration(base, DecorationInvariant);
  9340. bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision);
  9341. bool pending_array_enclose = false;
  9342. bool dimension_flatten = false;
  9343. bool access_meshlet_position_y = false;
  9344. bool chain_is_builtin = false;
  9345. BuiltIn chained_builtin = {};
  9346. if (auto *base_expr = maybe_get<SPIRExpression>(base))
  9347. {
  9348. access_meshlet_position_y = base_expr->access_meshlet_position_y;
  9349. }
  9350. // If we are translating access to a structured buffer, the first subscript '._m0' must be hidden
  9351. bool hide_first_subscript = count > 1 && is_user_type_structured(base);
  9352. const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) {
  9353. AccessChainFlags mod_flags = flags;
  9354. if (!is_literal)
  9355. mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
  9356. if (!is_ptr_chain)
  9357. mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT;
  9358. access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
  9359. if (check_physical_type_cast(expr, type, physical_type))
  9360. physical_type = 0;
  9361. };
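// Walk the chain: each iteration consumes one index and steps type/type_id one level down.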
  9362. for (uint32_t i = 0; i < count; i++)
  9363. {
  9364. uint32_t index = indices[i];
  9365. bool is_literal = index_is_literal;
  9366. if (is_literal && msb_is_id && (index >> 31u) != 0u)
  9367. {
  9368. is_literal = false;
  9369. index &= 0x7fffffffu;
  9370. }
  9371. bool ptr_chain_array_entry = ptr_chain && i == 0 && is_array(*type);
  9372. if (ptr_chain_array_entry)
  9373. {
  9374. // This is highly unusual code, since normally we'd use plain AccessChain, but it's still allowed.
  9375. // We are considered to have a pointer to array and one element shifts by one array at a time.
  9376. // If we use normal array indexing, we'll first decay to pointer, and lose the array-ness,
  9377. // so we have to take pointer to array explicitly.
  9378. if (!should_dereference(base))
  9379. expr = enclose_expression(address_of_expression(expr));
  9380. }
  9381. if (ptr_chain && i == 0)
  9382. {
  9383. // Pointer chains
  9384. // If we are flattening multidimensional arrays, only create opening bracket on first
  9385. // array index.
  9386. if (options.flatten_multidimensional_arrays)
  9387. {
  9388. dimension_flatten = type->array.size() >= 1;
  9389. pending_array_enclose = dimension_flatten;
  9390. if (pending_array_enclose)
  9391. expr += "[";
  9392. }
  9393. if (options.flatten_multidimensional_arrays && dimension_flatten)
  9394. {
  9395. // If we are flattening multidimensional arrays, do manual stride computation.
  9396. if (is_literal)
  9397. expr += convert_to_string(index);
  9398. else
  9399. expr += to_enclosed_expression(index, register_expression_read);
  9400. for (auto j = uint32_t(type->array.size()); j; j--)
  9401. {
  9402. expr += " * ";
  9403. expr += enclose_expression(to_array_size(*type, j - 1));
  9404. }
  9405. if (type->array.empty())
  9406. pending_array_enclose = false;
  9407. else
  9408. expr += " + ";
  9409. if (!pending_array_enclose)
  9410. expr += "]";
  9411. }
  9412. else
  9413. {
  9414. if (flags & ACCESS_CHAIN_PTR_CHAIN_POINTER_ARITH_BIT)
  9415. {
  9416. SPIRType tmp_type(OpTypeInt);
  9417. tmp_type.basetype = SPIRType::UInt64;
  9418. tmp_type.width = 64;
  9419. tmp_type.vecsize = 1;
  9420. tmp_type.columns = 1;
  9421. TypeID ptr_type_id = expression_type_id(base);
  9422. const SPIRType &ptr_type = get<SPIRType>(ptr_type_id);
  9423. const SPIRType &pointee_type = get_pointee_type(ptr_type);
  9424. // This only runs in native pointer backends.
  9425. // Can replace reinterpret_cast with a backend string if ever needed.
  9426. // We expect this to count as a de-reference.
  9427. // This leaks some MSL details, but feels slightly overkill to
  9428. // add yet another virtual interface just for this.
  9429. auto intptr_expr = join("reinterpret_cast<", type_to_glsl(tmp_type), ">(", expr, ")");
  9430. intptr_expr += join(" + ", to_enclosed_unpacked_expression(index), " * ",
  9431. get_decoration(ptr_type_id, DecorationArrayStride));
  9432. if (flags & ACCESS_CHAIN_PTR_CHAIN_CAST_TO_SCALAR_BIT)
  9433. {
  9434. is_packed = true;
  9435. expr = join("*reinterpret_cast<device packed_", type_to_glsl(pointee_type),
  9436. " *>(", intptr_expr, ")");
  9437. }
  9438. else
  9439. {
  9440. expr = join("*reinterpret_cast<", type_to_glsl(ptr_type), ">(", intptr_expr, ")");
  9441. }
  9442. }
  9443. else
  9444. append_index(index, is_literal, true);
  9445. }
  9446. if (type->basetype == SPIRType::ControlPointArray)
  9447. {
  9448. type_id = type->parent_type;
  9449. type = &get<SPIRType>(type_id);
  9450. }
  9451. access_chain_is_arrayed = true;
  9452. // Explicitly enclose the expression if this is one of the weird pointer-to-array cases.
  9453. // We don't want any future indexing to add to this array dereference.
  9454. // Enclosing the expression blocks that and avoids any shenanigans with operand priority.
  9455. if (ptr_chain_array_entry)
  9456. expr = join("(", expr, ")");
  9457. }
  9458. // Arrays and OpTypeCooperativeVectorNV (aka fancy arrays)
  9459. else if (!type->array.empty() || type->op == OpTypeCooperativeVectorNV)
  9460. {
  9461. // If we are flattening multidimensional arrays, only create opening bracket on first
  9462. // array index.
  9463. if (options.flatten_multidimensional_arrays && !pending_array_enclose)
  9464. {
  9465. dimension_flatten = type->array.size() > 1;
  9466. pending_array_enclose = dimension_flatten;
  9467. if (pending_array_enclose)
  9468. expr += "[";
  9469. }
  9470. assert(type->parent_type);
  9471. auto *var = maybe_get<SPIRVariable>(base);
  9472. if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
  9473. !has_decoration(type->self, DecorationBlock))
  9474. {
  9475. // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
  9476. // Normally, these variables live in blocks when compiled from GLSL,
  9477. // but HLSL seems to just emit straight arrays here.
  9478. // We must pretend this access goes through gl_in/gl_out arrays
  9479. // to be able to access certain builtins as arrays.
  9480. // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT.
  9481. auto builtin = ir.meta[base].decoration.builtin_type;
  9482. bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT;
  9483. chain_is_builtin = true;
  9484. chained_builtin = builtin;
  9485. switch (builtin)
  9486. {
  9487. case BuiltInCullDistance:
  9488. case BuiltInClipDistance:
  9489. if (type->array.size() == 1) // Red herring. Only consider block IO for two-dimensional arrays here.
  9490. {
  9491. append_index(index, is_literal);
  9492. break;
  9493. }
  9494. // fallthrough
  9495. case BuiltInPosition:
  9496. case BuiltInPointSize:
  9497. if (mesh_shader)
  9498. expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
  9499. else if (var->storage == StorageClassInput)
  9500. expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
  9501. else if (var->storage == StorageClassOutput)
  9502. expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
  9503. else
  9504. append_index(index, is_literal);
  9505. break;
  9506. case BuiltInPrimitiveId:
  9507. case BuiltInLayer:
  9508. case BuiltInViewportIndex:
  9509. case BuiltInCullPrimitiveEXT:
  9510. case BuiltInPrimitiveShadingRateKHR:
  9511. if (mesh_shader)
  9512. expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
  9513. else
  9514. append_index(index, is_literal);
  9515. break;
  9516. default:
  9517. append_index(index, is_literal);
  9518. break;
  9519. }
  9520. }
  9521. else if (backend.force_merged_mesh_block && i == 0 && var &&
  9522. !is_builtin_variable(*var) && var->storage == StorageClassOutput)
  9523. {
  9524. if (is_per_primitive_variable(*var))
  9525. expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
  9526. else
  9527. expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
  9528. }
  9529. else if (options.flatten_multidimensional_arrays && dimension_flatten)
  9530. {
  9531. // If we are flattening multidimensional arrays, do manual stride computation.
  9532. auto &parent_type = get<SPIRType>(type->parent_type);
  9533. if (is_literal)
  9534. expr += convert_to_string(index);
  9535. else
  9536. expr += to_enclosed_expression(index, register_expression_read);
  9537. for (auto j = uint32_t(parent_type.array.size()); j; j--)
  9538. {
  9539. expr += " * ";
  9540. expr += enclose_expression(to_array_size(parent_type, j - 1));
  9541. }
  9542. if (parent_type.array.empty())
  9543. pending_array_enclose = false;
  9544. else
  9545. expr += " + ";
  9546. if (!pending_array_enclose)
  9547. expr += "]";
  9548. }
  9549. else if (index_is_literal || !builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
  9550. {
  9551. // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
  9552. // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
  9553. // For literal indices we are working on composites, so we ignore this since we have already converted to proper array.
  9554. append_index(index, is_literal);
  9555. }
  9556. if (var && has_decoration(var->self, DecorationBuiltIn) &&
  9557. get_decoration(var->self, DecorationBuiltIn) == BuiltInPosition &&
  9558. get_execution_model() == ExecutionModelMeshEXT)
  9559. {
  9560. access_meshlet_position_y = true;
  9561. }
  9562. type_id = type->parent_type;
  9563. type = &get<SPIRType>(type_id);
  9564. // If the physical type has an unnatural vecsize,
  9565. // we must assume it's a faked struct where the .data member
  9566. // is used for the real payload.
  9567. if (physical_type && (is_vector(*type) || is_scalar(*type)))
  9568. {
  9569. auto &phys = get<SPIRType>(physical_type);
  9570. if (phys.vecsize > 4)
  9571. expr += ".data";
  9572. }
  9573. access_chain_is_arrayed = true;
  9574. }
  9575. // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping.
  9576. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
  9577. else if (type->basetype == SPIRType::Struct)
  9578. {
  9579. if (!is_literal)
  9580. index = evaluate_constant_u32(index);
  9581. if (index < uint32_t(type->member_type_index_redirection.size()))
  9582. index = type->member_type_index_redirection[index];
  9583. if (index >= type->member_types.size())
  9584. SPIRV_CROSS_THROW("Member index is out of bounds!");
  9585. if (hide_first_subscript)
  9586. {
  9587. // First "._m0" subscript has been hidden, subsequent fields must be emitted even for structured buffers
  9588. hide_first_subscript = false;
  9589. }
  9590. else
  9591. {
  9592. BuiltIn builtin = BuiltInMax;
  9593. if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base))
  9594. {
  9595. if (access_chain_is_arrayed)
  9596. {
  9597. expr += ".";
  9598. expr += builtin_to_glsl(builtin, type->storage);
  9599. }
  9600. else
  9601. expr = builtin_to_glsl(builtin, type->storage);
  9602. if (builtin == BuiltInPosition && get_execution_model() == ExecutionModelMeshEXT)
  9603. {
  9604. access_meshlet_position_y = true;
  9605. }
  9606. chain_is_builtin = true;
  9607. chained_builtin = builtin;
  9608. }
  9609. else
  9610. {
  9611. // If the member has a qualified name, use it as the entire chain
  9612. string qual_mbr_name = get_member_qualified_name(type_id, index);
  9613. if (!qual_mbr_name.empty())
  9614. expr = qual_mbr_name;
  9615. else if (flatten_member_reference)
  9616. expr += join("_", to_member_name(*type, index));
  9617. else
  9618. {
  9619. // Any pointer de-refences for values are handled in the first access chain.
  9620. // For pointer chains, the pointer-ness is resolved through an array access.
  9621. // The only time this is not true is when accessing array of SSBO/UBO.
  9622. // This case is explicitly handled.
  9623. expr += to_member_reference(base, *type, index, ptr_chain || i != 0);
  9624. }
  9625. }
  9626. }
  9627. if (has_member_decoration(type->self, index, DecorationInvariant))
  9628. is_invariant = true;
  9629. if (has_member_decoration(type->self, index, DecorationRelaxedPrecision))
  9630. relaxed_precision = true;
  9631. is_packed = member_is_packed_physical_type(*type, index);
  9632. if (member_is_remapped_physical_type(*type, index))
  9633. physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
  9634. else
  9635. physical_type = 0;
  9636. row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
  9637. type = &get<SPIRType>(type->member_types[index]);
  9638. }
  9639. // Matrix -> Vector
  9640. else if (type->columns > 1)
  9641. {
  9642. // If we have a row-major matrix here, we need to defer any transpose in case this access chain
  9643. // is used to store a column. We can resolve it right here and now if we access a scalar directly,
  9644. // by flipping indexing order of the matrix.
  9645. expr += "[";
  9646. if (is_literal)
  9647. expr += convert_to_string(index);
  9648. else
  9649. expr += to_unpacked_expression(index, register_expression_read);
  9650. expr += "]";
  9651. // If the physical type has an unnatural vecsize,
  9652. // we must assume it's a faked struct where the .data member
  9653. // is used for the real payload.
  9654. if (physical_type)
  9655. {
  9656. auto &phys = get<SPIRType>(physical_type);
  9657. if (phys.vecsize > 4 || phys.columns > 4)
  9658. expr += ".data";
  9659. }
  9660. type_id = type->parent_type;
  9661. type = &get<SPIRType>(type_id);
  9662. }
  9663. // Vector -> Scalar
  9664. else if (type->op == OpTypeCooperativeMatrixKHR || type->vecsize > 1)
  9665. {
  9666. string deferred_index;
  9667. if (row_major_matrix_needs_conversion)
  9668. {
  9669. // Flip indexing order.
  9670. auto column_index = expr.find_last_of('[');
  9671. if (column_index != string::npos)
  9672. {
  9673. deferred_index = expr.substr(column_index);
  9674. auto end_deferred_index = deferred_index.find_last_of(']');
  9675. if (end_deferred_index != string::npos && end_deferred_index + 1 != deferred_index.size())
  9676. {
  9677. // If we have any data member fixups, it must be transposed so that it refers to this index.
  9678. // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong,
  9679. // and needs to be [1].data[0] instead.
  9680. end_deferred_index++;
  9681. deferred_index = deferred_index.substr(end_deferred_index) +
  9682. deferred_index.substr(0, end_deferred_index);
  9683. }
  9684. expr.resize(column_index);
  9685. }
  9686. }
  9687. // Internally, access chain implementation can also be used on composites,
  9688. // ignore scalar access workarounds in this case.
  9689. StorageClass effective_storage = StorageClassGeneric;
  9690. bool ignore_potential_sliced_writes = false;
  9691. if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
  9692. {
  9693. if (expression_type(base).pointer)
  9694. effective_storage = get_expression_effective_storage_class(base);
  9695. // Special consideration for control points.
  9696. // Control points can only be written by InvocationID, so there is no need
  9697. // to consider scalar access chains here.
  9698. // Cleans up some cases where it's very painful to determine the accurate storage class
  9699. // since blocks can be partially masked ...
  9700. auto *var = maybe_get_backing_variable(base);
  9701. if (var && var->storage == StorageClassOutput &&
  9702. get_execution_model() == ExecutionModelTessellationControl &&
  9703. !has_decoration(var->self, DecorationPatch))
  9704. {
  9705. ignore_potential_sliced_writes = true;
  9706. }
  9707. }
  9708. else
  9709. ignore_potential_sliced_writes = true;
  9710. if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
  9711. {
  9712. // On some backends, we might not be able to safely access individual scalars in a vector.
  9713. // To work around this, we might have to cast the access chain reference to something which can,
  9714. // like a pointer to scalar, which we can then index into.
  9715. prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
  9716. is_packed);
  9717. }
  9718. if (is_literal)
  9719. {
  9720. bool out_of_bounds = index >= type->vecsize && type->op != OpTypeCooperativeMatrixKHR;
  9721. if (!is_packed && !row_major_matrix_needs_conversion && type->op != OpTypeCooperativeMatrixKHR)
  9722. {
  9723. expr += ".";
  9724. expr += index_to_swizzle(out_of_bounds ? 0 : index);
  9725. }
  9726. else
  9727. {
  9728. // For packed vectors, we can only access them as an array, not by swizzle.
  9729. expr += join("[", out_of_bounds ? 0 : index, "]");
  9730. }
  9731. }
  9732. else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
  9733. {
  9734. auto &c = get<SPIRConstant>(index);
  9735. bool out_of_bounds = (c.scalar() >= type->vecsize);
  9736. if (c.specialization)
  9737. {
  9738. // If the index is a spec constant, we cannot turn extract into a swizzle.
  9739. expr += join("[", out_of_bounds ? "0" : to_expression(index), "]");
  9740. }
  9741. else
  9742. {
  9743. expr += ".";
  9744. expr += index_to_swizzle(out_of_bounds ? 0 : c.scalar());
  9745. }
  9746. }
  9747. else
  9748. {
  9749. expr += "[";
  9750. expr += to_unpacked_expression(index, register_expression_read);
  9751. expr += "]";
  9752. }
  9753. if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
  9754. {
  9755. if (prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
  9756. is_packed))
  9757. {
  9758. // We're in a pointer context now, so just remove any member dereference.
  9759. auto first_index = deferred_index.find_first_of('[');
  9760. if (first_index != string::npos && first_index != 0)
  9761. deferred_index = deferred_index.substr(first_index);
  9762. }
  9763. }
  9764. if (access_meshlet_position_y)
  9765. {
  9766. if (is_literal)
  9767. {
  9768. access_meshlet_position_y = index == 1;
  9769. }
  9770. else
  9771. {
  9772. const auto *c = maybe_get<SPIRConstant>(index);
  9773. if (c)
  9774. access_meshlet_position_y = c->scalar() == 1;
  9775. else
  9776. {
  9777. // We don't know, but we have to assume no.
9778. // Flipping Y in mesh shaders is an opt-in horrible hack, so we'll have to assume shaders try to behave.
  9779. access_meshlet_position_y = false;
  9780. }
  9781. }
  9782. }
  9783. expr += deferred_index;
  9784. row_major_matrix_needs_conversion = false;
  9785. is_packed = false;
  9786. physical_type = 0;
  9787. type_id = type->parent_type;
  9788. type = &get<SPIRType>(type_id);
  9789. }
  9790. else if (!backend.allow_truncated_access_chain)
  9791. SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
  9792. }
  9793. if (pending_array_enclose)
  9794. {
9795. SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
  9796. "but the access chain was terminated in the middle of a multidimensional array. "
  9797. "This is not supported.");
  9798. }
  9799. if (meta)
  9800. {
  9801. meta->need_transpose = row_major_matrix_needs_conversion;
  9802. meta->storage_is_packed = is_packed;
  9803. meta->storage_is_invariant = is_invariant;
  9804. meta->storage_physical_type = physical_type;
  9805. meta->relaxed_precision = relaxed_precision;
  9806. meta->access_meshlet_position_y = access_meshlet_position_y;
  9807. meta->chain_is_builtin = chain_is_builtin;
  9808. meta->builtin = chained_builtin;
  9809. }
  9810. return expr;
  9811. }
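// Default implementations of the scalar-access hooks: plain GLSL never needs a physical type cast
// or special handling to address individual scalars, so these are no-ops meant to be overridden by
// backends (such as MSL) that do.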
bool CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t)
{
	return false;
}

bool CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, StorageClass, bool &)
{
	return false;
}

string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
{
	auto ret = join(basename, "_", to_member_name(type, index));
	ParsedIR::sanitize_underscores(ret);
	return ret;
}

uint32_t CompilerGLSL::get_physical_type_stride(const SPIRType &) const
{
	SPIRV_CROSS_THROW("Invalid to call get_physical_type_stride on a backend without native pointer support.");
}
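
// Entry point for building an access chain expression. Dispatches between flattened buffer blocks
// (lowered to vec4-array arithmetic), flattened structs (lowered to per-member variables),
// and the regular access_chain_internal() path.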
string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
                                  AccessChainMeta *meta, bool ptr_chain)
{
	if (flattened_buffer_blocks.count(base))
	{
		uint32_t matrix_stride = 0;
		uint32_t array_stride = 0;
		bool need_transpose = false;
		flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
		                              &array_stride, ptr_chain);

		if (meta)
		{
			meta->need_transpose = target_type.columns > 1 && need_transpose;
			meta->storage_is_packed = false;
		}

		return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
		                              need_transpose);
	}
	else if (flattened_structs.count(base) && count > 0)
	{
		AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
		if (ptr_chain)
			flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;

		if (flattened_structs[base])
		{
			flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
			if (meta)
				meta->flattened_struct = target_type.basetype == SPIRType::Struct;
		}

		auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
		if (meta)
		{
			meta->need_transpose = false;
			meta->storage_is_packed = false;
		}

		auto basename = to_flattened_access_chain_expression(base);
		auto ret = join(basename, "_", chain);
		ParsedIR::sanitize_underscores(ret);
		return ret;
	}
	else
	{
		AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
		if (ptr_chain)
		{
			flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;

			// PtrAccessChain could get complicated.
			TypeID type_id = expression_type_id(base);
			if (backend.native_pointers && has_decoration(type_id, DecorationArrayStride))
			{
				// If there is a mismatch we have to go via 64-bit pointer arithmetic :'(
				// Using packed hacks only gets us so far, and is not designed to deal with pointer to
				// random values. It works for structs though.
				auto &pointee_type = get_pointee_type(get<SPIRType>(type_id));
				uint32_t physical_stride = get_physical_type_stride(pointee_type);
				uint32_t requested_stride = get_decoration(type_id, DecorationArrayStride);
				if (physical_stride != requested_stride)
				{
					flags |= ACCESS_CHAIN_PTR_CHAIN_POINTER_ARITH_BIT;
					if (is_vector(pointee_type))
						flags |= ACCESS_CHAIN_PTR_CHAIN_CAST_TO_SCALAR_BIT;
				}
			}
		}

		return access_chain_internal(base, indices, count, flags, meta);
	}
}
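
// Reconstructs a struct value from its flattened members by emitting a constructor expression,
// recursing into nested structs.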
string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
{
	auto expr = type_to_glsl_constructor(type);
	expr += '(';

	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
	{
		if (i)
			expr += ", ";

		auto &member_type = get<SPIRType>(type.member_types[i]);
		if (member_type.basetype == SPIRType::Struct)
			expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
		else
			expr += to_flattened_struct_member(basename, type, i);
	}
	expr += ')';
	return expr;
}

std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
{
	// Do not use to_expression as that will unflatten access chains.
	string basename;
	if (const auto *var = maybe_get<SPIRVariable>(id))
		basename = to_name(var->self);
	else if (const auto *expr = maybe_get<SPIRExpression>(id))
		basename = expr->expression;
	else
		basename = to_expression(id);

	return basename;
}
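
// Writes a struct to a flattened destination by emitting one store statement per member,
// recursing into nested structs.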
void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
                                          const SmallVector<uint32_t> &indices)
{
	SmallVector<uint32_t> sub_indices = indices;
	sub_indices.push_back(0);

	auto *member_type = &type;
	for (auto &index : indices)
		member_type = &get<SPIRType>(member_type->member_types[index]);

	for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
	{
		sub_indices.back() = i;
		auto lhs = join(basename, "_", to_member_name(*member_type, i));
		ParsedIR::sanitize_underscores(lhs);

		if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
		{
			store_flattened_struct(lhs, rhs_id, type, sub_indices);
		}
		else
		{
			auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
			statement(lhs, " = ", rhs, ";");
		}
	}
}

void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
{
	auto &type = expression_type(lhs_id);
	auto basename = to_flattened_access_chain_expression(lhs_id);
	store_flattened_struct(basename, value, type, {});
}

std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
                                                 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
                                                 uint32_t /* array_stride */, bool need_transpose)
{
	if (!target_type.array.empty())
		SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
	else if (target_type.basetype == SPIRType::Struct)
		return flattened_access_chain_struct(base, indices, count, target_type, offset);
	else if (target_type.columns > 1)
		return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
	else
		return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
}

std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset)
{
	std::string expr;

	if (backend.can_declare_struct_inline)
	{
		expr += type_to_glsl_constructor(target_type);
		expr += "(";
	}
	else
		expr += "{";

	for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
	{
		if (i != 0)
			expr += ", ";

		const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
		uint32_t member_offset = type_struct_member_offset(target_type, i);

		// The access chain terminates at the struct, so we need to find matrix strides and row-major information
		// ahead of time.
		bool need_transpose = false;
		bool relaxed = false;
		uint32_t matrix_stride = 0;
		if (member_type.columns > 1)
		{
			auto decorations = combined_decoration_for_member(target_type, i);
			need_transpose = decorations.get(DecorationRowMajor);
			relaxed = decorations.get(DecorationRelaxedPrecision);
			matrix_stride = type_struct_member_matrix_stride(target_type, i);
		}

		auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
		                                  0 /* array_stride */, need_transpose);

		// Cannot forward transpositions, so resolve them here.
		if (need_transpose)
			expr += convert_row_major_matrix(tmp, member_type, 0, false, relaxed);
		else
			expr += tmp;
	}

	expr += backend.can_declare_struct_inline ? ")" : "}";

	return expr;
}

std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset,
                                                        uint32_t matrix_stride, bool need_transpose)
{
	assert(matrix_stride);
	SPIRType tmp_type = target_type;
	if (need_transpose)
		swap(tmp_type.vecsize, tmp_type.columns);

	std::string expr;

	expr += type_to_glsl_constructor(tmp_type);
	expr += "(";

	for (uint32_t i = 0; i < tmp_type.columns; i++)
	{
		if (i != 0)
			expr += ", ";

		expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
		                                      /* need_transpose= */ false);
	}

	expr += ")";

	return expr;
}

std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
                                                        const SPIRType &target_type, uint32_t offset,
                                                        uint32_t matrix_stride, bool need_transpose)
{
	auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);

	auto buffer_name = to_name(expression_type(base).self);

	if (need_transpose)
	{
		std::string expr;

		if (target_type.vecsize > 1)
		{
			expr += type_to_glsl_constructor(target_type);
			expr += "(";
		}

		for (uint32_t i = 0; i < target_type.vecsize; ++i)
		{
			if (i != 0)
				expr += ", ";

			uint32_t component_offset = result.second + i * matrix_stride;

			assert(component_offset % (target_type.width / 8) == 0);
			uint32_t index = component_offset / (target_type.width / 8);

			expr += buffer_name;
			expr += "[";
			expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
			expr += convert_to_string(index / 4);
			expr += "]";

			expr += vector_swizzle(1, index % 4);
		}

		if (target_type.vecsize > 1)
		{
			expr += ")";
		}

		return expr;
	}
	else
	{
		assert(result.second % (target_type.width / 8) == 0);
		uint32_t index = result.second / (target_type.width / 8);

		std::string expr;

		expr += buffer_name;
		expr += "[";
		expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
		expr += convert_to_string(index / 4);
		expr += "]";

		expr += vector_swizzle(target_type.vecsize, index % 4);

		return expr;
	}
}
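
// Computes the location of a flattened access chain as a pair of
// (dynamic index expression in units of word_stride, constant byte offset).
// The string part is either empty or ends with " + " so a constant index can be appended to it.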
std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
    const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
    bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
{
	// Start traversing type hierarchy at the proper non-pointer types.
	const auto *type = &get_pointee_type(basetype);

	std::string expr;

	// Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
	bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
	uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
	uint32_t array_stride = out_array_stride ? *out_array_stride : 0;

	for (uint32_t i = 0; i < count; i++)
	{
		uint32_t index = indices[i];

		// Pointers
		if (ptr_chain && i == 0)
		{
			// Here, the pointer type will be decorated with an array stride.
			array_stride = get_decoration(basetype.self, DecorationArrayStride);
			if (!array_stride)
				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");

			auto *constant = maybe_get<SPIRConstant>(index);
			if (constant)
			{
				// Constant array access.
				offset += constant->scalar() * array_stride;
			}
			else
			{
				// Dynamic array access.
				if (array_stride % word_stride)
				{
					SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
					                  "of a 4-component vector. "
					                  "Likely culprit here is a float or vec2 array inside a push "
					                  "constant block which is std430. "
					                  "This cannot be flattened. Try using std140 layout instead.");
				}

				expr += to_enclosed_expression(index);
				expr += " * ";
				expr += convert_to_string(array_stride / word_stride);
				expr += " + ";
			}
		}
		// Arrays
		else if (!type->array.empty())
		{
			auto *constant = maybe_get<SPIRConstant>(index);
			if (constant)
			{
				// Constant array access.
				offset += constant->scalar() * array_stride;
			}
			else
			{
				// Dynamic array access.
				if (array_stride % word_stride)
				{
					SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
					                  "of a 4-component vector. "
					                  "Likely culprit here is a float or vec2 array inside a push "
					                  "constant block which is std430. "
					                  "This cannot be flattened. Try using std140 layout instead.");
				}

				expr += to_enclosed_expression(index, false);
				expr += " * ";
				expr += convert_to_string(array_stride / word_stride);
				expr += " + ";
			}

			uint32_t parent_type = type->parent_type;
			type = &get<SPIRType>(parent_type);

			if (!type->array.empty())
				array_stride = get_decoration(parent_type, DecorationArrayStride);
		}
		// For structs, the index refers to a constant, which indexes into the members.
		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
		else if (type->basetype == SPIRType::Struct)
		{
			index = evaluate_constant_u32(index);

			if (index >= type->member_types.size())
				SPIRV_CROSS_THROW("Member index is out of bounds!");

			offset += type_struct_member_offset(*type, index);

			auto &struct_type = *type;
			type = &get<SPIRType>(type->member_types[index]);

			if (type->columns > 1)
			{
				matrix_stride = type_struct_member_matrix_stride(struct_type, index);
				row_major_matrix_needs_conversion =
				    combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
			}
			else
				row_major_matrix_needs_conversion = false;

			if (!type->array.empty())
				array_stride = type_struct_member_array_stride(struct_type, index);
		}
		// Matrix -> Vector
		else if (type->columns > 1)
		{
			auto *constant = maybe_get<SPIRConstant>(index);
			if (constant)
			{
				index = evaluate_constant_u32(index);
				offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
			}
			else
			{
				uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;

				// Dynamic array access.
				if (indexing_stride % word_stride)
				{
					SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
					                  "4-component vector. "
					                  "Likely culprit here is a row-major matrix being accessed dynamically. "
					                  "This cannot be flattened. Try using std140 layout instead.");
				}

				expr += to_enclosed_expression(index, false);
				expr += " * ";
				expr += convert_to_string(indexing_stride / word_stride);
				expr += " + ";
			}

			type = &get<SPIRType>(type->parent_type);
		}
		// Vector -> Scalar
		else if (type->vecsize > 1)
		{
			auto *constant = maybe_get<SPIRConstant>(index);
			if (constant)
			{
				index = evaluate_constant_u32(index);
				offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
			}
			else
			{
				uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);

				// Dynamic array access.
				if (indexing_stride % word_stride)
				{
					SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
					                  "size of a 4-component vector. "
					                  "This cannot be flattened in legacy targets.");
				}

				expr += to_enclosed_expression(index, false);
				expr += " * ";
				expr += convert_to_string(indexing_stride / word_stride);
				expr += " + ";
			}

			type = &get<SPIRType>(type->parent_type);
		}
		else
			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
	}

	if (need_transpose)
		*need_transpose = row_major_matrix_needs_conversion;
	if (out_matrix_stride)
		*out_matrix_stride = matrix_stride;
	if (out_array_stride)
		*out_array_stride = array_stride;

	return std::make_pair(expr, offset);
}
bool CompilerGLSL::should_dereference(uint32_t id)
{
	const auto &type = expression_type(id);
	// Non-pointer expressions don't need to be dereferenced.
	if (!is_pointer(type))
		return false;

	// Handles shouldn't be dereferenced either.
	if (!expression_is_lvalue(id))
		return false;

	// If id is a variable but not a phi variable, we should not dereference it.
	// BDA passed around as parameters are always pointers.
	if (auto *var = maybe_get<SPIRVariable>(id))
		return (var->parameter && is_physical_or_buffer_pointer(type)) || var->phi_variable;

	if (auto *expr = maybe_get<SPIRExpression>(id))
	{
		// If id is an access chain, we should not dereference it.
		if (expr->access_chain)
			return false;

		// If id is a forwarded copy of a variable pointer, we should not dereference it.
		SPIRVariable *var = nullptr;
		while (expr->loaded_from && expression_is_forwarded(expr->self))
		{
			auto &src_type = expression_type(expr->loaded_from);
			// To be a copy, the pointer and its source expression must be the
			// same type. Can't check type.self, because for some reason that's
			// usually the base type with pointers stripped off. This check is
			// complex enough that I've hoisted it out of the while condition.
			if (src_type.pointer != type.pointer || src_type.pointer_depth != type.pointer_depth ||
			    src_type.parent_type != type.parent_type)
				break;
			if ((var = maybe_get<SPIRVariable>(expr->loaded_from)))
				break;
			if (!(expr = maybe_get<SPIRExpression>(expr->loaded_from)))
				break;
		}

		return !var || var->phi_variable;
	}

	// Otherwise, we should dereference this pointer expression.
	return true;
}

bool CompilerGLSL::should_dereference_caller_param(uint32_t id)
{
	const auto &type = expression_type(id);
	// BDA is always passed around as pointers. Similarly, we need to pass variable buffer pointers as pointers.
	if (is_physical_or_buffer_pointer(type))
		return false;

	return should_dereference(id);
}

bool CompilerGLSL::should_forward(uint32_t id) const
{
	// If id is a variable we will try to forward it regardless of force_temporary check below
	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL
	auto *var = maybe_get<SPIRVariable>(id);
	if (var)
	{
		// Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
		return !(has_decoration(id, DecorationBuiltIn) && has_decoration(id, DecorationVolatile));
	}

	// For debugging emit temporary variables for all expressions
	if (options.force_temporary)
		return false;

	// If an expression carries enough dependencies we need to stop forwarding at some point,
	// or we explode compilers. There are usually limits to how much we can nest expressions.
	auto *expr = maybe_get<SPIRExpression>(id);
	const uint32_t max_expression_dependencies = 64;
	if (expr && expr->expression_dependencies.size() >= max_expression_dependencies)
		return false;

	if (expr && expr->loaded_from
	    && has_decoration(expr->loaded_from, DecorationBuiltIn)
	    && has_decoration(expr->loaded_from, DecorationVolatile))
	{
		// Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
		return false;
	}

	// Immutable expression can always be forwarded.
	if (is_immutable(id))
		return true;

	return false;
}

bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
{
	// Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
	return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
}

void CompilerGLSL::track_expression_read(uint32_t id)
{
	switch (ir.ids[id].get_type())
	{
	case TypeExpression:
	{
		auto &e = get<SPIRExpression>(id);
		for (auto implied_read : e.implied_read_expressions)
			track_expression_read(implied_read);
		break;
	}

	case TypeAccessChain:
	{
		auto &e = get<SPIRAccessChain>(id);
		for (auto implied_read : e.implied_read_expressions)
			track_expression_read(implied_read);
		break;
	}

	default:
		break;
	}

	// If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
	// In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
	if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
	{
		auto &v = expression_usage_counts[id];
		v++;

		// If we create an expression outside a loop,
		// but access it inside a loop, we're implicitly reading it multiple times.
		// If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
		// working inside the backend compiler.
		if (expression_read_implies_multiple_reads(id))
			v++;

		if (v >= 2)
		{
			//if (v == 2)
			//	fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);

			// Force a recompile after this pass to avoid forwarding this variable.
			force_temporary_and_recompile(id);
		}
	}
}
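
// A call's result can only be forwarded as an inline expression if the result was not forced to a
// temporary and every argument (plus globals and aliased variables for impure calls) is itself forwardable.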
bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
{
	if (forced_temporaries.find(id) != end(forced_temporaries))
		return false;

	for (uint32_t i = 0; i < num_args; i++)
		if (!should_forward(args[i]))
			return false;

	// We need to forward globals as well.
	if (!pure)
	{
		for (auto global : global_variables)
			if (!should_forward(global))
				return false;
		for (auto aliased : aliased_variables)
			if (!should_forward(aliased))
				return false;
	}

	return true;
}

void CompilerGLSL::register_impure_function_call()
{
	// Impure functions can modify globals and aliased variables, so invalidate them as well.
	for (auto global : global_variables)
		flush_dependees(get<SPIRVariable>(global));
	for (auto aliased : aliased_variables)
		flush_dependees(get<SPIRVariable>(aliased));
}

void CompilerGLSL::register_call_out_argument(uint32_t id)
{
	register_write(id);

	auto *var = maybe_get<SPIRVariable>(id);
	if (var)
		flush_variable_declaration(var->self);
}

string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
{
	// These variables are always function local,
	// so make sure we emit the variable without storage qualifiers.
	// Some backends will inject custom variables locally in a function
	// with a storage qualifier which is not function-local.
	auto old_storage = var.storage;
	var.storage = StorageClassFunction;
	auto expr = variable_decl(var);
	var.storage = old_storage;
	return expr;
}

void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
{
	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
	{
		auto &type = get<SPIRType>(var.basetype);
		auto &flags = get_decoration_bitset(var.self);
		statement(flags_to_qualifiers_glsl(type, var.self, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
		flushed_phi_variables.insert(var.self);
	}
}

void CompilerGLSL::flush_variable_declaration(uint32_t id)
{
	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	auto *var = maybe_get<SPIRVariable>(id);
	if (var && var->deferred_declaration)
	{
		string initializer;
		if (options.force_zero_initialized_variables &&
		    (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
		     var->storage == StorageClassPrivate) &&
		    !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
		{
			initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
		}

		statement(variable_decl_function_local(*var), initializer, ";");
		var->deferred_declaration = false;
	}
	if (var)
	{
		emit_variable_temporary_copies(*var);
	}
}
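
// Removes a redundant trailing identity-like swizzle by folding it into the previous swizzle,
// e.g. foobar.wyx.xy becomes foobar.wy.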
bool CompilerGLSL::remove_duplicate_swizzle(string &op)
{
	auto pos = op.find_last_of('.');
	if (pos == string::npos || pos == 0)
		return false;

	string final_swiz = op.substr(pos + 1, string::npos);

	if (backend.swizzle_is_function)
	{
		if (final_swiz.size() < 2)
			return false;

		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
			final_swiz.erase(final_swiz.size() - 2, string::npos);
		else
			return false;
	}

	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
	// If so, and previous swizzle is of same length,
	// we can drop the final swizzle altogether.
	for (uint32_t i = 0; i < final_swiz.size(); i++)
	{
		static const char expected[] = { 'x', 'y', 'z', 'w' };
		if (i >= 4 || final_swiz[i] != expected[i])
			return false;
	}

	auto prevpos = op.find_last_of('.', pos - 1);
	if (prevpos == string::npos)
		return false;

	prevpos++;

	// Make sure there are only swizzles here ...
	for (auto i = prevpos; i < pos; i++)
	{
		if (op[i] < 'w' || op[i] > 'z')
		{
			// If swizzles are foo.xyz() like in C++ backend for example, check for that.
			if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
				break;
			return false;
		}
	}

	// If original swizzle is large enough, just carve out the components we need.
	// E.g. foobar.wyx.xy will turn into foobar.wy.
	if (pos - prevpos >= final_swiz.size())
	{
		op.erase(prevpos + final_swiz.size(), string::npos);

		// Add back the function call ...
		if (backend.swizzle_is_function)
			op += "()";
	}
	return true;
}

// Optimizes away vector swizzles where we have something like
// vec3 foo;
// foo.xyz <-- swizzle expression does nothing.
// This is a very common pattern after OpCompositeCombine.
bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
{
	auto pos = op.find_last_of('.');
	if (pos == string::npos || pos == 0)
		return false;

	string final_swiz = op.substr(pos + 1, string::npos);

	if (backend.swizzle_is_function)
	{
		if (final_swiz.size() < 2)
			return false;

		if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
			final_swiz.erase(final_swiz.size() - 2, string::npos);
		else
			return false;
	}

	// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
	// If so, and previous swizzle is of same length,
	// we can drop the final swizzle altogether.
	for (uint32_t i = 0; i < final_swiz.size(); i++)
	{
		static const char expected[] = { 'x', 'y', 'z', 'w' };
		if (i >= 4 || final_swiz[i] != expected[i])
			return false;
	}

	auto &type = expression_type(base);

	// Sanity checking ...
	assert(type.columns == 1 && type.array.empty());

	if (type.vecsize == final_swiz.size())
		op.erase(pos, string::npos);
	return true;
}
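
// Builds the argument list for a composite constructor, merging scalar extracts that come from
// the same base vector back into a single swizzle where possible.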
string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
{
	ID base = 0;
	string op;
	string subop;

	// Can only merge swizzles for vectors.
	auto &type = get<SPIRType>(return_type);
	bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1 &&
	                             type.op != OpTypeCooperativeMatrixKHR;
	bool swizzle_optimization = false;

	for (uint32_t i = 0; i < length; i++)
	{
		auto *e = maybe_get<SPIRExpression>(elems[i]);

		// If we're merging another scalar which belongs to the same base
		// object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible!
		if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
		{
			// Only supposed to be used for vector swizzle -> scalar.
			assert(!e->expression.empty() && e->expression.front() == '.');
			subop += e->expression.substr(1, string::npos);
			swizzle_optimization = true;
		}
		else
		{
			// We'll likely end up with duplicated swizzles, e.g.
			// foobar.xyz.xyz from patterns like
			// OpVectorShuffle
			// OpCompositeExtract x 3
			// OpCompositeConstruct 3x + other scalar.
			// Just modify op in-place.
			if (swizzle_optimization)
			{
				if (backend.swizzle_is_function)
					subop += "()";

				// Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
				// The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
				// We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
				// Essentially, we can only remove one set of swizzles, since that's what we have control over ...
				// Case 1:
				//  foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
				//               foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
				// Case 2:
				//  foo.xyz: Duplicate swizzle won't kick in.
				//           If foo is vec3, we can remove xyz, giving just foo.
				if (!remove_duplicate_swizzle(subop))
					remove_unity_swizzle(base, subop);

				// Strips away redundant parens if we created them during component extraction.
				strip_enclosed_expression(subop);
				swizzle_optimization = false;
				op += subop;
			}
			else
				op += subop;

			if (i)
				op += ", ";

			bool uses_buffer_offset =
			    type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
			subop = to_composite_constructor_expression(type, elems[i], uses_buffer_offset);
		}

		base = e ? e->base_expression : ID(0);
	}

	if (swizzle_optimization)
	{
		if (backend.swizzle_is_function)
			subop += "()";

		if (!remove_duplicate_swizzle(subop))
			remove_unity_swizzle(base, subop);
		// Strips away redundant parens if we created them during component extraction.
		strip_enclosed_expression(subop);
	}

	op += subop;
	return op;
}
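
// Separate sampler or sampled-image arguments are skipped when combined image samplers are in use,
// or when not targeting Vulkan semantics, since such arguments are replaced by combined samplers.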
bool CompilerGLSL::skip_argument(uint32_t id) const
{
	if (!combined_image_samplers.empty() || !options.vulkan_semantics)
	{
		auto &type = expression_type(id);
		if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
			return true;
	}
	return false;
}

bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
{
	// Do this with strings because we have a very clear pattern we can check for and it avoids
	// adding lots of special cases to the code emission.
	if (rhs.size() < lhs.size() + 3)
		return false;

	// Do not optimize matrices. They are a bit awkward to reason about in general
	// (in which order does operation happen?), and it does not work on MSL anyways.
	if (type.vecsize > 1 && type.columns > 1)
		return false;

	auto index = rhs.find(lhs);
	if (index != 0)
		return false;

	// TODO: Shift operators, but it's not important for now.
	auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
	if (op != lhs.size() + 1)
		return false;

	// Check that the op is followed by space. This excludes && and ||.
	if (rhs[op + 1] != ' ')
		return false;

	char bop = rhs[op];
	auto expr = rhs.substr(lhs.size() + 3);

	// Avoids false positives where we get a = a * b + c.
	// Normally, these expressions are always enclosed, but unexpected code paths may end up hitting this.
	if (needs_enclose_expression(expr))
		return false;

	// Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
	// Find some common patterns which are equivalent.
	if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
		statement(lhs, bop, bop, ";");
	else
		statement(lhs, " ", bop, "= ", expr, ";");
	return true;
}

void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
{
	if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
		return;

	assert(current_emitting_block);
	current_emitting_block->invalidate_expressions.push_back(expr);
}

void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
{
	current_emitting_block = &block;

	if (backend.requires_relaxed_precision_analysis)
	{
		// If PHI variables are consumed in unexpected precision contexts, copy them here.
		for (size_t i = 0, n = block.phi_variables.size(); i < n; i++)
		{
			auto &phi = block.phi_variables[i];

			// Ensure we only copy once. We know a-priori that this array will lay out
			// the same function variables together.
			if (i && block.phi_variables[i - 1].function_variable == phi.function_variable)
				continue;

			auto itr = temporary_to_mirror_precision_alias.find(phi.function_variable);
			if (itr != temporary_to_mirror_precision_alias.end())
			{
				// Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
				// so it helps to have handle_instruction_precision() on the outside of emit_instruction().
				EmbeddedInstruction inst;
				inst.op = OpCopyObject;
				inst.length = 3;
				inst.ops.push_back(expression_type_id(itr->first));
				inst.ops.push_back(itr->second);
				inst.ops.push_back(itr->first);
				emit_instruction(inst);
			}
		}
	}

	for (auto &op : block.ops)
	{
		auto temporary_copy = handle_instruction_precision(op);
		emit_instruction(op);
		if (temporary_copy.dst_id)
		{
			// Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
			// so it helps to have handle_instruction_precision() on the outside of emit_instruction().
			EmbeddedInstruction inst;
			inst.op = OpCopyObject;
			inst.length = 3;
			inst.ops.push_back(expression_type_id(temporary_copy.src_id));
			inst.ops.push_back(temporary_copy.dst_id);
			inst.ops.push_back(temporary_copy.src_id);

			// Never attempt to hoist mirrored temporaries.
			// They are hoisted in lock-step with their parents.
			block_temporary_hoisting = true;
			emit_instruction(inst);
			block_temporary_hoisting = false;
		}
	}

	current_emitting_block = nullptr;
}

void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
{
	// Allow trivially forwarded expressions like OpLoad or trivial shuffles,
	// these will be marked as having suppressed usage tracking.
	// Our only concern is to make sure arithmetic operations are done in similar ways.
	if (forced_invariant_temporaries.count(expr.self) == 0)
	{
		if (!expression_suppresses_usage_tracking(expr.self))
			force_temporary_and_recompile(expr.self);
		forced_invariant_temporaries.insert(expr.self);

		for (auto &dependent : expr.invariance_dependencies)
			disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
	}
}

void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
{
	// Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
	// this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
	// in one translation unit, but not another, e.g. due to multiple use of an expression.
	// This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
	// expressions to be temporaries.
	// It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
	// for all reasonable uses of invariant.
	if (!has_decoration(store_id, DecorationInvariant))
		return;

	auto *expr = maybe_get<SPIRExpression>(value_id);
	if (!expr)
		return;

	disallow_forwarding_in_expression_chain(*expr);
}
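
// Emits an OpStore as "lhs = rhs;", handling invariant stores, non-uniform qualifiers,
// builtin casts, array unrolling, and read-modify-write folding for legacy targets.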
void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
{
	auto rhs = to_pointer_expression(rhs_expression);

	// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
	if (!rhs.empty())
	{
		handle_store_to_invariant_variable(lhs_expression, rhs_expression);

		if (!unroll_array_to_complex_store(lhs_expression, rhs_expression))
		{
			auto lhs = to_dereferenced_expression(lhs_expression);
			if (has_decoration(lhs_expression, DecorationNonUniform))
				convert_non_uniform_expression(lhs, lhs_expression);

			// We might need to cast in order to store to a builtin.
			cast_to_variable_store(lhs_expression, rhs, expression_type(rhs_expression));

			// Tries to optimize assignments like "<lhs> = <lhs> op expr".
			// While this is purely cosmetic, this is important for legacy ESSL where loop
			// variable increments must be in either i++ or i += const-expr.
			// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
			if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
				statement(lhs, " = ", rhs, ";");
		}
		register_write(lhs_expression);
	}
}
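
// Determines the integer width an instruction effectively operates on,
// so implicit bitcasts can be emitted with a matching width.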
uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
{
	if (instr.length < 3)
		return 32;

	auto *ops = stream(instr);

	switch (instr.op)
	{
	case OpSConvert:
	case OpConvertSToF:
	case OpUConvert:
	case OpConvertUToF:
	case OpIEqual:
	case OpINotEqual:
	case OpSLessThan:
	case OpSLessThanEqual:
	case OpSGreaterThan:
	case OpSGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
		return expression_type(ops[2]).width;

	case OpSMulExtended:
	case OpUMulExtended:
		return get<SPIRType>(get<SPIRType>(ops[0]).member_types[0]).width;

	default:
	{
		// We can look at result type which is more robust.
		auto *type = maybe_get<SPIRType>(ops[0]);
		if (type && type_is_integral(*type))
			return type->width;
		else
			return 32;
	}
	}
}

uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
{
	if (length < 1)
		return 32;

	switch (op)
	{
	case GLSLstd450SAbs:
	case GLSLstd450SSign:
	case GLSLstd450UMin:
	case GLSLstd450SMin:
	case GLSLstd450UMax:
	case GLSLstd450SMax:
	case GLSLstd450UClamp:
	case GLSLstd450SClamp:
	case GLSLstd450FindSMsb:
	case GLSLstd450FindUMsb:
		return expression_type(ops[0]).width;

	default:
	{
		// We don't need to care about other opcodes, just return 32.
		return 32;
	}
	}
}

void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length)
{
	// Only GLSL supports RelaxedPrecision directly.
	// We cannot implement this in HLSL or MSL because it is tied to the type system.
	// In SPIR-V, everything must masquerade as 32-bit.
	if (!backend.requires_relaxed_precision_analysis)
		return;

	auto input_precision = analyze_expression_precision(args, length);

	// For expressions which are loaded or directly forwarded, we inherit mediump implicitly.
	// For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id.
	if (input_precision == Options::Mediump)
		set_decoration(dst_id, DecorationRelaxedPrecision);
}

CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const
{
	// Now, analyze the precision at which the arguments would run.
	// GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision
	// for the inputs. Constants do not have inherent precision and do not contribute to this decision.
	// If all inputs are constants, they inherit precision from outer expressions, including an l-value.
	// In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with
	// correct precision.
	bool expression_has_highp = false;
	bool expression_has_mediump = false;

	for (uint32_t i = 0; i < length; i++)
	{
		uint32_t arg = args[i];

		auto handle_type = ir.ids[arg].get_type();
		if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
			continue;

		if (has_decoration(arg, DecorationRelaxedPrecision))
			expression_has_mediump = true;
		else
			expression_has_highp = true;
	}

	if (expression_has_highp)
		return Options::Highp;
	else if (expression_has_mediump)
		return Options::Mediump;
	else
		return Options::DontCare;
}

void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length)
{
	if (!backend.requires_relaxed_precision_analysis)
		return;

	auto &type = get<SPIRType>(type_id);

	// RelaxedPrecision only applies to 32-bit values.
	if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt)
		return;

	bool operation_is_highp = !has_decoration(dst_id, DecorationRelaxedPrecision);

	auto input_precision = analyze_expression_precision(args, length);
	if (input_precision == Options::DontCare)
	{
		consume_temporary_in_precision_context(type_id, dst_id, input_precision);
		return;
	}

	// In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined.
	// In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit.
	// However, if the expression is not, inputs must be expanded to 32-bit first,
	// since the operation must run at high precision.
	// This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump,
	// we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations
	// and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables.
	if ((operation_is_highp && input_precision == Options::Mediump) ||
	    (!operation_is_highp && input_precision == Options::Highp))
	{
		auto precision = operation_is_highp ? Options::Highp : Options::Mediump;
		for (uint32_t i = 0; i < length; i++)
		{
			// Rewrites the opcode so that we consume an ID in correct precision context.
			// This is pretty hacky, but it's the most straightforward way of implementing this without adding
			// lots of extra passes to rewrite all code blocks.
			args[i] = consume_temporary_in_precision_context(expression_type_id(args[i]), args[i], precision);
		}
	}
}
// This is probably not exhaustive ...
static bool opcode_is_precision_sensitive_operation(Op op)
{
	switch (op)
	{
	case OpFAdd:
	case OpFSub:
	case OpFMul:
	case OpFNegate:
	case OpIAdd:
	case OpISub:
	case OpIMul:
	case OpSNegate:
	case OpFMod:
	case OpFDiv:
	case OpFRem:
	case OpSMod:
	case OpSDiv:
	case OpSRem:
	case OpUMod:
	case OpUDiv:
	case OpVectorTimesMatrix:
	case OpMatrixTimesVector:
	case OpMatrixTimesMatrix:
	case OpDPdx:
	case OpDPdy:
	case OpDPdxCoarse:
	case OpDPdyCoarse:
	case OpDPdxFine:
	case OpDPdyFine:
	case OpFwidth:
	case OpFwidthCoarse:
	case OpFwidthFine:
	case OpVectorTimesScalar:
	case OpMatrixTimesScalar:
	case OpOuterProduct:
	case OpFConvert:
	case OpSConvert:
	case OpUConvert:
	case OpConvertSToF:
	case OpConvertUToF:
	case OpConvertFToU:
	case OpConvertFToS:
		return true;

	default:
		return false;
	}
}

// Instructions which just load data but don't do any arithmetic operation should just inherit the decoration.
// SPIR-V doesn't require this, but it's somewhat implied it has to work this way, relaxed precision is only
// relevant when operating on the IDs, not when shuffling things around.
static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count)
{
	switch (op)
	{
	case OpLoad:
	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpCompositeExtract:
	case OpVectorExtractDynamic:
	case OpSampledImage:
	case OpImage:
	case OpCopyObject:
	case OpImageRead:
	case OpImageFetch:
	case OpImageSampleImplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleProjDrefImplicitLod:
	case OpImageSampleExplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSampleDrefExplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageGather:
	case OpImageDrefGather:
	case OpImageSparseRead:
	case OpImageSparseFetch:
	case OpImageSparseSampleImplicitLod:
	case OpImageSparseSampleProjImplicitLod:
	case OpImageSparseSampleDrefImplicitLod:
	case OpImageSparseSampleProjDrefImplicitLod:
	case OpImageSparseSampleExplicitLod:
	case OpImageSparseSampleProjExplicitLod:
	case OpImageSparseSampleDrefExplicitLod:
	case OpImageSparseSampleProjDrefExplicitLod:
	case OpImageSparseGather:
	case OpImageSparseDrefGather:
		arg_count = 1;
		return true;

	case OpVectorShuffle:
		arg_count = 2;
		return true;

	case OpCompositeConstruct:
		return true;

	default:
		break;
	}

	return false;
}

CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction)
{
	auto ops = stream_mutable(instruction);
	auto opcode = static_cast<Op>(instruction.op);
	uint32_t length = instruction.length;

	if (backend.requires_relaxed_precision_analysis)
	{
		if (length > 2)
		{
			uint32_t forwarding_length = length - 2;

			if (opcode_is_precision_sensitive_operation(opcode))
				analyze_precision_requirements(ops[0], ops[1], &ops[2], forwarding_length);
			else if (opcode == OpExtInst && length >= 5 && get<SPIRExtension>(ops[2]).ext == SPIRExtension::GLSL)
				analyze_precision_requirements(ops[0], ops[1], &ops[4], forwarding_length - 2);
			else if (opcode_is_precision_forwarding_instruction(opcode, forwarding_length))
				forward_relaxed_precision(ops[1], &ops[2], forwarding_length);
		}

		uint32_t result_type = 0, result_id = 0;
		if (instruction_to_result_type(result_type, result_id, opcode, ops, length))
		{
			auto itr = temporary_to_mirror_precision_alias.find(ops[1]);
			if (itr != temporary_to_mirror_precision_alias.end())
				return { itr->second, itr->first };
		}
	}

	return {};
}
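
// Splits an access-chain expression of the form "ptr[index]" into the base pointer expression
// and the index expression.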
  11018. static pair<string, string> split_coopmat_pointer(const string &expr)
  11019. {
  11020. auto ptr_expr = expr;
  11021. string index_expr;
  11022. if (ptr_expr.back() != ']')
  11023. SPIRV_CROSS_THROW("Access chain for coopmat must be indexed into an array.");
  11024. // Strip the access chain.
  11025. ptr_expr.pop_back();
  11026. uint32_t counter = 1;
  11027. while (counter && !ptr_expr.empty())
  11028. {
  11029. if (ptr_expr.back() == ']')
  11030. counter++;
  11031. else if (ptr_expr.back() == '[')
  11032. counter--;
  11033. ptr_expr.pop_back();
  11034. }
  11035. if (ptr_expr.empty())
  11036. SPIRV_CROSS_THROW("Invalid pointer expression for coopmat.");
  11037. index_expr = expr.substr(ptr_expr.size() + 1, expr.size() - (ptr_expr.size() + 1) - 1);
  11038. return { std::move(ptr_expr), std::move(index_expr) };
  11039. }
  11040. void CompilerGLSL::emit_instruction(const Instruction &instruction)
  11041. {
  11042. auto ops = stream(instruction);
  11043. auto opcode = static_cast<Op>(instruction.op);
  11044. uint32_t length = instruction.length;
  11045. #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
  11046. #define GLSL_BOP_CAST(op, type) \
  11047. emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \
  11048. opcode_is_sign_invariant(opcode), implicit_integer_promotion)
  11049. #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
  11050. #define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op)
  11051. #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
  11052. #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
  11053. #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
  11054. #define GLSL_BFOP_CAST(op, type) \
  11055. emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
  11056. #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
  11057. #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
  11058. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  11059. uint32_t integer_width = get_integer_width_for_instruction(instruction);
  11060. auto int_type = to_signed_basetype(integer_width);
  11061. auto uint_type = to_unsigned_basetype(integer_width);
  11062. // Handle C implicit integer promotion rules.
  11063. // If we get implicit promotion to int, need to make sure we cast by value to intended return type,
  11064. // otherwise, future sign-dependent operations and bitcasts will break.
  11065. bool implicit_integer_promotion = integer_width < 32 && backend.implicit_c_integer_promotion_rules &&
  11066. opcode_can_promote_integer_implicitly(opcode) &&
  11067. get<SPIRType>(ops[0]).vecsize == 1;
  11068. opcode = get_remapped_spirv_op(opcode);
  11069. switch (opcode)
  11070. {
  11071. // Dealing with memory
  11072. case OpLoad:
  11073. {
  11074. uint32_t result_type = ops[0];
  11075. uint32_t id = ops[1];
  11076. uint32_t ptr = ops[2];
  11077. flush_variable_declaration(ptr);
  11078. // If we're loading from memory that cannot be changed by the shader,
  11079. // just forward the expression directly to avoid needless temporaries.
  11080. // If an expression is mutable and forwardable, we speculate that it is immutable.
  11081. bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
  11082. // If loading a non-native row-major matrix, mark the expression as need_transpose.
  11083. bool need_transpose = false;
  11084. bool old_need_transpose = false;
  11085. auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
  11086. if (forward)
  11087. {
  11088. // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
  11089. // taking the expression.
  11090. if (ptr_expression && ptr_expression->need_transpose)
  11091. {
  11092. old_need_transpose = true;
  11093. ptr_expression->need_transpose = false;
  11094. need_transpose = true;
  11095. }
  11096. else if (is_non_native_row_major_matrix(ptr))
  11097. need_transpose = true;
  11098. }
  11099. // If we are forwarding this load,
  11100. // don't register the read to access chain here, defer that to when we actually use the expression,
  11101. // using the add_implied_read_expression mechanism.
  11102. string expr;
  11103. bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
  11104. bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
  11105. if (forward || (!is_packed && !is_remapped))
  11106. {
  11107. // For the simple case, we do not need to deal with repacking.
  11108. expr = to_dereferenced_expression(ptr, false);
  11109. }
  11110. else
  11111. {
  11112. // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
  11113. // storing the expression to a temporary.
  11114. expr = to_unpacked_expression(ptr);
  11115. }
  11116. auto &type = get<SPIRType>(result_type);
  11117. auto &expr_type = expression_type(ptr);
  11118. // If the expression has more vector components than the result type, insert
  11119. // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
  11120. // happen with e.g. the MSL backend replacing the type of an input variable.
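// In that case the wider backing expression is narrowed with a swizzle, e.g. a vec4-backed
// input feeding a vec3 result becomes roughly (expr).xyz.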
  11121. if (expr_type.vecsize > type.vecsize)
  11122. expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
  11123. if (forward && ptr_expression)
  11124. ptr_expression->need_transpose = old_need_transpose;
  11125. // We might need to cast in order to load from a builtin.
  11126. cast_from_variable_load(ptr, expr, type);
  11127. if (forward && ptr_expression)
  11128. ptr_expression->need_transpose = false;
  11129. // We might be trying to load a gl_Position[N], where we should be
  11130. // doing float4[](gl_in[i].gl_Position, ...) instead.
  11131. // Similar workarounds are required for input arrays in tessellation.
  11132. // Also, loading from gl_SampleMask array needs special unroll.
  11133. unroll_array_from_complex_load(id, ptr, expr);
  11134. if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform))
  11135. {
  11136. // If we're loading something non-opaque, we need to handle non-uniform descriptor access.
  11137. convert_non_uniform_expression(expr, ptr);
  11138. }
  11139. if (forward && ptr_expression)
  11140. ptr_expression->need_transpose = old_need_transpose;
  11141. bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
  11142. if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
  11143. rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
// By default, suppress usage tracking since using the same expression multiple times does not imply any extra work.
  11145. // However, if we try to load a complex, composite object from a flattened buffer,
  11146. // we should avoid emitting the same code over and over and lower the result to a temporary.
  11147. bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
  11148. SPIRExpression *e = nullptr;
  11149. if (!forward && expression_is_non_value_type_array(ptr))
  11150. {
  11151. // Complicated load case where we need to make a copy of ptr, but we cannot, because
  11152. // it is an array, and our backend does not support arrays as value types.
  11153. // Emit the temporary, and copy it explicitly.
  11154. e = &emit_uninitialized_temporary_expression(result_type, id);
  11155. emit_array_copy(nullptr, id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
  11156. }
  11157. else
  11158. e = &emit_op(result_type, id, expr, forward, !usage_tracking);
  11159. e->need_transpose = need_transpose;
  11160. register_read(id, ptr, forward);
  11161. if (forward)
  11162. {
  11163. // Pass through whether the result is of a packed type and the physical type ID.
  11164. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
  11165. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  11166. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
  11167. {
  11168. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
  11169. get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
  11170. }
  11171. }
  11172. else
  11173. {
  11174. // This might have been set on an earlier compilation iteration, force it to be unset.
  11175. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  11176. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  11177. }
  11178. inherit_expression_dependencies(id, ptr);
  11179. if (forward)
  11180. add_implied_read_expression(*e, ptr);
  11181. break;
  11182. }
  11183. case OpInBoundsAccessChain:
  11184. case OpAccessChain:
  11185. case OpPtrAccessChain:
  11186. {
  11187. auto *var = maybe_get<SPIRVariable>(ops[2]);
  11188. if (var)
  11189. flush_variable_declaration(var->self);
  11190. // If the base is immutable, the access chain pointer must also be.
  11191. // If an expression is mutable and forwardable, we speculate that it is immutable.
  11192. AccessChainMeta meta;
  11193. bool ptr_chain = opcode == OpPtrAccessChain;
  11194. auto &target_type = get<SPIRType>(ops[0]);
  11195. auto e = access_chain(ops[2], &ops[3], length - 3, target_type, &meta, ptr_chain);
  11196. // If the base is flattened UBO of struct type, the expression has to be a composite.
  11197. // In that case, backends which do not support inline syntax need it to be bound to a temporary.
  11198. // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted.
  11199. bool requires_temporary = false;
  11200. if (flattened_buffer_blocks.count(ops[2]) && target_type.basetype == SPIRType::Struct)
  11201. requires_temporary = !backend.can_declare_struct_inline;
  11202. auto &expr = requires_temporary ?
  11203. emit_op(ops[0], ops[1], std::move(e), false) :
  11204. set<SPIRExpression>(ops[1], std::move(e), ops[0], should_forward(ops[2]));
  11205. auto *backing_variable = maybe_get_backing_variable(ops[2]);
  11206. expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
  11207. expr.need_transpose = meta.need_transpose;
  11208. expr.access_chain = true;
  11209. expr.access_meshlet_position_y = meta.access_meshlet_position_y;
// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
  11211. if (meta.storage_is_packed)
  11212. set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
  11213. if (meta.storage_physical_type != 0)
  11214. set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
  11215. if (meta.storage_is_invariant)
  11216. set_decoration(ops[1], DecorationInvariant);
  11217. if (meta.flattened_struct)
  11218. flattened_structs[ops[1]] = true;
  11219. if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
  11220. set_decoration(ops[1], DecorationRelaxedPrecision);
  11221. if (meta.chain_is_builtin)
  11222. set_decoration(ops[1], DecorationBuiltIn, meta.builtin);
  11223. // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
  11224. // temporary which could be subject to invalidation.
// Need to assume we're forwarded while calling inherit_expression_dependencies.
  11226. forwarded_temporaries.insert(ops[1]);
  11227. // The access chain itself is never forced to a temporary, but its dependencies might.
  11228. suppressed_usage_tracking.insert(ops[1]);
  11229. for (uint32_t i = 2; i < length; i++)
  11230. {
  11231. inherit_expression_dependencies(ops[1], ops[i]);
  11232. add_implied_read_expression(expr, ops[i]);
  11233. }
  11234. // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
  11235. // we're not forwarded after all.
  11236. if (expr.expression_dependencies.empty())
  11237. forwarded_temporaries.erase(ops[1]);
  11238. break;
  11239. }
  11240. case OpStore:
  11241. {
  11242. auto *var = maybe_get<SPIRVariable>(ops[0]);
  11243. if (var && var->statically_assigned)
  11244. var->static_expression = ops[1];
  11245. else if (var && var->loop_variable && !var->loop_variable_enable)
  11246. var->static_expression = ops[1];
  11247. else if (var && var->remapped_variable && var->static_expression)
  11248. {
  11249. // Skip the write.
  11250. }
  11251. else if (flattened_structs.count(ops[0]))
  11252. {
  11253. store_flattened_struct(ops[0], ops[1]);
  11254. register_write(ops[0]);
  11255. }
  11256. else
  11257. {
  11258. emit_store_statement(ops[0], ops[1]);
  11259. }
  11260. // Storing a pointer results in a variable pointer, so we must conservatively assume
  11261. // we can write through it.
  11262. if (expression_type(ops[1]).pointer)
  11263. register_write(ops[1]);
  11264. break;
  11265. }
  11266. case OpArrayLength:
  11267. {
  11268. uint32_t result_type = ops[0];
  11269. uint32_t id = ops[1];
  11270. auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  11271. if (has_decoration(ops[2], DecorationNonUniform))
  11272. convert_non_uniform_expression(e, ops[2]);
  11273. set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
  11274. true);
  11275. break;
  11276. }
  11277. // Function calls
  11278. case OpFunctionCall:
  11279. {
  11280. uint32_t result_type = ops[0];
  11281. uint32_t id = ops[1];
  11282. uint32_t func = ops[2];
  11283. const auto *arg = &ops[3];
  11284. length -= 3;
  11285. auto &callee = get<SPIRFunction>(func);
  11286. auto &return_type = get<SPIRType>(callee.return_type);
  11287. bool pure = function_is_pure(callee);
  11288. bool control_dependent = function_is_control_dependent(callee);
  11289. bool callee_has_out_variables = false;
  11290. bool emit_return_value_as_argument = false;
  11291. // Invalidate out variables passed to functions since they can be OpStore'd to.
  11292. for (uint32_t i = 0; i < length; i++)
  11293. {
  11294. if (callee.arguments[i].write_count)
  11295. {
  11296. register_call_out_argument(arg[i]);
  11297. callee_has_out_variables = true;
  11298. }
  11299. flush_variable_declaration(arg[i]);
  11300. }
  11301. if (!return_type.array.empty() && !backend.can_return_array)
  11302. {
  11303. callee_has_out_variables = true;
  11304. emit_return_value_as_argument = true;
  11305. }
  11306. if (!pure)
  11307. register_impure_function_call();
  11308. string funexpr;
  11309. SmallVector<string> arglist;
  11310. funexpr += to_name(func) + "(";
  11311. if (emit_return_value_as_argument)
  11312. {
  11313. statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type, 0), ";");
  11314. arglist.push_back(to_name(id));
  11315. }
  11316. for (uint32_t i = 0; i < length; i++)
  11317. {
  11318. // Do not pass in separate images or samplers if we're remapping
  11319. // to combined image samplers.
  11320. if (skip_argument(arg[i]))
  11321. continue;
  11322. arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
  11323. }
  11324. for (auto &combined : callee.combined_parameters)
  11325. {
  11326. auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
  11327. auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
  11328. arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
  11329. }
  11330. append_global_func_args(callee, length, arglist);
  11331. funexpr += merge(arglist);
  11332. funexpr += ")";
  11333. // Check for function call constraints.
  11334. check_function_call_constraints(arg, length);
  11335. if (return_type.basetype != SPIRType::Void)
  11336. {
// If the function actually writes to an out variable,
// take the conservative route and do not forward.
// The problem is that we might not read the function
// result (and emit the function) before an out variable
// is read (a common case when the return value is ignored!).
// In order to avoid having to start tracking invalid variables,
// just avoid the forwarding problem altogether.
  11344. bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
  11345. (forced_temporaries.find(id) == end(forced_temporaries));
  11346. if (emit_return_value_as_argument)
  11347. {
  11348. statement(funexpr, ";");
  11349. set<SPIRExpression>(id, to_name(id), result_type, true);
  11350. }
  11351. else
  11352. emit_op(result_type, id, funexpr, forward);
  11353. // Function calls are implicit loads from all variables in question.
  11354. // Set dependencies for them.
  11355. for (uint32_t i = 0; i < length; i++)
  11356. register_read(id, arg[i], forward);
  11357. // If we're going to forward the temporary result,
  11358. // put dependencies on every variable that must not change.
  11359. if (forward)
  11360. register_global_read_dependencies(callee, id);
  11361. }
  11362. else
  11363. statement(funexpr, ";");
  11364. if (control_dependent)
  11365. register_control_dependent_expression(id);
  11366. break;
  11367. }
  11368. // Composite munging
  11369. case OpCompositeConstruct:
  11370. {
  11371. uint32_t result_type = ops[0];
  11372. uint32_t id = ops[1];
  11373. const auto *const elems = &ops[2];
  11374. length -= 2;
  11375. bool forward = true;
  11376. for (uint32_t i = 0; i < length; i++)
  11377. forward = forward && should_forward(elems[i]);
  11378. auto &out_type = get<SPIRType>(result_type);
  11379. auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
  11380. // Only splat if we have vector constructors.
  11381. // Arrays and structs must be initialized properly in full.
  11382. bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
  11383. bool splat = false;
  11384. bool swizzle_splat = false;
  11385. if (in_type)
  11386. {
  11387. splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
  11388. swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
  11389. if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
  11390. {
  11391. // Cannot swizzle literal integers as a special case.
  11392. swizzle_splat = false;
  11393. }
  11394. }
  11395. if (splat || swizzle_splat)
  11396. {
  11397. uint32_t input = elems[0];
  11398. for (uint32_t i = 0; i < length; i++)
  11399. {
  11400. if (input != elems[i])
  11401. {
  11402. splat = false;
  11403. swizzle_splat = false;
  11404. }
  11405. }
  11406. }
  11407. if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
  11408. forward = false;
  11409. if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
  11410. forward = false;
  11411. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  11412. forward = false;
  11413. string constructor_op;
  11414. if (backend.use_initializer_list && composite)
  11415. {
bool needs_trailing_bracket = false;
  11417. // Only use this path if we are building composites.
  11418. // This path cannot be used for arithmetic.
  11419. if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
  11420. constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
  11421. else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
  11422. {
  11423. // MSL path. Array constructor is baked into type here, do not use _constructor variant.
  11424. constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
needs_trailing_bracket = true;
  11426. }
  11427. constructor_op += "{ ";
  11428. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  11429. constructor_op += "0";
  11430. else if (splat)
  11431. constructor_op += to_unpacked_expression(elems[0]);
  11432. else
  11433. constructor_op += build_composite_combiner(result_type, elems, length);
  11434. constructor_op += " }";
if (needs_trailing_bracket)
  11436. constructor_op += ")";
  11437. }
  11438. else if (swizzle_splat && !composite)
  11439. {
  11440. constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
  11441. }
  11442. else
  11443. {
  11444. constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
  11445. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  11446. constructor_op += "0";
  11447. else if (splat)
  11448. constructor_op += to_unpacked_expression(elems[0]);
  11449. else
  11450. constructor_op += build_composite_combiner(result_type, elems, length);
  11451. constructor_op += ")";
  11452. }
  11453. if (!constructor_op.empty())
  11454. {
  11455. emit_op(result_type, id, constructor_op, forward);
  11456. for (uint32_t i = 0; i < length; i++)
  11457. inherit_expression_dependencies(id, elems[i]);
  11458. }
  11459. break;
  11460. }
  11461. case OpVectorInsertDynamic:
  11462. {
  11463. uint32_t result_type = ops[0];
  11464. uint32_t id = ops[1];
  11465. uint32_t vec = ops[2];
  11466. uint32_t comp = ops[3];
  11467. uint32_t index = ops[4];
  11468. flush_variable_declaration(vec);
  11469. // Make a copy, then use access chain to store the variable.
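// Roughly: emit "T _id = vec;" followed by "_id[index] = comp;", then reference _id as the result.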
  11470. statement(declare_temporary(result_type, id), to_expression(vec), ";");
  11471. set<SPIRExpression>(id, to_name(id), result_type, true);
  11472. auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
  11473. statement(chain, " = ", to_unpacked_expression(comp), ";");
  11474. break;
  11475. }
  11476. case OpVectorExtractDynamic:
  11477. {
  11478. uint32_t result_type = ops[0];
  11479. uint32_t id = ops[1];
  11480. auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
  11481. emit_op(result_type, id, expr, should_forward(ops[2]));
  11482. inherit_expression_dependencies(id, ops[2]);
  11483. inherit_expression_dependencies(id, ops[3]);
  11484. break;
  11485. }
  11486. case OpCompositeExtract:
  11487. {
  11488. uint32_t result_type = ops[0];
  11489. uint32_t id = ops[1];
  11490. length -= 3;
  11491. auto &type = get<SPIRType>(result_type);
  11492. // We can only split the expression here if our expression is forwarded as a temporary.
  11493. bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
  11494. // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
  11495. auto &composite_type = expression_type(ops[2]);
  11496. bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
  11497. if (composite_type_is_complex)
  11498. allow_base_expression = false;
  11499. if (composite_type.op == OpTypeCooperativeMatrixKHR)
  11500. allow_base_expression = false;
  11501. // Packed expressions or physical ID mapped expressions cannot be split up.
  11502. if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
  11503. has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
  11504. allow_base_expression = false;
  11505. // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
  11506. // into the base expression.
  11507. if (is_non_native_row_major_matrix(ops[2]))
  11508. allow_base_expression = false;
  11509. AccessChainMeta meta;
  11510. SPIRExpression *e = nullptr;
  11511. auto *c = maybe_get<SPIRConstant>(ops[2]);
  11512. if (c && !c->specialization && !composite_type_is_complex)
  11513. {
  11514. auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length);
  11515. e = &emit_op(result_type, id, expr, true, true);
  11516. }
  11517. else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
  11518. {
  11519. // Only apply this optimization if result is scalar.
  11520. // We want to split the access chain from the base.
  11521. // This is so we can later combine different CompositeExtract results
  11522. // with CompositeConstruct without emitting code like
  11523. //
  11524. // vec3 temp = texture(...).xyz
  11525. // vec4(temp.x, temp.y, temp.z, 1.0).
  11526. //
  11527. // when we actually wanted to emit this
  11528. // vec4(texture(...).xyz, 1.0).
  11529. //
// Including the base would prevent this and trigger multiple reads
// from the expression, forcing it to an actual temporary in GLSL.
  11532. auto expr = access_chain_internal(ops[2], &ops[3], length,
  11533. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
  11534. ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
  11535. e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
  11536. inherit_expression_dependencies(id, ops[2]);
  11537. e->base_expression = ops[2];
  11538. if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
  11539. set_decoration(ops[1], DecorationRelaxedPrecision);
  11540. }
  11541. else
  11542. {
  11543. auto expr = access_chain_internal(ops[2], &ops[3], length,
  11544. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
  11545. e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
  11546. inherit_expression_dependencies(id, ops[2]);
  11547. }
  11548. // Pass through some meta information to the loaded expression.
  11549. // We can still end up loading a buffer type to a variable, then CompositeExtract from it
  11550. // instead of loading everything through an access chain.
  11551. e->need_transpose = meta.need_transpose;
  11552. if (meta.storage_is_packed)
  11553. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  11554. if (meta.storage_physical_type != 0)
  11555. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
  11556. if (meta.storage_is_invariant)
  11557. set_decoration(id, DecorationInvariant);
  11558. break;
  11559. }
  11560. case OpCompositeInsert:
  11561. {
  11562. uint32_t result_type = ops[0];
  11563. uint32_t id = ops[1];
  11564. uint32_t obj = ops[2];
  11565. uint32_t composite = ops[3];
  11566. const auto *elems = &ops[4];
  11567. length -= 4;
  11568. flush_variable_declaration(composite);
  11569. // CompositeInsert requires a copy + modification, but this is very awkward code in HLL.
  11570. // Speculate that the input composite is no longer used, and we can modify it in-place.
  11571. // There are various scenarios where this is not possible to satisfy.
  11572. bool can_modify_in_place = true;
  11573. forced_temporaries.insert(id);
  11574. // Cannot safely RMW PHI variables since they have no way to be invalidated,
  11575. // forcing temporaries is not going to help.
  11576. // This is similar for Constant and Undef inputs.
  11577. // The only safe thing to RMW is SPIRExpression.
  11578. // If the expression has already been used (i.e. used in a continue block), we have to keep using
  11579. // that loop variable, since we won't be able to override the expression after the fact.
  11580. // If the composite is hoisted, we might never be able to properly invalidate any usage
  11581. // of that composite in a subsequent loop iteration.
  11582. if (invalid_expressions.count(composite) ||
  11583. block_composite_insert_overwrite.count(composite) ||
  11584. hoisted_temporaries.count(id) || hoisted_temporaries.count(composite) ||
  11585. maybe_get<SPIRExpression>(composite) == nullptr)
  11586. {
  11587. can_modify_in_place = false;
  11588. }
  11589. else if (backend.requires_relaxed_precision_analysis &&
  11590. has_decoration(composite, DecorationRelaxedPrecision) !=
  11591. has_decoration(id, DecorationRelaxedPrecision) &&
  11592. get<SPIRType>(result_type).basetype != SPIRType::Struct)
  11593. {
  11594. // Similarly, if precision does not match for input and output,
  11595. // we cannot alias them. If we write a composite into a relaxed precision
  11596. // ID, we might get a false truncation.
  11597. can_modify_in_place = false;
  11598. }
  11599. if (can_modify_in_place)
  11600. {
  11601. // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place.
  11602. if (!forced_temporaries.count(composite))
  11603. force_temporary_and_recompile(composite);
  11604. auto chain = access_chain_internal(composite, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  11605. statement(chain, " = ", to_unpacked_expression(obj), ";");
  11606. set<SPIRExpression>(id, to_expression(composite), result_type, true);
  11607. invalid_expressions.insert(composite);
  11608. composite_insert_overwritten.insert(composite);
  11609. }
  11610. else
  11611. {
  11612. if (maybe_get<SPIRUndef>(composite) != nullptr)
  11613. {
  11614. emit_uninitialized_temporary_expression(result_type, id);
  11615. }
  11616. else
  11617. {
  11618. // Make a copy, then use access chain to store the variable.
  11619. statement(declare_temporary(result_type, id), to_expression(composite), ";");
  11620. set<SPIRExpression>(id, to_name(id), result_type, true);
  11621. }
  11622. auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  11623. statement(chain, " = ", to_unpacked_expression(obj), ";");
  11624. }
  11625. break;
  11626. }
  11627. case OpCopyMemory:
  11628. {
  11629. uint32_t lhs = ops[0];
  11630. uint32_t rhs = ops[1];
  11631. if (lhs != rhs)
  11632. {
  11633. uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET];
  11634. if (!tmp_id)
  11635. tmp_id = ir.increase_bound_by(1);
  11636. uint32_t tmp_type_id = expression_type(rhs).parent_type;
  11637. EmbeddedInstruction fake_load, fake_store;
  11638. fake_load.op = OpLoad;
  11639. fake_load.length = 3;
  11640. fake_load.ops.push_back(tmp_type_id);
  11641. fake_load.ops.push_back(tmp_id);
  11642. fake_load.ops.push_back(rhs);
  11643. fake_store.op = OpStore;
  11644. fake_store.length = 2;
  11645. fake_store.ops.push_back(lhs);
  11646. fake_store.ops.push_back(tmp_id);
  11647. // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible.
  11648. // Synthesize a fake Load and Store pair for CopyMemory.
  11649. emit_instruction(fake_load);
  11650. emit_instruction(fake_store);
  11651. }
  11652. break;
  11653. }
  11654. case OpCopyLogical:
  11655. {
  11656. // This is used for copying object of different types, arrays and structs.
  11657. // We need to unroll the copy, element-by-element.
  11658. uint32_t result_type = ops[0];
  11659. uint32_t id = ops[1];
  11660. uint32_t rhs = ops[2];
  11661. emit_uninitialized_temporary_expression(result_type, id);
  11662. emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
  11663. break;
  11664. }
  11665. case OpCopyObject:
  11666. {
  11667. uint32_t result_type = ops[0];
  11668. uint32_t id = ops[1];
  11669. uint32_t rhs = ops[2];
  11670. bool pointer = get<SPIRType>(result_type).pointer;
  11671. auto *chain = maybe_get<SPIRAccessChain>(rhs);
  11672. auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
  11673. if (chain)
  11674. {
  11675. // Cannot lower to a SPIRExpression, just copy the object.
  11676. auto &e = set<SPIRAccessChain>(id, *chain);
  11677. e.self = id;
  11678. }
  11679. else if (imgsamp)
  11680. {
  11681. // Cannot lower to a SPIRExpression, just copy the object.
  11682. // GLSL does not currently use this type and will never get here, but MSL does.
  11683. // Handled here instead of CompilerMSL for better integration and general handling,
  11684. // and in case GLSL or other subclasses require it in the future.
  11685. auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
  11686. e.self = id;
  11687. }
  11688. else if (expression_is_lvalue(rhs) && !pointer)
  11689. {
  11690. // Need a copy.
  11691. // For pointer types, we copy the pointer itself.
  11692. emit_op(result_type, id, to_unpacked_expression(rhs), false);
  11693. }
  11694. else
  11695. {
  11696. // RHS expression is immutable, so just forward it.
  11697. // Copying these things really make no sense, but
  11698. // seems to be allowed anyways.
  11699. auto &e = emit_op(result_type, id, to_expression(rhs), true, true);
  11700. if (pointer)
  11701. {
  11702. auto *var = maybe_get_backing_variable(rhs);
  11703. e.loaded_from = var ? var->self : ID(0);
  11704. }
  11705. // If we're copying an access chain, need to inherit the read expressions.
  11706. auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
  11707. if (rhs_expr)
  11708. {
  11709. e.implied_read_expressions = rhs_expr->implied_read_expressions;
  11710. e.expression_dependencies = rhs_expr->expression_dependencies;
  11711. }
  11712. }
  11713. break;
  11714. }
  11715. case OpVectorShuffle:
  11716. {
  11717. uint32_t result_type = ops[0];
  11718. uint32_t id = ops[1];
  11719. uint32_t vec0 = ops[2];
  11720. uint32_t vec1 = ops[3];
  11721. const auto *elems = &ops[4];
  11722. length -= 4;
  11723. auto &type0 = expression_type(vec0);
  11724. // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
  11725. // or in our case, T(0).
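// e.g. shuffling a vec2 with indices (0, 0xFFFFFFFF) emits roughly vec2(v.x, 0.0).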
  11726. bool shuffle = false;
  11727. for (uint32_t i = 0; i < length; i++)
  11728. if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
  11729. shuffle = true;
  11730. // Cannot use swizzles with packed expressions, force shuffle path.
  11731. if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
  11732. shuffle = true;
  11733. string expr;
  11734. bool should_fwd, trivial_forward;
  11735. if (shuffle)
  11736. {
  11737. should_fwd = should_forward(vec0) && should_forward(vec1);
  11738. trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
  11739. // Constructor style and shuffling from two different vectors.
  11740. SmallVector<string> args;
  11741. for (uint32_t i = 0; i < length; i++)
  11742. {
  11743. if (elems[i] == 0xffffffffu)
  11744. {
  11745. // Use a constant 0 here.
  11746. // We could use the first component or similar, but then we risk propagating
  11747. // a value we might not need, and bog down codegen.
  11748. SPIRConstant c;
  11749. c.constant_type = type0.parent_type;
  11750. assert(type0.parent_type != ID(0));
  11751. args.push_back(constant_expression(c));
  11752. }
  11753. else if (elems[i] >= type0.vecsize)
  11754. args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
  11755. else
  11756. args.push_back(to_extract_component_expression(vec0, elems[i]));
  11757. }
  11758. expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
  11759. }
  11760. else
  11761. {
  11762. should_fwd = should_forward(vec0);
  11763. trivial_forward = should_suppress_usage_tracking(vec0);
  11764. // We only source from first vector, so can use swizzle.
  11765. // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
  11766. expr += to_enclosed_unpacked_expression(vec0);
  11767. expr += ".";
  11768. for (uint32_t i = 0; i < length; i++)
  11769. {
  11770. assert(elems[i] != 0xffffffffu);
  11771. expr += index_to_swizzle(elems[i]);
  11772. }
  11773. if (backend.swizzle_is_function && length > 1)
  11774. expr += "()";
  11775. }
  11776. // A shuffle is trivial in that it doesn't actually *do* anything.
  11777. // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
  11778. emit_op(result_type, id, expr, should_fwd, trivial_forward);
  11779. inherit_expression_dependencies(id, vec0);
  11780. if (vec0 != vec1)
  11781. inherit_expression_dependencies(id, vec1);
  11782. break;
  11783. }
  11784. // ALU
  11785. case OpIsNan:
  11786. if (!is_legacy())
  11787. GLSL_UFOP(isnan);
  11788. else
  11789. {
  11790. // Check if the number doesn't equal itself
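// i.e. emit notEqual(x, x) for vectors or x != x for scalars; only NaN compares unequal to itself.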
  11791. auto &type = get<SPIRType>(ops[0]);
  11792. if (type.vecsize > 1)
  11793. emit_binary_func_op(ops[0], ops[1], ops[2], ops[2], "notEqual");
  11794. else
  11795. emit_binary_op(ops[0], ops[1], ops[2], ops[2], "!=");
  11796. }
  11797. break;
  11798. case OpIsInf:
  11799. if (!is_legacy())
  11800. GLSL_UFOP(isinf);
  11801. else
  11802. {
// inf * 2 == inf by IEEE 754 rules; 0.0 also satisfies 2 * x == x, hence the explicit != 0.0 check.
// This is more reliable than checking whether the product with zero is NaN.
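// e.g. the scalar path emits roughly (x != 0.0 && 2.0 * x == x).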
  11805. uint32_t result_type = ops[0];
  11806. uint32_t result_id = ops[1];
  11807. uint32_t operand = ops[2];
  11808. auto &type = get<SPIRType>(result_type);
  11809. std::string expr;
  11810. if (type.vecsize > 1)
  11811. {
  11812. expr = type_to_glsl_constructor(type);
  11813. expr += '(';
  11814. for (uint32_t i = 0; i < type.vecsize; i++)
  11815. {
  11816. auto comp = to_extract_component_expression(operand, i);
  11817. expr += join(comp, " != 0.0 && 2.0 * ", comp, " == ", comp);
  11818. if (i + 1 < type.vecsize)
  11819. expr += ", ";
  11820. }
  11821. expr += ')';
  11822. }
  11823. else
  11824. {
  11825. // Register an extra read to force writing out a temporary
  11826. auto oper = to_enclosed_expression(operand);
  11827. track_expression_read(operand);
  11828. expr += join(oper, " != 0.0 && 2.0 * ", oper, " == ", oper);
  11829. }
  11830. emit_op(result_type, result_id, expr, should_forward(operand));
  11831. inherit_expression_dependencies(result_id, operand);
  11832. }
  11833. break;
  11834. case OpSNegate:
  11835. if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0])
  11836. GLSL_UOP_CAST(-);
  11837. else
  11838. GLSL_UOP(-);
  11839. break;
  11840. case OpFNegate:
  11841. GLSL_UOP(-);
  11842. break;
  11843. case OpIAdd:
  11844. {
  11845. // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
  11846. auto type = get<SPIRType>(ops[0]).basetype;
  11847. GLSL_BOP_CAST(+, type);
  11848. break;
  11849. }
  11850. case OpFAdd:
  11851. GLSL_BOP(+);
  11852. break;
  11853. case OpISub:
  11854. {
  11855. auto type = get<SPIRType>(ops[0]).basetype;
  11856. GLSL_BOP_CAST(-, type);
  11857. break;
  11858. }
  11859. case OpFSub:
  11860. GLSL_BOP(-);
  11861. break;
  11862. case OpIMul:
  11863. {
  11864. auto type = get<SPIRType>(ops[0]).basetype;
  11865. GLSL_BOP_CAST(*, type);
  11866. break;
  11867. }
  11868. case OpVectorTimesMatrix:
  11869. case OpMatrixTimesVector:
  11870. {
  11871. // If the matrix needs transpose, just flip the multiply order.
  11872. auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
  11873. if (e && e->need_transpose)
  11874. {
  11875. e->need_transpose = false;
  11876. string expr;
  11877. if (opcode == OpMatrixTimesVector)
  11878. expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
  11879. enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
  11880. else
  11881. expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
  11882. to_enclosed_unpacked_expression(ops[2]));
  11883. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  11884. emit_op(ops[0], ops[1], expr, forward);
  11885. e->need_transpose = true;
  11886. inherit_expression_dependencies(ops[1], ops[2]);
  11887. inherit_expression_dependencies(ops[1], ops[3]);
  11888. }
  11889. else
  11890. GLSL_BOP(*);
  11891. break;
  11892. }
  11893. case OpMatrixTimesMatrix:
  11894. {
  11895. auto *a = maybe_get<SPIRExpression>(ops[2]);
  11896. auto *b = maybe_get<SPIRExpression>(ops[3]);
  11897. // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
  11898. // a^T * b^T = (b * a)^T.
  11899. if (a && b && a->need_transpose && b->need_transpose)
  11900. {
  11901. a->need_transpose = false;
  11902. b->need_transpose = false;
  11903. auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
  11904. enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
  11905. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  11906. emit_transposed_op(ops[0], ops[1], expr, forward);
  11907. a->need_transpose = true;
  11908. b->need_transpose = true;
  11909. inherit_expression_dependencies(ops[1], ops[2]);
  11910. inherit_expression_dependencies(ops[1], ops[3]);
  11911. }
  11912. else
  11913. GLSL_BOP(*);
  11914. break;
  11915. }
  11916. case OpMatrixTimesScalar:
  11917. {
  11918. auto *a = maybe_get<SPIRExpression>(ops[2]);
// If the matrix needs transpose, just mark the result as needing so.
  11920. if (a && a->need_transpose)
  11921. {
  11922. a->need_transpose = false;
  11923. auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), " * ",
  11924. to_enclosed_unpacked_expression(ops[3]));
  11925. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  11926. emit_transposed_op(ops[0], ops[1], expr, forward);
  11927. a->need_transpose = true;
  11928. inherit_expression_dependencies(ops[1], ops[2]);
  11929. inherit_expression_dependencies(ops[1], ops[3]);
  11930. }
  11931. else
  11932. GLSL_BOP(*);
  11933. break;
  11934. }
  11935. case OpFMul:
  11936. case OpVectorTimesScalar:
  11937. GLSL_BOP(*);
  11938. break;
  11939. case OpOuterProduct:
  11940. if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
  11941. {
  11942. uint32_t result_type = ops[0];
  11943. uint32_t id = ops[1];
  11944. uint32_t a = ops[2];
  11945. uint32_t b = ops[3];
  11946. auto &type = get<SPIRType>(result_type);
  11947. string expr = type_to_glsl_constructor(type);
  11948. expr += "(";
  11949. for (uint32_t col = 0; col < type.columns; col++)
  11950. {
  11951. expr += to_enclosed_expression(a);
  11952. expr += " * ";
  11953. expr += to_extract_component_expression(b, col);
  11954. if (col + 1 < type.columns)
  11955. expr += ", ";
  11956. }
  11957. expr += ")";
  11958. emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
  11959. inherit_expression_dependencies(id, a);
  11960. inherit_expression_dependencies(id, b);
  11961. }
  11962. else
  11963. GLSL_BFOP(outerProduct);
  11964. break;
  11965. case OpDot:
  11966. GLSL_BFOP(dot);
  11967. break;
  11968. case OpTranspose:
  11969. if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
  11970. {
  11971. // transpose() is not available, so instead, flip need_transpose,
  11972. // which can later be turned into an emulated transpose op by
  11973. // convert_row_major_matrix(), if necessary.
  11974. uint32_t result_type = ops[0];
  11975. uint32_t result_id = ops[1];
  11976. uint32_t input = ops[2];
  11977. // Force need_transpose to false temporarily to prevent
  11978. // to_expression() from doing the transpose.
  11979. bool need_transpose = false;
  11980. auto *input_e = maybe_get<SPIRExpression>(input);
  11981. if (input_e)
  11982. swap(need_transpose, input_e->need_transpose);
  11983. bool forward = should_forward(input);
  11984. auto &e = emit_op(result_type, result_id, to_expression(input), forward);
  11985. e.need_transpose = !need_transpose;
  11986. // Restore the old need_transpose flag.
  11987. if (input_e)
  11988. input_e->need_transpose = need_transpose;
  11989. }
  11990. else
  11991. GLSL_UFOP(transpose);
  11992. break;
  11993. case OpSRem:
  11994. {
  11995. uint32_t result_type = ops[0];
  11996. uint32_t result_id = ops[1];
  11997. uint32_t op0 = ops[2];
  11998. uint32_t op1 = ops[3];
  11999. auto &out_type = get<SPIRType>(result_type);
  12000. bool forward = should_forward(op0) && should_forward(op1);
  12001. string cast_op0, cast_op1;
  12002. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, int_type, op0, op1, false);
  12003. // Needs special handling.
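// a - b * (a / b) is the truncated-division remainder; its sign follows the dividend, matching OpSRem.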
  12004. auto expr = join(cast_op0, " - ", cast_op1, " * ", "(", cast_op0, " / ", cast_op1, ")");
  12005. if (implicit_integer_promotion)
  12006. {
  12007. expr = join(type_to_glsl(get<SPIRType>(result_type)), '(', expr, ')');
  12008. }
  12009. else if (out_type.basetype != int_type)
  12010. {
  12011. expected_type.basetype = int_type;
  12012. expr = join(bitcast_glsl_op(out_type, expected_type), '(', expr, ')');
  12013. }
  12014. emit_op(result_type, result_id, expr, forward);
  12015. inherit_expression_dependencies(result_id, op0);
  12016. inherit_expression_dependencies(result_id, op1);
  12017. break;
  12018. }
  12019. case OpSDiv:
  12020. GLSL_BOP_CAST(/, int_type);
  12021. break;
  12022. case OpUDiv:
  12023. GLSL_BOP_CAST(/, uint_type);
  12024. break;
  12025. case OpIAddCarry:
  12026. case OpISubBorrow:
  12027. {
  12028. if (options.es && options.version < 310)
  12029. SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
  12030. else if (!options.es && options.version < 400)
  12031. SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
  12032. uint32_t result_type = ops[0];
  12033. uint32_t result_id = ops[1];
  12034. uint32_t op0 = ops[2];
  12035. uint32_t op1 = ops[3];
  12036. auto &type = get<SPIRType>(result_type);
  12037. emit_uninitialized_temporary_expression(result_type, result_id);
  12038. const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
  12039. statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
  12040. to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
  12041. break;
  12042. }
  12043. case OpUMulExtended:
  12044. case OpSMulExtended:
  12045. {
  12046. if (options.es && options.version < 310)
  12047. SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
  12048. else if (!options.es && options.version < 400)
  12049. SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000.");
  12050. uint32_t result_type = ops[0];
  12051. uint32_t result_id = ops[1];
  12052. uint32_t op0 = ops[2];
  12053. uint32_t op1 = ops[3];
  12054. auto &type = get<SPIRType>(result_type);
  12055. emit_uninitialized_temporary_expression(result_type, result_id);
  12056. const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
  12057. statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
  12058. to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
  12059. break;
  12060. }
  12061. case OpFDiv:
  12062. GLSL_BOP(/);
  12063. break;
  12064. case OpShiftRightLogical:
  12065. GLSL_BOP_CAST(>>, uint_type);
  12066. break;
  12067. case OpShiftRightArithmetic:
  12068. GLSL_BOP_CAST(>>, int_type);
  12069. break;
  12070. case OpShiftLeftLogical:
  12071. {
  12072. auto type = get<SPIRType>(ops[0]).basetype;
  12073. GLSL_BOP_CAST(<<, type);
  12074. break;
  12075. }
  12076. case OpBitwiseOr:
  12077. {
  12078. auto type = get<SPIRType>(ops[0]).basetype;
  12079. GLSL_BOP_CAST(|, type);
  12080. break;
  12081. }
  12082. case OpBitwiseXor:
  12083. {
  12084. auto type = get<SPIRType>(ops[0]).basetype;
  12085. GLSL_BOP_CAST(^, type);
  12086. break;
  12087. }
  12088. case OpBitwiseAnd:
  12089. {
  12090. auto type = get<SPIRType>(ops[0]).basetype;
  12091. GLSL_BOP_CAST(&, type);
  12092. break;
  12093. }
  12094. case OpNot:
  12095. if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0])
  12096. GLSL_UOP_CAST(~);
  12097. else
  12098. GLSL_UOP(~);
  12099. break;
  12100. case OpUMod:
  12101. GLSL_BOP_CAST(%, uint_type);
  12102. break;
  12103. case OpSMod:
  12104. GLSL_BOP_CAST(%, int_type);
  12105. break;
  12106. case OpFMod:
  12107. GLSL_BFOP(mod);
  12108. break;
  12109. case OpFRem:
  12110. {
  12111. uint32_t result_type = ops[0];
  12112. uint32_t result_id = ops[1];
  12113. uint32_t op0 = ops[2];
  12114. uint32_t op1 = ops[3];
  12115. // Needs special handling.
  12116. bool forward = should_forward(op0) && should_forward(op1);
  12117. std::string expr;
  12118. if (!is_legacy())
  12119. {
  12120. expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
  12121. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
  12122. }
  12123. else
  12124. {
  12125. // Legacy GLSL has no trunc, emulate by casting to int and back
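// e.g. for floats this emits roughly a - b * float(int(a / b)).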
  12126. auto &op0_type = expression_type(op0);
  12127. auto via_type = op0_type;
  12128. via_type.basetype = SPIRType::Int;
  12129. expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ",
  12130. type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(",
  12131. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), "))");
  12132. }
  12133. emit_op(result_type, result_id, expr, forward);
  12134. inherit_expression_dependencies(result_id, op0);
  12135. inherit_expression_dependencies(result_id, op1);
  12136. break;
  12137. }
  12138. // Relational
  12139. case OpAny:
  12140. GLSL_UFOP(any);
  12141. break;
  12142. case OpAll:
  12143. GLSL_UFOP(all);
  12144. break;
  12145. case OpSelect:
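// Note the operand reordering: OpSelect supplies (condition, true, false), but the call passes
// (false, true, condition), matching GLSL mix()-style ordering.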
  12146. emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
  12147. break;
  12148. case OpLogicalOr:
  12149. {
  12150. // No vector variant in GLSL for logical OR.
  12151. auto result_type = ops[0];
  12152. auto id = ops[1];
  12153. auto &type = get<SPIRType>(result_type);
  12154. if (type.vecsize > 1)
  12155. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
  12156. else
  12157. GLSL_BOP(||);
  12158. break;
  12159. }
  12160. case OpLogicalAnd:
  12161. {
  12162. // No vector variant in GLSL for logical AND.
  12163. auto result_type = ops[0];
  12164. auto id = ops[1];
  12165. auto &type = get<SPIRType>(result_type);
  12166. if (type.vecsize > 1)
  12167. emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
  12168. else
  12169. GLSL_BOP(&&);
  12170. break;
  12171. }
  12172. case OpLogicalNot:
  12173. {
  12174. auto &type = get<SPIRType>(ops[0]);
  12175. if (type.vecsize > 1)
  12176. GLSL_UFOP(not );
  12177. else
  12178. GLSL_UOP(!);
  12179. break;
  12180. }
  12181. case OpIEqual:
  12182. {
  12183. if (expression_type(ops[2]).vecsize > 1)
  12184. GLSL_BFOP_CAST(equal, int_type);
  12185. else
  12186. GLSL_BOP_CAST(==, int_type);
  12187. break;
  12188. }
  12189. case OpLogicalEqual:
  12190. case OpFOrdEqual:
  12191. {
  12192. if (expression_type(ops[2]).vecsize > 1)
  12193. GLSL_BFOP(equal);
  12194. else
  12195. GLSL_BOP(==);
  12196. break;
  12197. }
  12198. case OpINotEqual:
  12199. {
  12200. if (expression_type(ops[2]).vecsize > 1)
  12201. GLSL_BFOP_CAST(notEqual, int_type);
  12202. else
  12203. GLSL_BOP_CAST(!=, int_type);
  12204. break;
  12205. }
  12206. case OpLogicalNotEqual:
  12207. case OpFOrdNotEqual:
  12208. case OpFUnordNotEqual:
  12209. {
  12210. // GLSL is fuzzy on what to do with ordered vs unordered not equal.
  12211. // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE,
  12212. // but this means we have no easy way of implementing ordered not equal.
  12213. if (expression_type(ops[2]).vecsize > 1)
  12214. GLSL_BFOP(notEqual);
  12215. else
  12216. GLSL_BOP(!=);
  12217. break;
  12218. }
  12219. case OpUGreaterThan:
  12220. case OpSGreaterThan:
  12221. {
  12222. auto type = opcode == OpUGreaterThan ? uint_type : int_type;
  12223. if (expression_type(ops[2]).vecsize > 1)
  12224. GLSL_BFOP_CAST(greaterThan, type);
  12225. else
  12226. GLSL_BOP_CAST(>, type);
  12227. break;
  12228. }
  12229. case OpFOrdGreaterThan:
  12230. {
  12231. if (expression_type(ops[2]).vecsize > 1)
  12232. GLSL_BFOP(greaterThan);
  12233. else
  12234. GLSL_BOP(>);
  12235. break;
  12236. }
  12237. case OpUGreaterThanEqual:
  12238. case OpSGreaterThanEqual:
  12239. {
  12240. auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
  12241. if (expression_type(ops[2]).vecsize > 1)
  12242. GLSL_BFOP_CAST(greaterThanEqual, type);
  12243. else
  12244. GLSL_BOP_CAST(>=, type);
  12245. break;
  12246. }
  12247. case OpFOrdGreaterThanEqual:
  12248. {
  12249. if (expression_type(ops[2]).vecsize > 1)
  12250. GLSL_BFOP(greaterThanEqual);
  12251. else
  12252. GLSL_BOP(>=);
  12253. break;
  12254. }
  12255. case OpULessThan:
  12256. case OpSLessThan:
  12257. {
  12258. auto type = opcode == OpULessThan ? uint_type : int_type;
  12259. if (expression_type(ops[2]).vecsize > 1)
  12260. GLSL_BFOP_CAST(lessThan, type);
  12261. else
  12262. GLSL_BOP_CAST(<, type);
  12263. break;
  12264. }
  12265. case OpFOrdLessThan:
  12266. {
  12267. if (expression_type(ops[2]).vecsize > 1)
  12268. GLSL_BFOP(lessThan);
  12269. else
  12270. GLSL_BOP(<);
  12271. break;
  12272. }
  12273. case OpULessThanEqual:
  12274. case OpSLessThanEqual:
  12275. {
  12276. auto type = opcode == OpULessThanEqual ? uint_type : int_type;
  12277. if (expression_type(ops[2]).vecsize > 1)
  12278. GLSL_BFOP_CAST(lessThanEqual, type);
  12279. else
  12280. GLSL_BOP_CAST(<=, type);
  12281. break;
  12282. }
  12283. case OpFOrdLessThanEqual:
  12284. {
  12285. if (expression_type(ops[2]).vecsize > 1)
  12286. GLSL_BFOP(lessThanEqual);
  12287. else
  12288. GLSL_BOP(<=);
  12289. break;
  12290. }
  12291. // Conversion
  12292. case OpSConvert:
  12293. case OpConvertSToF:
  12294. case OpUConvert:
  12295. case OpConvertUToF:
  12296. {
  12297. auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
  12298. uint32_t result_type = ops[0];
  12299. uint32_t id = ops[1];
  12300. auto &type = get<SPIRType>(result_type);
  12301. auto &arg_type = expression_type(ops[2]);
  12302. auto func = type_to_glsl_constructor(type);
  12303. if (arg_type.width < type.width || type_is_floating_point(type))
  12304. emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
  12305. else
  12306. emit_unary_func_op(result_type, id, ops[2], func.c_str());
  12307. break;
  12308. }
  12309. case OpConvertFToU:
  12310. case OpConvertFToS:
  12311. {
  12312. // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
  12313. uint32_t result_type = ops[0];
  12314. uint32_t id = ops[1];
  12315. auto &type = get<SPIRType>(result_type);
  12316. auto expected_type = type;
  12317. auto &float_type = expression_type(ops[2]);
  12318. expected_type.basetype =
  12319. opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
  12320. auto func = type_to_glsl_constructor(expected_type);
  12321. emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
  12322. break;
  12323. }
  12324. case OpCooperativeMatrixConvertNV:
  12325. if (!options.vulkan_semantics)
  12326. SPIRV_CROSS_THROW("CooperativeMatrixConvertNV requires vulkan semantics.");
  12327. require_extension_internal("GL_NV_cooperative_matrix2");
  12328. // fallthrough
  12329. case OpFConvert:
  12330. {
  12331. uint32_t result_type = ops[0];
  12332. uint32_t id = ops[1];
  12333. auto &type = get<SPIRType>(result_type);
  12334. if (type.op == OpTypeCooperativeMatrixKHR && opcode == OpFConvert)
  12335. {
  12336. auto &expr_type = expression_type(ops[2]);
  12337. if (get<SPIRConstant>(type.ext.cooperative.use_id).scalar() !=
  12338. get<SPIRConstant>(expr_type.ext.cooperative.use_id).scalar())
  12339. {
  12340. // Somewhat questionable with spec constant uses.
  12341. if (!options.vulkan_semantics)
  12342. SPIRV_CROSS_THROW("NV_cooperative_matrix2 requires vulkan semantics.");
  12343. require_extension_internal("GL_NV_cooperative_matrix2");
  12344. }
  12345. }
  12346. if ((type.basetype == SPIRType::FloatE4M3 || type.basetype == SPIRType::FloatE5M2) &&
  12347. has_decoration(id, DecorationSaturatedToLargestFloat8NormalConversionEXT))
  12348. {
  12349. emit_uninitialized_temporary_expression(result_type, id);
  12350. statement("saturatedConvertEXT(", to_expression(id), ", ", to_unpacked_expression(ops[2]), ");");
  12351. }
  12352. else
  12353. {
  12354. auto func = type_to_glsl_constructor(type);
  12355. emit_unary_func_op(result_type, id, ops[2], func.c_str());
  12356. }
  12357. break;
  12358. }
  12359. case OpBitcast:
  12360. {
  12361. uint32_t result_type = ops[0];
  12362. uint32_t id = ops[1];
  12363. uint32_t arg = ops[2];
  12364. if (!emit_complex_bitcast(result_type, id, arg))
  12365. {
  12366. auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
  12367. emit_unary_func_op(result_type, id, arg, op.c_str());
  12368. }
  12369. break;
  12370. }
  12371. case OpQuantizeToF16:
  12372. {
  12373. uint32_t result_type = ops[0];
  12374. uint32_t id = ops[1];
  12375. uint32_t arg = ops[2];
  12376. string op;
  12377. auto &type = get<SPIRType>(result_type);
  12378. switch (type.vecsize)
  12379. {
  12380. case 1:
  12381. op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
  12382. break;
  12383. case 2:
  12384. op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
  12385. break;
  12386. case 3:
  12387. {
  12388. auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
  12389. auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
  12390. op = join("vec3(", op0, ", ", op1, ")");
  12391. break;
  12392. }
  12393. case 4:
  12394. {
  12395. auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
  12396. auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
  12397. op = join("vec4(", op0, ", ", op1, ")");
  12398. break;
  12399. }
  12400. default:
  12401. SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
  12402. }
  12403. emit_op(result_type, id, op, should_forward(arg));
  12404. inherit_expression_dependencies(id, arg);
  12405. break;
  12406. }
  12407. // Derivatives
  12408. case OpDPdx:
  12409. GLSL_UFOP(dFdx);
  12410. if (is_legacy_es())
  12411. require_extension_internal("GL_OES_standard_derivatives");
  12412. register_control_dependent_expression(ops[1]);
  12413. break;
  12414. case OpDPdy:
  12415. GLSL_UFOP(dFdy);
  12416. if (is_legacy_es())
  12417. require_extension_internal("GL_OES_standard_derivatives");
  12418. register_control_dependent_expression(ops[1]);
  12419. break;
  12420. case OpDPdxFine:
  12421. GLSL_UFOP(dFdxFine);
  12422. if (options.es)
  12423. {
  12424. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12425. }
  12426. if (options.version < 450)
  12427. require_extension_internal("GL_ARB_derivative_control");
  12428. register_control_dependent_expression(ops[1]);
  12429. break;
  12430. case OpDPdyFine:
  12431. GLSL_UFOP(dFdyFine);
  12432. if (options.es)
  12433. {
  12434. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12435. }
  12436. if (options.version < 450)
  12437. require_extension_internal("GL_ARB_derivative_control");
  12438. register_control_dependent_expression(ops[1]);
  12439. break;
  12440. case OpDPdxCoarse:
  12441. if (options.es)
  12442. {
  12443. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12444. }
  12445. GLSL_UFOP(dFdxCoarse);
  12446. if (options.version < 450)
  12447. require_extension_internal("GL_ARB_derivative_control");
  12448. register_control_dependent_expression(ops[1]);
  12449. break;
  12450. case OpDPdyCoarse:
  12451. GLSL_UFOP(dFdyCoarse);
  12452. if (options.es)
  12453. {
  12454. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12455. }
  12456. if (options.version < 450)
  12457. require_extension_internal("GL_ARB_derivative_control");
  12458. register_control_dependent_expression(ops[1]);
  12459. break;
  12460. case OpFwidth:
  12461. GLSL_UFOP(fwidth);
  12462. if (is_legacy_es())
  12463. require_extension_internal("GL_OES_standard_derivatives");
  12464. register_control_dependent_expression(ops[1]);
  12465. break;
  12466. case OpFwidthCoarse:
  12467. GLSL_UFOP(fwidthCoarse);
  12468. if (options.es)
  12469. {
  12470. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12471. }
  12472. if (options.version < 450)
  12473. require_extension_internal("GL_ARB_derivative_control");
  12474. register_control_dependent_expression(ops[1]);
  12475. break;
  12476. case OpFwidthFine:
  12477. GLSL_UFOP(fwidthFine);
  12478. if (options.es)
  12479. {
  12480. SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
  12481. }
  12482. if (options.version < 450)
  12483. require_extension_internal("GL_ARB_derivative_control");
  12484. register_control_dependent_expression(ops[1]);
  12485. break;
  12486. // Bitfield
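// GLSL's bitfieldInsert()/bitfieldExtract() take the offset and bit count as plain int,
// so those operands are cast to int below regardless of the value type.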
  12487. case OpBitFieldInsert:
  12488. {
  12489. emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
  12490. break;
  12491. }
  12492. case OpBitFieldSExtract:
  12493. {
  12494. emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
  12495. SPIRType::Int, SPIRType::Int);
  12496. break;
  12497. }
  12498. case OpBitFieldUExtract:
  12499. {
  12500. emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
  12501. SPIRType::Int, SPIRType::Int);
  12502. break;
  12503. }
  12504. case OpBitReverse:
  12505. // BitReverse does not have issues with sign since result type must match input type.
  12506. GLSL_UFOP(bitfieldReverse);
  12507. break;
  12508. case OpBitCount:
  12509. {
  12510. auto basetype = expression_type(ops[2]).basetype;
  12511. emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
  12512. break;
  12513. }
  12514. // Atomics
  12515. case OpAtomicExchange:
  12516. {
  12517. uint32_t result_type = ops[0];
  12518. uint32_t id = ops[1];
  12519. uint32_t ptr = ops[2];
  12520. // Ignore semantics for now, probably only relevant to CL.
  12521. uint32_t val = ops[5];
  12522. const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
  12523. emit_atomic_func_op(result_type, id, ptr, val, op);
  12524. break;
  12525. }
  12526. case OpAtomicCompareExchange:
  12527. {
  12528. uint32_t result_type = ops[0];
  12529. uint32_t id = ops[1];
  12530. uint32_t ptr = ops[2];
  12531. uint32_t val = ops[6];
  12532. uint32_t comp = ops[7];
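// SPIR-V orders the operands as Value then Comparator, while GLSL's atomicCompSwap() takes
// (mem, compare, data), so the comparator is passed before the value below.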
  12533. const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
  12534. emit_atomic_func_op(result_type, id, ptr, comp, val, op);
  12535. break;
  12536. }
  12537. case OpAtomicLoad:
  12538. {
12539. // In plain GLSL, we have no atomic loads, so emulate this with an atomic fetch-add of 0 and hope the compiler figures it out.
  12540. // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
  12541. auto &type = expression_type(ops[2]);
  12542. forced_temporaries.insert(ops[1]);
  12543. bool atomic_image = check_atomic_image(ops[2]);
  12544. bool unsigned_type = (type.basetype == SPIRType::UInt) ||
  12545. (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
  12546. const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
  12547. const char *increment = unsigned_type ? "0u" : "0";
  12548. emit_op(ops[0], ops[1],
  12549. join(op, "(",
  12550. to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false);
  12551. flush_all_atomic_capable_variables();
  12552. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  12553. require_extension_internal("GL_EXT_shader_atomic_int64");
  12554. break;
  12555. }
  12556. case OpAtomicStore:
  12557. {
  12558. // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
  12559. // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
  12560. uint32_t ptr = ops[0];
  12561. // Ignore semantics for now, probably only relevant to CL.
  12562. uint32_t val = ops[3];
  12563. const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
  12564. statement(op, "(", to_atomic_ptr_expression(ptr), ", ", to_expression(val), ");");
  12565. flush_all_atomic_capable_variables();
  12566. auto &type = expression_type(ptr);
  12567. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  12568. require_extension_internal("GL_EXT_shader_atomic_int64");
  12569. break;
  12570. }
  12571. case OpAtomicIIncrement:
  12572. case OpAtomicIDecrement:
  12573. {
  12574. forced_temporaries.insert(ops[1]);
  12575. auto &type = expression_type(ops[2]);
  12576. if (type.storage == StorageClassAtomicCounter)
  12577. {
  12578. // Legacy GLSL stuff, not sure if this is relevant to support.
  12579. if (opcode == OpAtomicIIncrement)
  12580. GLSL_UFOP(atomicCounterIncrement);
  12581. else
  12582. GLSL_UFOP(atomicCounterDecrement);
  12583. }
  12584. else
  12585. {
  12586. bool atomic_image = check_atomic_image(ops[2]);
  12587. bool unsigned_type = (type.basetype == SPIRType::UInt) ||
  12588. (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
  12589. const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
  12590. const char *increment = nullptr;
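// Decrement is emitted as an add of -1; for unsigned types this is written as uint(-1),
// relying on wrap-around to produce the same result.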
  12591. if (opcode == OpAtomicIIncrement && unsigned_type)
  12592. increment = "1u";
  12593. else if (opcode == OpAtomicIIncrement)
  12594. increment = "1";
  12595. else if (unsigned_type)
  12596. increment = "uint(-1)";
  12597. else
  12598. increment = "-1";
  12599. emit_op(ops[0], ops[1],
  12600. join(op, "(", to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false);
  12601. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  12602. require_extension_internal("GL_EXT_shader_atomic_int64");
  12603. }
  12604. flush_all_atomic_capable_variables();
  12605. break;
  12606. }
  12607. case OpAtomicIAdd:
  12608. case OpAtomicFAddEXT:
  12609. {
  12610. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
  12611. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12612. break;
  12613. }
  12614. case OpAtomicISub:
  12615. {
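// GLSL has no atomicSub()/imageAtomicSub(), so subtraction is emitted as an add of the negated operand.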
  12616. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
  12617. forced_temporaries.insert(ops[1]);
  12618. auto expr = join(op, "(", to_atomic_ptr_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
  12619. emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
  12620. flush_all_atomic_capable_variables();
  12621. auto &type = get<SPIRType>(ops[0]);
  12622. if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
  12623. require_extension_internal("GL_EXT_shader_atomic_int64");
  12624. break;
  12625. }
  12626. case OpAtomicSMin:
  12627. case OpAtomicUMin:
  12628. {
  12629. const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
  12630. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12631. break;
  12632. }
  12633. case OpAtomicSMax:
  12634. case OpAtomicUMax:
  12635. {
  12636. const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
  12637. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12638. break;
  12639. }
  12640. case OpAtomicAnd:
  12641. {
  12642. const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
  12643. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12644. break;
  12645. }
  12646. case OpAtomicOr:
  12647. {
  12648. const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
  12649. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12650. break;
  12651. }
  12652. case OpAtomicXor:
  12653. {
  12654. const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
  12655. emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
  12656. break;
  12657. }
  12658. // Geometry shaders
  12659. case OpEmitVertex:
  12660. statement("EmitVertex();");
  12661. break;
  12662. case OpEndPrimitive:
  12663. statement("EndPrimitive();");
  12664. break;
  12665. case OpEmitStreamVertex:
  12666. {
  12667. if (options.es)
  12668. SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
  12669. else if (!options.es && options.version < 400)
  12670. SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
  12671. auto stream_expr = to_expression(ops[0]);
  12672. if (expression_type(ops[0]).basetype != SPIRType::Int)
  12673. stream_expr = join("int(", stream_expr, ")");
  12674. statement("EmitStreamVertex(", stream_expr, ");");
  12675. break;
  12676. }
  12677. case OpEndStreamPrimitive:
  12678. {
  12679. if (options.es)
  12680. SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
  12681. else if (!options.es && options.version < 400)
  12682. SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
  12683. auto stream_expr = to_expression(ops[0]);
  12684. if (expression_type(ops[0]).basetype != SPIRType::Int)
  12685. stream_expr = join("int(", stream_expr, ")");
  12686. statement("EndStreamPrimitive(", stream_expr, ");");
  12687. break;
  12688. }
  12689. // Textures
  12690. case OpImageSampleExplicitLod:
  12691. case OpImageSampleProjExplicitLod:
  12692. case OpImageSampleDrefExplicitLod:
  12693. case OpImageSampleProjDrefExplicitLod:
  12694. case OpImageSampleImplicitLod:
  12695. case OpImageSampleProjImplicitLod:
  12696. case OpImageSampleDrefImplicitLod:
  12697. case OpImageSampleProjDrefImplicitLod:
  12698. case OpImageFetch:
  12699. case OpImageGather:
  12700. case OpImageDrefGather:
12701. // Gets a bit hairy, so handle this in a separate function.
  12702. emit_texture_op(instruction, false);
  12703. break;
  12704. case OpImageSparseSampleExplicitLod:
  12705. case OpImageSparseSampleProjExplicitLod:
  12706. case OpImageSparseSampleDrefExplicitLod:
  12707. case OpImageSparseSampleProjDrefExplicitLod:
  12708. case OpImageSparseSampleImplicitLod:
  12709. case OpImageSparseSampleProjImplicitLod:
  12710. case OpImageSparseSampleDrefImplicitLod:
  12711. case OpImageSparseSampleProjDrefImplicitLod:
  12712. case OpImageSparseFetch:
  12713. case OpImageSparseGather:
  12714. case OpImageSparseDrefGather:
12715. // Gets a bit hairy, so handle this in a separate function.
  12716. emit_texture_op(instruction, true);
  12717. break;
  12718. case OpImageSparseTexelsResident:
  12719. if (options.es)
  12720. SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL.");
  12721. require_extension_internal("GL_ARB_sparse_texture2");
  12722. emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
  12723. break;
  12724. case OpImage:
  12725. {
  12726. uint32_t result_type = ops[0];
  12727. uint32_t id = ops[1];
  12728. // Suppress usage tracking.
  12729. auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
  12730. // When using the image, we need to know which variable it is actually loaded from.
  12731. auto *var = maybe_get_backing_variable(ops[2]);
  12732. e.loaded_from = var ? var->self : ID(0);
  12733. break;
  12734. }
  12735. case OpImageQueryLod:
  12736. {
  12737. const char *op = nullptr;
  12738. if (!options.es && options.version < 400)
  12739. {
  12740. require_extension_internal("GL_ARB_texture_query_lod");
  12741. // For some reason, the ARB spec is all-caps.
  12742. op = "textureQueryLOD";
  12743. }
  12744. else if (options.es)
  12745. {
  12746. if (options.version < 300)
  12747. SPIRV_CROSS_THROW("textureQueryLod not supported in legacy ES");
  12748. require_extension_internal("GL_EXT_texture_query_lod");
  12749. op = "textureQueryLOD";
  12750. }
  12751. else
  12752. op = "textureQueryLod";
  12753. auto sampler_expr = to_expression(ops[2]);
  12754. if (has_decoration(ops[2], DecorationNonUniform))
  12755. {
  12756. if (maybe_get_backing_variable(ops[2]))
  12757. convert_non_uniform_expression(sampler_expr, ops[2]);
  12758. else if (*backend.nonuniform_qualifier != '\0')
  12759. sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")");
  12760. }
  12761. bool forward = should_forward(ops[3]);
  12762. emit_op(ops[0], ops[1],
  12763. join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"),
  12764. forward);
  12765. inherit_expression_dependencies(ops[1], ops[2]);
  12766. inherit_expression_dependencies(ops[1], ops[3]);
  12767. register_control_dependent_expression(ops[1]);
  12768. break;
  12769. }
  12770. case OpImageQueryLevels:
  12771. {
  12772. uint32_t result_type = ops[0];
  12773. uint32_t id = ops[1];
  12774. if (!options.es && options.version < 430)
  12775. require_extension_internal("GL_ARB_texture_query_levels");
  12776. if (options.es)
  12777. SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
  12778. auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
  12779. auto &restype = get<SPIRType>(ops[0]);
  12780. expr = bitcast_expression(restype, SPIRType::Int, expr);
  12781. emit_op(result_type, id, expr, true);
  12782. break;
  12783. }
  12784. case OpImageQuerySamples:
  12785. {
  12786. auto &type = expression_type(ops[2]);
  12787. uint32_t result_type = ops[0];
  12788. uint32_t id = ops[1];
  12789. if (options.es)
  12790. SPIRV_CROSS_THROW("textureSamples and imageSamples not supported in ES profile.");
  12791. else if (options.version < 450)
  12792. require_extension_internal("GL_ARB_texture_query_samples");
  12793. string expr;
  12794. if (type.image.sampled == 2)
  12795. expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")");
  12796. else
  12797. expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
  12798. auto &restype = get<SPIRType>(ops[0]);
  12799. expr = bitcast_expression(restype, SPIRType::Int, expr);
  12800. emit_op(result_type, id, expr, true);
  12801. break;
  12802. }
  12803. case OpSampledImage:
  12804. {
  12805. uint32_t result_type = ops[0];
  12806. uint32_t id = ops[1];
  12807. emit_sampled_image_op(result_type, id, ops[2], ops[3]);
  12808. inherit_expression_dependencies(id, ops[2]);
  12809. inherit_expression_dependencies(id, ops[3]);
  12810. break;
  12811. }
  12812. case OpImageQuerySizeLod:
  12813. {
  12814. uint32_t result_type = ops[0];
  12815. uint32_t id = ops[1];
  12816. uint32_t img = ops[2];
  12817. auto &type = expression_type(img);
  12818. auto &imgtype = get<SPIRType>(type.self);
  12819. std::string fname = "textureSize";
  12820. if (is_legacy_desktop())
  12821. {
  12822. fname = legacy_tex_op(fname, imgtype, img);
  12823. }
  12824. else if (is_legacy_es())
  12825. SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
  12826. auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
  12827. bitcast_expression(SPIRType::Int, ops[3]), ")");
  12828. // ES needs to emulate 1D images as 2D.
  12829. if (type.image.dim == Dim1D && options.es)
  12830. expr = join(expr, ".x");
  12831. auto &restype = get<SPIRType>(ops[0]);
  12832. expr = bitcast_expression(restype, SPIRType::Int, expr);
  12833. emit_op(result_type, id, expr, true);
  12834. break;
  12835. }
  12836. // Image load/store
  12837. case OpImageRead:
  12838. case OpImageSparseRead:
  12839. {
  12840. // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
  12841. // not adding the proper qualifiers.
  12842. // If it turns out we need to read the image after all, remove the qualifier and recompile.
  12843. auto *var = maybe_get_backing_variable(ops[2]);
  12844. if (var)
  12845. {
  12846. auto &flags = get_decoration_bitset(var->self);
  12847. if (flags.get(DecorationNonReadable))
  12848. {
  12849. unset_decoration(var->self, DecorationNonReadable);
  12850. force_recompile();
  12851. }
  12852. }
  12853. uint32_t result_type = ops[0];
  12854. uint32_t id = ops[1];
  12855. bool pure;
  12856. string imgexpr;
  12857. auto &type = expression_type(ops[2]);
  12858. if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
  12859. {
  12860. if (type.image.ms)
  12861. SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
  12862. auto itr =
  12863. find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
  12864. if (itr == end(pls_inputs))
  12865. {
  12866. // For non-PLS inputs, we rely on subpass type remapping information to get it right
  12867. // since ImageRead always returns 4-component vectors and the backing type is opaque.
  12868. if (!var->remapped_components)
  12869. SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
  12870. imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
  12871. }
  12872. else
  12873. {
12874. // PLS input could have a different number of components than what the SPIR-V code expects, so swizzle to
  12875. // the appropriate vector size.
  12876. uint32_t components = pls_format_to_components(itr->format);
  12877. imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
  12878. }
  12879. pure = true;
  12880. }
  12881. else if (type.image.dim == DimSubpassData)
  12882. {
  12883. if (var && subpass_input_is_framebuffer_fetch(var->self))
  12884. {
  12885. imgexpr = to_expression(var->self);
  12886. }
  12887. else if (options.vulkan_semantics)
  12888. {
  12889. // With Vulkan semantics, use the proper Vulkan GLSL construct.
  12890. if (type.image.ms)
  12891. {
  12892. uint32_t operands = ops[4];
  12893. if (operands != ImageOperandsSampleMask || length != 6)
  12894. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  12895. "operand mask was used.");
  12896. uint32_t samples = ops[5];
  12897. imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")");
  12898. }
  12899. else
  12900. imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")");
  12901. }
  12902. else
  12903. {
  12904. if (type.image.ms)
  12905. {
  12906. uint32_t operands = ops[4];
  12907. if (operands != ImageOperandsSampleMask || length != 6)
  12908. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  12909. "operand mask was used.");
  12910. uint32_t samples = ops[5];
  12911. imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
  12912. to_expression(samples), ")");
  12913. }
  12914. else
  12915. {
  12916. // Implement subpass loads via texture barrier style sampling.
  12917. imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
  12918. }
  12919. }
  12920. imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
  12921. pure = true;
  12922. }
  12923. else
  12924. {
  12925. bool sparse = opcode == OpImageSparseRead;
  12926. uint32_t sparse_code_id = 0;
  12927. uint32_t sparse_texel_id = 0;
  12928. if (sparse)
  12929. emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
  12930. // imageLoad only accepts int coords, not uint.
  12931. auto coord_expr = to_expression(ops[3]);
  12932. auto target_coord_type = expression_type(ops[3]);
  12933. target_coord_type.basetype = SPIRType::Int;
  12934. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
  12935. // ES needs to emulate 1D images as 2D.
  12936. if (type.image.dim == Dim1D && options.es)
  12937. coord_expr = join("ivec2(", coord_expr, ", 0)");
  12938. // Plain image load/store.
  12939. if (sparse)
  12940. {
  12941. if (type.image.ms)
  12942. {
  12943. uint32_t operands = ops[4];
  12944. if (operands != ImageOperandsSampleMask || length != 6)
  12945. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  12946. "operand mask was used.");
  12947. uint32_t samples = ops[5];
  12948. statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
  12949. coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
  12950. }
  12951. else
  12952. {
  12953. statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
  12954. coord_expr, ", ", to_expression(sparse_texel_id), ");");
  12955. }
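// Reassemble the SPIR-V sparse result (residency code plus fetched texel) into the result struct type.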
  12956. imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
  12957. to_expression(sparse_texel_id), ")");
  12958. }
  12959. else
  12960. {
  12961. if (type.image.ms)
  12962. {
  12963. uint32_t operands = ops[4];
  12964. if (operands != ImageOperandsSampleMask || length != 6)
  12965. SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
  12966. "operand mask was used.");
  12967. uint32_t samples = ops[5];
  12968. imgexpr =
  12969. join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
  12970. }
  12971. else
  12972. imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")");
  12973. }
  12974. if (!sparse)
  12975. imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
  12976. pure = false;
  12977. }
  12978. if (var)
  12979. {
  12980. bool forward = forced_temporaries.find(id) == end(forced_temporaries);
  12981. auto &e = emit_op(result_type, id, imgexpr, forward);
  12982. // We only need to track dependencies if we're reading from image load/store.
  12983. if (!pure)
  12984. {
  12985. e.loaded_from = var->self;
  12986. if (forward)
  12987. var->dependees.push_back(id);
  12988. }
  12989. }
  12990. else
  12991. emit_op(result_type, id, imgexpr, false);
  12992. inherit_expression_dependencies(id, ops[2]);
  12993. if (type.image.ms)
  12994. inherit_expression_dependencies(id, ops[5]);
  12995. break;
  12996. }
  12997. case OpImageTexelPointer:
  12998. {
  12999. uint32_t result_type = ops[0];
  13000. uint32_t id = ops[1];
  13001. auto coord_expr = to_expression(ops[3]);
  13002. auto target_coord_type = expression_type(ops[3]);
  13003. target_coord_type.basetype = SPIRType::Int;
  13004. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
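// The texel "pointer" is only recorded as the text "image, coord"; a later atomic instruction
// splices this directly into an imageAtomic*() call.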
  13005. auto expr = join(to_expression(ops[2]), ", ", coord_expr);
  13006. auto &e = set<SPIRExpression>(id, expr, result_type, true);
  13007. // When using the pointer, we need to know which variable it is actually loaded from.
  13008. auto *var = maybe_get_backing_variable(ops[2]);
  13009. e.loaded_from = var ? var->self : ID(0);
  13010. inherit_expression_dependencies(id, ops[3]);
  13011. break;
  13012. }
  13013. case OpImageWrite:
  13014. {
  13015. // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
  13016. // not adding the proper qualifiers.
  13017. // If it turns out we need to write to the image after all, remove the qualifier and recompile.
  13018. auto *var = maybe_get_backing_variable(ops[0]);
  13019. if (var)
  13020. {
  13021. if (has_decoration(var->self, DecorationNonWritable))
  13022. {
  13023. unset_decoration(var->self, DecorationNonWritable);
  13024. force_recompile();
  13025. }
  13026. }
  13027. auto &type = expression_type(ops[0]);
  13028. auto &value_type = expression_type(ops[2]);
  13029. auto store_type = value_type;
  13030. store_type.vecsize = 4;
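// imageStore() always takes a four-component value, so remap_swizzle() widens the stored value as needed.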
  13031. // imageStore only accepts int coords, not uint.
  13032. auto coord_expr = to_expression(ops[1]);
  13033. auto target_coord_type = expression_type(ops[1]);
  13034. target_coord_type.basetype = SPIRType::Int;
  13035. coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
  13036. // ES needs to emulate 1D images as 2D.
  13037. if (type.image.dim == Dim1D && options.es)
  13038. coord_expr = join("ivec2(", coord_expr, ", 0)");
  13039. if (type.image.ms)
  13040. {
  13041. uint32_t operands = ops[3];
  13042. if (operands != ImageOperandsSampleMask || length != 5)
  13043. SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
  13044. uint32_t samples = ops[4];
  13045. statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
  13046. remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
  13047. }
  13048. else
  13049. statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ",
  13050. remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
  13051. if (var && variable_storage_is_aliased(*var))
  13052. flush_all_aliased_variables();
  13053. break;
  13054. }
  13055. case OpImageQuerySize:
  13056. {
  13057. auto &type = expression_type(ops[2]);
  13058. uint32_t result_type = ops[0];
  13059. uint32_t id = ops[1];
  13060. if (type.basetype == SPIRType::Image)
  13061. {
  13062. string expr;
  13063. if (type.image.sampled == 2)
  13064. {
  13065. if (!options.es && options.version < 430)
  13066. require_extension_internal("GL_ARB_shader_image_size");
  13067. else if (options.es && options.version < 310)
  13068. SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
13069. // The size of an image never changes at runtime, so it is safe to forward this expression.
  13070. expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")");
  13071. }
  13072. else
  13073. {
  13074. // This path is hit for samplerBuffers and multisampled images which do not have LOD.
  13075. std::string fname = "textureSize";
  13076. if (is_legacy())
  13077. {
  13078. auto &imgtype = get<SPIRType>(type.self);
  13079. fname = legacy_tex_op(fname, imgtype, ops[2]);
  13080. }
  13081. expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
  13082. }
  13083. auto &restype = get<SPIRType>(ops[0]);
  13084. expr = bitcast_expression(restype, SPIRType::Int, expr);
  13085. emit_op(result_type, id, expr, true);
  13086. }
  13087. else
  13088. SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
  13089. break;
  13090. }
  13091. case OpImageSampleWeightedQCOM:
  13092. case OpImageBoxFilterQCOM:
  13093. case OpImageBlockMatchSSDQCOM:
  13094. case OpImageBlockMatchSADQCOM:
  13095. {
  13096. require_extension_internal("GL_QCOM_image_processing");
  13097. uint32_t result_type_id = ops[0];
  13098. uint32_t id = ops[1];
  13099. string expr;
  13100. switch (opcode)
  13101. {
  13102. case OpImageSampleWeightedQCOM:
  13103. expr = "textureWeightedQCOM";
  13104. break;
  13105. case OpImageBoxFilterQCOM:
  13106. expr = "textureBoxFilterQCOM";
  13107. break;
  13108. case OpImageBlockMatchSSDQCOM:
  13109. expr = "textureBlockMatchSSDQCOM";
  13110. break;
  13111. case OpImageBlockMatchSADQCOM:
  13112. expr = "textureBlockMatchSADQCOM";
  13113. break;
  13114. default:
  13115. SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing.");
  13116. }
  13117. expr += "(";
  13118. bool forward = false;
  13119. expr += to_expression(ops[2]);
  13120. expr += ", " + to_expression(ops[3]);
  13121. switch (opcode)
  13122. {
  13123. case OpImageSampleWeightedQCOM:
  13124. expr += ", " + to_non_uniform_aware_expression(ops[4]);
  13125. break;
  13126. case OpImageBoxFilterQCOM:
  13127. expr += ", " + to_expression(ops[4]);
  13128. break;
  13129. case OpImageBlockMatchSSDQCOM:
  13130. case OpImageBlockMatchSADQCOM:
  13131. expr += ", " + to_non_uniform_aware_expression(ops[4]);
  13132. expr += ", " + to_expression(ops[5]);
  13133. expr += ", " + to_expression(ops[6]);
  13134. break;
  13135. default:
  13136. SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing.");
  13137. }
  13138. expr += ")";
  13139. emit_op(result_type_id, id, expr, forward);
  13140. inherit_expression_dependencies(id, ops[3]);
  13141. if (opcode == OpImageBlockMatchSSDQCOM || opcode == OpImageBlockMatchSADQCOM)
  13142. inherit_expression_dependencies(id, ops[5]);
  13143. break;
  13144. }
  13145. case OpImageBlockMatchWindowSSDQCOM:
  13146. case OpImageBlockMatchWindowSADQCOM:
  13147. case OpImageBlockMatchGatherSSDQCOM:
  13148. case OpImageBlockMatchGatherSADQCOM:
  13149. {
  13150. require_extension_internal("GL_QCOM_image_processing2");
  13151. uint32_t result_type_id = ops[0];
  13152. uint32_t id = ops[1];
  13153. string expr;
  13154. switch (opcode)
  13155. {
  13156. case OpImageBlockMatchWindowSSDQCOM:
  13157. expr = "textureBlockMatchWindowSSDQCOM";
  13158. break;
  13159. case OpImageBlockMatchWindowSADQCOM:
  13160. expr = "textureBlockMatchWindowSADQCOM";
  13161. break;
  13162. case OpImageBlockMatchGatherSSDQCOM:
  13163. expr = "textureBlockMatchGatherSSDQCOM";
  13164. break;
  13165. case OpImageBlockMatchGatherSADQCOM:
  13166. expr = "textureBlockMatchGatherSADQCOM";
  13167. break;
  13168. default:
  13169. SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing2.");
  13170. }
  13171. expr += "(";
  13172. bool forward = false;
  13173. expr += to_expression(ops[2]);
  13174. expr += ", " + to_expression(ops[3]);
  13175. expr += ", " + to_non_uniform_aware_expression(ops[4]);
  13176. expr += ", " + to_expression(ops[5]);
  13177. expr += ", " + to_expression(ops[6]);
  13178. expr += ")";
  13179. emit_op(result_type_id, id, expr, forward);
  13180. inherit_expression_dependencies(id, ops[3]);
  13181. inherit_expression_dependencies(id, ops[5]);
  13182. break;
  13183. }
  13184. // Compute
  13185. case OpControlBarrier:
  13186. case OpMemoryBarrier:
  13187. {
  13188. uint32_t execution_scope = 0;
  13189. uint32_t memory;
  13190. uint32_t semantics;
  13191. if (opcode == OpMemoryBarrier)
  13192. {
  13193. memory = evaluate_constant_u32(ops[0]);
  13194. semantics = evaluate_constant_u32(ops[1]);
  13195. }
  13196. else
  13197. {
  13198. execution_scope = evaluate_constant_u32(ops[0]);
  13199. memory = evaluate_constant_u32(ops[1]);
  13200. semantics = evaluate_constant_u32(ops[2]);
  13201. }
  13202. if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
  13203. {
  13204. // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
  13205. if (opcode != OpControlBarrier)
  13206. {
  13207. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
  13208. }
  13209. else
  13210. {
  13211. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
  13212. }
  13213. }
  13214. if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
  13215. {
13216. // Tessellation control shaders only have barrier(), and it implies the needed memory barriers.
  13217. if (opcode == OpControlBarrier)
  13218. statement("barrier();");
  13219. break;
  13220. }
13221. // We only care about these flags; acquire/release and friends are not relevant to GLSL.
  13222. semantics = mask_relevant_memory_semantics(semantics);
  13223. if (opcode == OpMemoryBarrier)
  13224. {
  13225. // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
  13226. // does what we need, so we avoid redundant barriers.
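// E.g. a memoryBarrierShared() that would be emitted here is dropped when the control barrier
// that follows already covers the same memory semantics.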
  13227. const Instruction *next = get_next_instruction_in_block(instruction);
  13228. if (next && next->op == OpControlBarrier)
  13229. {
  13230. auto *next_ops = stream(*next);
  13231. uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
  13232. uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
  13233. next_semantics = mask_relevant_memory_semantics(next_semantics);
  13234. bool memory_scope_covered = false;
  13235. if (next_memory == memory)
  13236. memory_scope_covered = true;
  13237. else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
  13238. {
  13239. // If we only care about workgroup memory, either Device or Workgroup scope is fine,
  13240. // scope does not have to match.
  13241. if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
  13242. (memory == ScopeDevice || memory == ScopeWorkgroup))
  13243. {
  13244. memory_scope_covered = true;
  13245. }
  13246. }
  13247. else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
  13248. {
  13249. // The control barrier has device scope, but the memory barrier just has workgroup scope.
  13250. memory_scope_covered = true;
  13251. }
  13252. // If we have the same memory scope, and all memory types are covered, we're good.
  13253. if (memory_scope_covered && (semantics & next_semantics) == semantics)
  13254. break;
  13255. }
  13256. }
  13257. // We are synchronizing some memory or syncing execution,
  13258. // so we cannot forward any loads beyond the memory barrier.
  13259. if (semantics || opcode == OpControlBarrier)
  13260. {
  13261. assert(current_emitting_block);
  13262. flush_control_dependent_expressions(current_emitting_block->self);
  13263. flush_all_active_variables();
  13264. }
  13265. if (memory == ScopeWorkgroup) // Only need to consider memory within a group
  13266. {
  13267. if (semantics == MemorySemanticsWorkgroupMemoryMask)
  13268. {
  13269. // OpControlBarrier implies a memory barrier for shared memory as well.
  13270. bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
  13271. if (!implies_shared_barrier)
  13272. statement("memoryBarrierShared();");
  13273. }
  13274. else if (semantics != 0)
  13275. statement("groupMemoryBarrier();");
  13276. }
  13277. else if (memory == ScopeSubgroup)
  13278. {
  13279. const uint32_t all_barriers =
  13280. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
  13281. if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
  13282. {
  13283. // These are not relevant for GLSL, but assume it means memoryBarrier().
  13284. // memoryBarrier() does everything, so no need to test anything else.
  13285. statement("subgroupMemoryBarrier();");
  13286. }
  13287. else if ((semantics & all_barriers) == all_barriers)
  13288. {
  13289. // Short-hand instead of emitting 3 barriers.
  13290. statement("subgroupMemoryBarrier();");
  13291. }
  13292. else
  13293. {
  13294. // Pick out individual barriers.
  13295. if (semantics & MemorySemanticsWorkgroupMemoryMask)
  13296. statement("subgroupMemoryBarrierShared();");
  13297. if (semantics & MemorySemanticsUniformMemoryMask)
  13298. statement("subgroupMemoryBarrierBuffer();");
  13299. if (semantics & MemorySemanticsImageMemoryMask)
  13300. statement("subgroupMemoryBarrierImage();");
  13301. }
  13302. }
  13303. else
  13304. {
  13305. const uint32_t all_barriers =
  13306. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
  13307. if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
  13308. {
  13309. // These are not relevant for GLSL, but assume it means memoryBarrier().
  13310. // memoryBarrier() does everything, so no need to test anything else.
  13311. statement("memoryBarrier();");
  13312. }
  13313. else if ((semantics & all_barriers) == all_barriers)
  13314. {
  13315. // Short-hand instead of emitting 4 barriers.
  13316. statement("memoryBarrier();");
  13317. }
  13318. else
  13319. {
  13320. // Pick out individual barriers.
  13321. if (semantics & MemorySemanticsWorkgroupMemoryMask)
  13322. statement("memoryBarrierShared();");
  13323. if (semantics & MemorySemanticsUniformMemoryMask)
  13324. statement("memoryBarrierBuffer();");
  13325. if (semantics & MemorySemanticsImageMemoryMask)
  13326. statement("memoryBarrierImage();");
  13327. }
  13328. }
  13329. if (opcode == OpControlBarrier)
  13330. {
  13331. if (execution_scope == ScopeSubgroup)
  13332. statement("subgroupBarrier();");
  13333. else
  13334. statement("barrier();");
  13335. }
  13336. break;
  13337. }
  13338. case OpExtInstWithForwardRefsKHR:
  13339. {
  13340. uint32_t extension_set = ops[2];
  13341. auto ext = get<SPIRExtension>(extension_set).ext;
  13342. if (ext != SPIRExtension::SPV_debug_info &&
  13343. ext != SPIRExtension::NonSemanticShaderDebugInfo &&
  13344. ext != SPIRExtension::NonSemanticGeneric)
  13345. {
  13346. SPIRV_CROSS_THROW("Unexpected use of ExtInstWithForwardRefsKHR.");
  13347. }
  13348. break;
  13349. }
  13350. case OpExtInst:
  13351. {
  13352. uint32_t extension_set = ops[2];
  13353. auto ext = get<SPIRExtension>(extension_set).ext;
  13354. if (ext == SPIRExtension::GLSL)
  13355. {
  13356. emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13357. }
  13358. else if (ext == SPIRExtension::SPV_AMD_shader_ballot)
  13359. {
  13360. emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13361. }
  13362. else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
  13363. {
  13364. emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13365. }
  13366. else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
  13367. {
  13368. emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13369. }
  13370. else if (ext == SPIRExtension::SPV_AMD_gcn_shader)
  13371. {
  13372. emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13373. }
  13374. else if (ext == SPIRExtension::NonSemanticShaderDebugInfo)
  13375. {
  13376. emit_non_semantic_shader_debug_info(ops[0], ops[1], ops[3], &ops[4], length - 4);
  13377. }
  13378. else if (ext == SPIRExtension::SPV_debug_info ||
  13379. ext == SPIRExtension::NonSemanticGeneric)
  13380. {
  13381. break; // Ignore SPIR-V debug information extended instructions.
  13382. }
  13383. else if (ext == SPIRExtension::NonSemanticDebugPrintf)
  13384. {
  13385. // Operation 1 is printf.
  13386. if (ops[3] == 1)
  13387. {
  13388. if (!options.vulkan_semantics)
  13389. SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n");
  13390. require_extension_internal("GL_EXT_debug_printf");
  13391. auto &format_string = get<SPIRString>(ops[4]).str;
  13392. string expr = join(backend.printf_function, "(\"", format_string, "\"");
  13393. for (uint32_t i = 5; i < length; i++)
  13394. {
  13395. expr += ", ";
  13396. expr += to_expression(ops[i]);
  13397. }
  13398. statement(expr, ");");
  13399. }
  13400. }
  13401. else
  13402. {
  13403. statement("// unimplemented ext op ", instruction.op);
  13404. break;
  13405. }
  13406. break;
  13407. }
  13408. // Legacy sub-group stuff ...
  13409. case OpSubgroupBallotKHR:
  13410. {
  13411. uint32_t result_type = ops[0];
  13412. uint32_t id = ops[1];
  13413. string expr;
  13414. expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
  13415. emit_op(result_type, id, expr, should_forward(ops[2]));
  13416. require_extension_internal("GL_ARB_shader_ballot");
  13417. inherit_expression_dependencies(id, ops[2]);
  13418. register_control_dependent_expression(ops[1]);
  13419. break;
  13420. }
  13421. case OpSubgroupFirstInvocationKHR:
  13422. {
  13423. uint32_t result_type = ops[0];
  13424. uint32_t id = ops[1];
  13425. emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
  13426. require_extension_internal("GL_ARB_shader_ballot");
  13427. register_control_dependent_expression(ops[1]);
  13428. break;
  13429. }
  13430. case OpSubgroupReadInvocationKHR:
  13431. {
  13432. uint32_t result_type = ops[0];
  13433. uint32_t id = ops[1];
  13434. emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
  13435. require_extension_internal("GL_ARB_shader_ballot");
  13436. register_control_dependent_expression(ops[1]);
  13437. break;
  13438. }
  13439. case OpSubgroupAllKHR:
  13440. {
  13441. uint32_t result_type = ops[0];
  13442. uint32_t id = ops[1];
  13443. emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
  13444. require_extension_internal("GL_ARB_shader_group_vote");
  13445. register_control_dependent_expression(ops[1]);
  13446. break;
  13447. }
  13448. case OpSubgroupAnyKHR:
  13449. {
  13450. uint32_t result_type = ops[0];
  13451. uint32_t id = ops[1];
  13452. emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
  13453. require_extension_internal("GL_ARB_shader_group_vote");
  13454. register_control_dependent_expression(ops[1]);
  13455. break;
  13456. }
  13457. case OpSubgroupAllEqualKHR:
  13458. {
  13459. uint32_t result_type = ops[0];
  13460. uint32_t id = ops[1];
  13461. emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
  13462. require_extension_internal("GL_ARB_shader_group_vote");
  13463. register_control_dependent_expression(ops[1]);
  13464. break;
  13465. }
  13466. case OpGroupIAddNonUniformAMD:
  13467. case OpGroupFAddNonUniformAMD:
  13468. {
  13469. uint32_t result_type = ops[0];
  13470. uint32_t id = ops[1];
  13471. emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
  13472. require_extension_internal("GL_AMD_shader_ballot");
  13473. register_control_dependent_expression(ops[1]);
  13474. break;
  13475. }
  13476. case OpGroupFMinNonUniformAMD:
  13477. case OpGroupUMinNonUniformAMD:
  13478. case OpGroupSMinNonUniformAMD:
  13479. {
  13480. uint32_t result_type = ops[0];
  13481. uint32_t id = ops[1];
  13482. emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
  13483. require_extension_internal("GL_AMD_shader_ballot");
  13484. register_control_dependent_expression(ops[1]);
  13485. break;
  13486. }
  13487. case OpGroupFMaxNonUniformAMD:
  13488. case OpGroupUMaxNonUniformAMD:
  13489. case OpGroupSMaxNonUniformAMD:
  13490. {
  13491. uint32_t result_type = ops[0];
  13492. uint32_t id = ops[1];
  13493. emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
  13494. require_extension_internal("GL_AMD_shader_ballot");
  13495. register_control_dependent_expression(ops[1]);
  13496. break;
  13497. }
  13498. case OpFragmentMaskFetchAMD:
  13499. {
  13500. auto &type = expression_type(ops[2]);
  13501. uint32_t result_type = ops[0];
  13502. uint32_t id = ops[1];
  13503. if (type.image.dim == DimSubpassData)
  13504. {
  13505. emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
  13506. }
  13507. else
  13508. {
  13509. emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
  13510. }
  13511. require_extension_internal("GL_AMD_shader_fragment_mask");
  13512. break;
  13513. }
  13514. case OpFragmentFetchAMD:
  13515. {
  13516. auto &type = expression_type(ops[2]);
  13517. uint32_t result_type = ops[0];
  13518. uint32_t id = ops[1];
  13519. if (type.image.dim == DimSubpassData)
  13520. {
  13521. emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
  13522. }
  13523. else
  13524. {
  13525. emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
  13526. }
  13527. require_extension_internal("GL_AMD_shader_fragment_mask");
  13528. break;
  13529. }
  13530. // Vulkan 1.1 sub-group stuff ...
  13531. case OpGroupNonUniformElect:
  13532. case OpGroupNonUniformBroadcast:
  13533. case OpGroupNonUniformBroadcastFirst:
  13534. case OpGroupNonUniformBallot:
  13535. case OpGroupNonUniformInverseBallot:
  13536. case OpGroupNonUniformBallotBitExtract:
  13537. case OpGroupNonUniformBallotBitCount:
  13538. case OpGroupNonUniformBallotFindLSB:
  13539. case OpGroupNonUniformBallotFindMSB:
  13540. case OpGroupNonUniformShuffle:
  13541. case OpGroupNonUniformShuffleXor:
  13542. case OpGroupNonUniformShuffleUp:
  13543. case OpGroupNonUniformShuffleDown:
  13544. case OpGroupNonUniformAll:
  13545. case OpGroupNonUniformAny:
  13546. case OpGroupNonUniformAllEqual:
  13547. case OpGroupNonUniformFAdd:
  13548. case OpGroupNonUniformIAdd:
  13549. case OpGroupNonUniformFMul:
  13550. case OpGroupNonUniformIMul:
  13551. case OpGroupNonUniformFMin:
  13552. case OpGroupNonUniformFMax:
  13553. case OpGroupNonUniformSMin:
  13554. case OpGroupNonUniformSMax:
  13555. case OpGroupNonUniformUMin:
  13556. case OpGroupNonUniformUMax:
  13557. case OpGroupNonUniformBitwiseAnd:
  13558. case OpGroupNonUniformBitwiseOr:
  13559. case OpGroupNonUniformBitwiseXor:
  13560. case OpGroupNonUniformLogicalAnd:
  13561. case OpGroupNonUniformLogicalOr:
  13562. case OpGroupNonUniformLogicalXor:
  13563. case OpGroupNonUniformQuadSwap:
  13564. case OpGroupNonUniformQuadBroadcast:
  13565. case OpGroupNonUniformQuadAllKHR:
  13566. case OpGroupNonUniformQuadAnyKHR:
  13567. case OpGroupNonUniformRotateKHR:
  13568. emit_subgroup_op(instruction);
  13569. break;
  13570. case OpFUnordEqual:
  13571. case OpFUnordLessThan:
  13572. case OpFUnordGreaterThan:
  13573. case OpFUnordLessThanEqual:
  13574. case OpFUnordGreaterThanEqual:
  13575. {
  13576. // GLSL doesn't specify if floating point comparisons are ordered or unordered,
  13577. // but glslang always emits ordered floating point compares for GLSL.
  13578. // To get unordered compares, we can test the opposite thing and invert the result.
  13579. // This way, we force true when there is any NaN present.
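// E.g. OpFUnordLessThan(a, b) becomes !(a >= b), which evaluates to true whenever either operand is NaN.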
  13580. uint32_t op0 = ops[2];
  13581. uint32_t op1 = ops[3];
  13582. string expr;
  13583. if (expression_type(op0).vecsize > 1)
  13584. {
  13585. const char *comp_op = nullptr;
  13586. switch (opcode)
  13587. {
  13588. case OpFUnordEqual:
  13589. comp_op = "notEqual";
  13590. break;
  13591. case OpFUnordLessThan:
  13592. comp_op = "greaterThanEqual";
  13593. break;
  13594. case OpFUnordLessThanEqual:
  13595. comp_op = "greaterThan";
  13596. break;
  13597. case OpFUnordGreaterThan:
  13598. comp_op = "lessThanEqual";
  13599. break;
  13600. case OpFUnordGreaterThanEqual:
  13601. comp_op = "lessThan";
  13602. break;
  13603. default:
  13604. assert(0);
  13605. break;
  13606. }
  13607. expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
  13608. }
  13609. else
  13610. {
  13611. const char *comp_op = nullptr;
  13612. switch (opcode)
  13613. {
  13614. case OpFUnordEqual:
  13615. comp_op = " != ";
  13616. break;
  13617. case OpFUnordLessThan:
  13618. comp_op = " >= ";
  13619. break;
  13620. case OpFUnordLessThanEqual:
  13621. comp_op = " > ";
  13622. break;
  13623. case OpFUnordGreaterThan:
  13624. comp_op = " <= ";
  13625. break;
  13626. case OpFUnordGreaterThanEqual:
  13627. comp_op = " < ";
  13628. break;
  13629. default:
  13630. assert(0);
  13631. break;
  13632. }
  13633. expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
  13634. }
  13635. emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
  13636. inherit_expression_dependencies(ops[1], op0);
  13637. inherit_expression_dependencies(ops[1], op1);
  13638. break;
  13639. }
  13640. case OpReportIntersectionKHR:
  13641. // NV is same opcode.
  13642. forced_temporaries.insert(ops[1]);
  13643. if (ray_tracing_is_khr)
  13644. GLSL_BFOP(reportIntersectionEXT);
  13645. else
  13646. GLSL_BFOP(reportIntersectionNV);
  13647. flush_control_dependent_expressions(current_emitting_block->self);
  13648. break;
  13649. case OpIgnoreIntersectionNV:
  13650. // KHR variant is a terminator.
  13651. statement("ignoreIntersectionNV();");
  13652. flush_control_dependent_expressions(current_emitting_block->self);
  13653. break;
  13654. case OpTerminateRayNV:
  13655. // KHR variant is a terminator.
  13656. statement("terminateRayNV();");
  13657. flush_control_dependent_expressions(current_emitting_block->self);
  13658. break;
  13659. case OpTraceNV:
  13660. statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
  13661. to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  13662. to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
  13663. to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
  13664. flush_control_dependent_expressions(current_emitting_block->self);
  13665. break;
  13666. case OpTraceRayKHR:
  13667. if (!has_decoration(ops[10], DecorationLocation))
  13668. SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
  13669. statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
  13670. to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  13671. to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
  13672. to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
  13673. flush_control_dependent_expressions(current_emitting_block->self);
  13674. break;
  13675. case OpExecuteCallableNV:
  13676. statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
  13677. flush_control_dependent_expressions(current_emitting_block->self);
  13678. break;
  13679. case OpExecuteCallableKHR:
  13680. if (!has_decoration(ops[1], DecorationLocation))
  13681. SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
  13682. statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
  13683. flush_control_dependent_expressions(current_emitting_block->self);
  13684. break;
  13685. // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
  13686. case OpRayQueryInitializeKHR:
  13687. flush_variable_declaration(ops[0]);
  13688. statement("rayQueryInitializeEXT(",
  13689. to_expression(ops[0]), ", ", to_expression(ops[1]), ", ",
  13690. to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
  13691. to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  13692. to_expression(ops[6]), ", ", to_expression(ops[7]), ");");
  13693. break;
  13694. case OpRayQueryProceedKHR:
  13695. flush_variable_declaration(ops[0]);
  13696. emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false);
  13697. break;
  13698. case OpRayQueryTerminateKHR:
  13699. flush_variable_declaration(ops[0]);
  13700. statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");");
  13701. break;
  13702. case OpRayQueryGenerateIntersectionKHR:
  13703. flush_variable_declaration(ops[0]);
  13704. statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
  13705. break;
  13706. case OpRayQueryConfirmIntersectionKHR:
  13707. flush_variable_declaration(ops[0]);
  13708. statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");");
  13709. break;
  13710. case OpRayQueryGetIntersectionTriangleVertexPositionsKHR:
  13711. flush_variable_declaration(ops[1]);
  13712. emit_uninitialized_temporary_expression(ops[0], ops[1]);
  13713. statement("rayQueryGetIntersectionTriangleVertexPositionsEXT(", to_expression(ops[2]), ", bool(", to_expression(ops[3]), "), ", to_expression(ops[1]), ");");
  13714. break;
  13715. #define GLSL_RAY_QUERY_GET_OP(op) \
  13716. case OpRayQueryGet##op##KHR: \
  13717. flush_variable_declaration(ops[2]); \
  13718. emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \
  13719. break
  13720. #define GLSL_RAY_QUERY_GET_OP2(op) \
  13721. case OpRayQueryGet##op##KHR: \
  13722. flush_variable_declaration(ops[2]); \
  13723. emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \
  13724. break
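// The *_OP2 variants also take the committed/candidate flag, e.g. IntersectionT expands to
// rayQueryGetIntersectionTEXT(q, bool(committed)).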
  13725. GLSL_RAY_QUERY_GET_OP(RayTMin);
  13726. GLSL_RAY_QUERY_GET_OP(RayFlags);
  13727. GLSL_RAY_QUERY_GET_OP(WorldRayOrigin);
  13728. GLSL_RAY_QUERY_GET_OP(WorldRayDirection);
  13729. GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque);
  13730. GLSL_RAY_QUERY_GET_OP2(IntersectionType);
  13731. GLSL_RAY_QUERY_GET_OP2(IntersectionT);
  13732. GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex);
  13733. GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId);
  13734. GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset);
  13735. GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex);
  13736. GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex);
  13737. GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics);
  13738. GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace);
  13739. GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection);
  13740. GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin);
  13741. GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld);
  13742. GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject);
  13743. #undef GLSL_RAY_QUERY_GET_OP
  13744. #undef GLSL_RAY_QUERY_GET_OP2
  13745. case OpRayQueryGetClusterIdNV:
  13746. flush_variable_declaration(ops[2]);
  13747. emit_op(ops[0], ops[1], join("rayQueryGetIntersectionClusterIdNV(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false);
  13748. break;
  13749. case OpTensorQuerySizeARM:
  13750. flush_variable_declaration(ops[1]);
  13751. // tensorSizeARM(tensor, dimension)
  13752. emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "tensorSizeARM");
  13753. break;
  13754. case OpTensorReadARM:
  13755. {
  13756. flush_variable_declaration(ops[1]);
  13757. emit_uninitialized_temporary_expression(ops[0], ops[1]);
  13758. SmallVector<std::string> args {
  13759. to_expression(ops[2]), // tensor
  13760. to_expression(ops[3]), // coordinates
  13761. to_expression(ops[1]), // out value
  13762. };
  13763. if (length > 4)
  13764. {
  13765. std::string tensor_operands;
  13766. if (ops[4] == 0)
  13767. tensor_operands = "0x0u";
  13768. else if (ops[4] == TensorOperandsNontemporalARMMask)
  13769. tensor_operands = "gl_TensorOperandsNonTemporalARM";
  13770. else if (ops[4] == TensorOperandsOutOfBoundsValueARMMask)
  13771. tensor_operands = "gl_TensorOperandsOutOfBoundsValueARM";
  13772. else if (ops[4] == (TensorOperandsNontemporalARMMask | TensorOperandsOutOfBoundsValueARMMask))
  13773. tensor_operands = "gl_TensorOperandsNonTemporalARM | gl_TensorOperandsOutOfBoundsValueARM";
  13774. else
  13775. SPIRV_CROSS_THROW("Invalid tensorOperands for tensorReadARM.");
  13776. if ((ops[4] & TensorOperandsOutOfBoundsValueARMMask) && length != 6)
  13777. SPIRV_CROSS_THROW("gl_TensorOperandsOutOfBoundsValueARM requires an outOfBoundsValue argument.");
  13778. args.push_back(tensor_operands); // tensorOperands
  13779. }
  13780. if (length >= 6)
  13781. {
  13782. if ((length > 6) || (ops[4] & TensorOperandsOutOfBoundsValueARMMask) == 0)
  13783. SPIRV_CROSS_THROW("Too many arguments to tensorReadARM.");
  13784. args.push_back(to_expression(ops[5])); // outOfBoundsValue
  13785. }
13786. // Emitted as tensorReadARM(tensor, coordinates, out value[, tensorOperands[, outOfBoundsValue]]).
  13787. statement("tensorReadARM(", merge(args), ");");
  13788. break;
  13789. }
  13790. case OpTensorWriteARM:
  13791. {
  13792. flush_variable_declaration(ops[0]);
  13793. SmallVector<std::string> args {
  13794. to_expression(ops[0]), // tensor
  13795. to_expression(ops[1]), // coordinates
13796. to_expression(ops[2]), // value to write
  13797. };
  13798. if (length > 3)
  13799. {
  13800. std::string tensor_operands;
  13801. if (ops[3] == 0)
  13802. tensor_operands = "0x0u";
  13803. else if (ops[3] == TensorOperandsNontemporalARMMask)
  13804. tensor_operands = "gl_TensorOperandsNonTemporalARM";
  13805. else
  13806. SPIRV_CROSS_THROW("Invalid tensorOperands for tensorWriteARM.");
  13807. args.push_back(tensor_operands); // tensorOperands
  13808. }
  13809. if (length > 4)
  13810. SPIRV_CROSS_THROW("Too many arguments to tensorWriteARM.");
13811. // Emitted as tensorWriteARM(tensor, coordinates, value[, tensorOperands]).
  13812. statement("tensorWriteARM(", merge(args), ");");
  13813. break;
  13814. }
  13815. case OpConvertUToAccelerationStructureKHR:
  13816. {
  13817. require_extension_internal("GL_EXT_ray_tracing");
  13818. bool elide_temporary = should_forward(ops[2]) && forced_temporaries.count(ops[1]) == 0 &&
  13819. !hoisted_temporaries.count(ops[1]);
  13820. if (elide_temporary)
  13821. {
  13822. GLSL_UFOP(accelerationStructureEXT);
  13823. }
  13824. else
  13825. {
  13826. // Force this path in subsequent iterations.
  13827. forced_temporaries.insert(ops[1]);
  13828. // We cannot declare a temporary acceleration structure in GLSL.
  13829. // If we get to this point, we'll have to emit a temporary uvec2,
  13830. // and cast to RTAS on demand.
  13831. statement(declare_temporary(expression_type_id(ops[2]), ops[1]), to_unpacked_expression(ops[2]), ";");
  13832. // Use raw SPIRExpression interface to block all usage tracking.
  13833. set<SPIRExpression>(ops[1], join("accelerationStructureEXT(", to_name(ops[1]), ")"), ops[0], true);
  13834. }
  13835. break;
  13836. }
  13837. case OpConvertUToPtr:
  13838. {
  13839. auto &type = get<SPIRType>(ops[0]);
  13840. if (type.storage != StorageClassPhysicalStorageBuffer)
  13841. SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBuffer is supported by OpConvertUToPtr.");
  13842. auto &in_type = expression_type(ops[2]);
  13843. if (in_type.vecsize == 2)
  13844. require_extension_internal("GL_EXT_buffer_reference_uvec2");
  13845. auto op = type_to_glsl(type);
  13846. emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
  13847. break;
  13848. }
  13849. case OpConvertPtrToU:
  13850. {
  13851. auto &type = get<SPIRType>(ops[0]);
  13852. auto &ptr_type = expression_type(ops[2]);
  13853. if (ptr_type.storage != StorageClassPhysicalStorageBuffer)
  13854. SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBuffer is supported by OpConvertPtrToU.");
  13855. if (type.vecsize == 2)
  13856. require_extension_internal("GL_EXT_buffer_reference_uvec2");
  13857. auto op = type_to_glsl(type);
  13858. emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
  13859. break;
  13860. }
  13861. case OpUndef:
  13862. // Undefined value has been declared.
  13863. break;
  13864. case OpLine:
  13865. {
  13866. emit_line_directive(ops[0], ops[1]);
  13867. break;
  13868. }
  13869. case OpNoLine:
  13870. break;
  13871. case OpDemoteToHelperInvocationEXT:
  13872. if (!options.vulkan_semantics)
  13873. SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
  13874. require_extension_internal("GL_EXT_demote_to_helper_invocation");
  13875. statement(backend.demote_literal, ";");
  13876. break;
  13877. case OpIsHelperInvocationEXT:
  13878. if (!options.vulkan_semantics)
  13879. SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
  13880. require_extension_internal("GL_EXT_demote_to_helper_invocation");
  13881. // Helper lane state with demote is volatile by nature.
  13882. // Do not forward this.
  13883. emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
  13884. break;
  13885. case OpBeginInvocationInterlockEXT:
  13886. // If the interlock is complex, we emit this elsewhere.
  13887. if (!interlocked_is_complex)
  13888. {
  13889. statement("SPIRV_Cross_beginInvocationInterlock();");
  13890. flush_all_active_variables();
  13891. // Make sure forwarding doesn't propagate outside interlock region.
  13892. }
  13893. break;
  13894. case OpEndInvocationInterlockEXT:
  13895. // If the interlock is complex, we emit this elsewhere.
  13896. if (!interlocked_is_complex)
  13897. {
  13898. statement("SPIRV_Cross_endInvocationInterlock();");
  13899. flush_all_active_variables();
  13900. // Make sure forwarding doesn't propagate outside interlock region.
  13901. }
  13902. break;
  13903. case OpSetMeshOutputsEXT:
  13904. statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");");
  13905. break;
  13906. case OpReadClockKHR:
  13907. {
  13908. auto &type = get<SPIRType>(ops[0]);
  13909. auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
  13910. const char *op = nullptr;
  13911. // Forwarding clock statements leads to a scenario where an SSA value can take on different
  13912. // values every time it's evaluated. Block any forwarding attempt.
  13913. // We also might want to invalidate all expressions to function as a sort of optimization
  13914. // barrier, but might be overkill for now.
  13915. if (scope == ScopeDevice)
  13916. {
  13917. require_extension_internal("GL_EXT_shader_realtime_clock");
  13918. if (type.basetype == SPIRType::BaseType::UInt64)
  13919. op = "clockRealtimeEXT()";
  13920. else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2)
  13921. op = "clockRealtime2x32EXT()";
  13922. else
  13923. SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode.");
  13924. }
  13925. else if (scope == ScopeSubgroup)
  13926. {
  13927. require_extension_internal("GL_ARB_shader_clock");
  13928. if (type.basetype == SPIRType::BaseType::UInt64)
  13929. op = "clockARB()";
  13930. else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2)
  13931. op = "clock2x32ARB()";
  13932. else
  13933. SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode.");
  13934. }
  13935. else
  13936. SPIRV_CROSS_THROW("Unsupported scope for OpReadClockKHR opcode.");
  13937. emit_op(ops[0], ops[1], op, false);
  13938. break;
  13939. }
	case OpCooperativeVectorLoadNV:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_uninitialized_temporary_expression(result_type, id);
		statement("coopVecLoadNV(", to_expression(id), ", ", to_expression(ops[2]), ", ", to_expression(ops[3]), ");");
		register_read(id, ops[2], false);
		break;
	}

	case OpCooperativeVectorStoreNV:
	{
		uint32_t id = ops[0];
		statement("coopVecStoreNV(", to_expression(ops[2]), ", ", to_expression(id), ", ", to_expression(ops[1]), ");");
		register_write(ops[2]);
		break;
	}

	case OpCooperativeVectorOuterProductAccumulateNV:
	{
		auto buf = ops[0];
		auto offset = ops[1];
		auto v1 = ops[2];
		auto v2 = ops[3];
		auto matrix_layout_id = ops[4];
		auto matrix_interpretation_id = ops[5];
		auto matrix_stride_id = length > 6 ? ops[6] : 0;
		statement(join("coopVecOuterProductAccumulateNV(", to_expression(v1), ", ", to_expression(v2), ", ",
		               to_expression(buf), ", ", to_expression(offset), ", ",
		               matrix_stride_id ? to_expression(matrix_stride_id) : "0",
		               ", ", to_pretty_expression_if_int_constant(
		                         matrix_layout_id, std::begin(CoopVecMatrixLayoutNames), std::end(CoopVecMatrixLayoutNames)),
		               ", ", to_pretty_expression_if_int_constant(
		                         matrix_interpretation_id, std::begin(CoopVecComponentTypeNames), std::end(CoopVecComponentTypeNames)),
		               ");"));
		register_write(ops[0]);
		break;
	}

	case OpCooperativeVectorReduceSumAccumulateNV:
	{
		auto buf = ops[0];
		auto offset = ops[1];
		auto v1 = ops[2];
		statement(join("coopVecReduceSumAccumulateNV(", to_expression(v1), ", ", to_expression(buf), ", ",
		               to_expression(offset), ");"));
		register_write(ops[0]);
		break;
	}

	case OpCooperativeVectorMatrixMulNV:
	case OpCooperativeVectorMatrixMulAddNV:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_uninitialized_temporary_expression(result_type, id);

		std::string stmt;
		switch (opcode)
		{
		case OpCooperativeVectorMatrixMulAddNV:
			stmt += "coopVecMatMulAddNV(";
			break;
		case OpCooperativeVectorMatrixMulNV:
			stmt += "coopVecMatMulNV(";
			break;
		default:
			SPIRV_CROSS_THROW("Invalid op code for coopvec instruction.");
		}

		for (uint32_t i = 1; i < length; i++)
		{
			// Arguments 3, 6, and (for MulAddNV) 9 take component-type integer constants.
			if (i == 3 || i == 6 || (i == 9 && opcode == OpCooperativeVectorMatrixMulAddNV))
			{
				stmt += to_pretty_expression_if_int_constant(
				    ops[i], std::begin(CoopVecComponentTypeNames), std::end(CoopVecComponentTypeNames));
			}
			else if ((i == 12 && opcode == OpCooperativeVectorMatrixMulAddNV) ||
			         (i == 9 && opcode == OpCooperativeVectorMatrixMulNV))
			{
				stmt += to_pretty_expression_if_int_constant(
				    ops[i], std::begin(CoopVecMatrixLayoutNames), std::end(CoopVecMatrixLayoutNames));
			}
			else
				stmt += to_expression(ops[i]);

			if (i < length - 1)
				stmt += ", ";
		}
		stmt += ");";
		statement(stmt);
		break;
	}

	case OpCooperativeMatrixLengthKHR:
	{
		// Need to synthesize a dummy temporary, since the SPIR-V opcode is based on the type.
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		set<SPIRExpression>(
		    id, join(type_to_glsl(get<SPIRType>(result_type)),
		             "(", type_to_glsl(get<SPIRType>(ops[2])), "(0).length())"),
		    result_type, true);
		break;
	}

	case OpCooperativeMatrixLoadKHR:
	{
		// The spec contradicts itself on whether stride is optional.
		if (length < 5)
			SPIRV_CROSS_THROW("Stride is not provided.");

		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_uninitialized_temporary_expression(result_type, id);

		auto expr = to_expression(ops[2]);
		pair<string, string> split_expr;
		if (!is_forcing_recompilation())
			split_expr = split_coopmat_pointer(expr);

		string layout_expr = to_pretty_expression_if_int_constant(
		    ops[3], std::begin(CoopMatMatrixLayoutNames), std::end(CoopMatMatrixLayoutNames));

		statement("coopMatLoad(", to_expression(id), ", ", split_expr.first, ", ", split_expr.second, ", ",
		          to_expression(ops[4]), ", ", layout_expr, ");");
		register_read(id, ops[2], false);
		break;
	}

	case OpCooperativeMatrixStoreKHR:
	{
		// The spec contradicts itself on whether stride is optional.
		if (length < 4)
			SPIRV_CROSS_THROW("Stride is not provided.");

		// SPIR-V and GLSL don't agree how to pass the expression.
		// In SPIR-V it's a pointer, but in GLSL it's reference to array + index.
		auto expr = to_expression(ops[0]);
		pair<string, string> split_expr;
		if (!is_forcing_recompilation())
			split_expr = split_coopmat_pointer(expr);

		string layout_expr = to_pretty_expression_if_int_constant(
		    ops[2], std::begin(CoopMatMatrixLayoutNames), std::end(CoopMatMatrixLayoutNames));

		statement("coopMatStore(", to_expression(ops[1]), ", ", split_expr.first, ", ", split_expr.second, ", ",
		          to_expression(ops[3]), ", ", layout_expr, ");");

		// TODO: Do we care about memory operands?
		register_write(ops[0]);
		break;
	}

	case OpCooperativeMatrixMulAddKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t A = ops[2];
		uint32_t B = ops[3];
		uint32_t C = ops[4];
		bool forward = should_forward(A) && should_forward(B) && should_forward(C);

		emit_op(result_type, id,
		        join("coopMatMulAdd(",
		             to_unpacked_expression(A), ", ",
		             to_unpacked_expression(B), ", ",
		             to_unpacked_expression(C), ", ",
		             (length >= 6 ? ops[5] : 0),
		             ")"),
		        forward);

		inherit_expression_dependencies(id, A);
		inherit_expression_dependencies(id, B);
		inherit_expression_dependencies(id, C);
		break;
	}

	case OpCompositeConstructReplicateEXT:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto &type = get<SPIRType>(result_type);
		auto value_to_replicate = to_expression(ops[2]);
		std::string rhs;

		// Matrices don't have a replicating constructor for vectors. Need to manually replicate the value.
		if (type.op == OpTypeMatrix || type.op == OpTypeArray)
		{
			if (type.op == OpTypeArray && type.array.size() != 1)
			{
				SPIRV_CROSS_THROW(
				    "Multi-dimensional arrays currently not supported for OpCompositeConstructReplicateEXT");
			}

			uint32_t num_elements = type.op == OpTypeMatrix ? type.columns : type.array[0];
			if (backend.use_initializer_list && type.op == OpTypeArray)
			{
				rhs += "{";
			}
			else
			{
				rhs += type_to_glsl_constructor(type);
				rhs += "(";
			}

			for (uint32_t i = 0; i < num_elements; i++)
			{
				rhs += value_to_replicate;
				if (i < num_elements - 1)
					rhs += ", ";
			}

			if (backend.use_initializer_list && type.op == OpTypeArray)
				rhs += "}";
			else
				rhs += ")";
		}
		else
		{
			rhs = join(type_to_glsl(type), "(", to_expression(ops[2]), ")");
		}

		emit_op(result_type, id, rhs, true);
		break;
	}

	default:
		statement("// unimplemented op ", instruction.op);
		break;
	}
}

// Appends function arguments, mapped from global variables, beyond the specified arg index.
// This is used when a function call uses fewer arguments than the function defines.
// This situation may occur if the function signature has been dynamically modified to
// extract global variables referenced from within the function, and convert them to
// function arguments. This is necessary for shader languages that do not support global
// access to shader input content from within a function (e.g. Metal). Each additional
// function arg uses the name of the global variable. Function nesting will modify the
// functions and function calls all the way up the nesting chain.
void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
{
	auto &args = func.arguments;
	uint32_t arg_cnt = uint32_t(args.size());
	for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
	{
		auto &arg = args[arg_idx];
		assert(arg.alias_global_variable);

		// If the underlying variable needs to be declared
		// (i.e. a local variable with deferred declaration), do so now.
		uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
		if (var_id)
			flush_variable_declaration(var_id);

		arglist.push_back(to_func_call_arg(arg, arg.id));
	}
}
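
// Resolves the name of a struct member. When the type is an alias of another type
// (and has not been repacked), the lookup is forwarded to the aliased type so both
// declarations agree on member names; otherwise the OpMemberName alias is used, with
// a generated "_mN" fallback when no alias exists.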
string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
{
	if (type.type_alias != TypeID(0) &&
	    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
	{
		return to_member_name(get<SPIRType>(type.type_alias), index);
	}

	auto &memb = ir.meta[type.self].members;
	if (index < memb.size() && !memb[index].alias.empty())
		return memb[index].alias;
	else
		return join("_m", index);
}

string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
{
	return join(".", to_member_name(type, index));
}

string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
{
	string ret;
	auto *member_type = &type;
	for (auto &index : indices)
	{
		ret += join(".", to_member_name(*member_type, index));
		member_type = &get<SPIRType>(member_type->member_types[index]);
	}
	return ret;
}

void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
{
	auto &memb = ir.meta[type.self].members;
	if (index < memb.size() && !memb[index].alias.empty())
	{
		auto &name = memb[index].alias;
		if (name.empty())
			return;

		ParsedIR::sanitize_identifier(name, true, true);
		update_name_cache(type.member_name_cache, name);
	}
}

// Checks whether the ID is a row_major matrix that requires conversion before use.
bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
{
	// Natively supported row-major matrices do not need to be converted.
	// Legacy targets do not support row major.
	if (backend.native_row_major_matrix && !is_legacy())
		return false;

	auto *e = maybe_get<SPIRExpression>(id);
	if (e)
		return e->need_transpose;
	else
		return has_decoration(id, DecorationRowMajor);
}

// Checks whether the member is a row_major matrix that requires conversion before use.
bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
{
	// Natively supported row-major matrices do not need to be converted.
	if (backend.native_row_major_matrix && !is_legacy())
		return false;

	// Non-matrix or column-major matrix types do not need to be converted.
	if (!has_member_decoration(type.self, index, DecorationRowMajor))
		return false;

	// Only square row-major matrices can be converted at this time.
	// Converting non-square matrices will require defining a custom GLSL function that
	// swaps matrix elements while retaining the original dimensional form of the matrix.
	const auto mbr_type = get<SPIRType>(type.member_types[index]);
	if (mbr_type.columns != mbr_type.vecsize)
		SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");

	return true;
}

// Checks if we need to remap physical type IDs when declaring the type in a buffer.
bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
{
	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
}

// Checks whether the member is a packed data type that might need to be unpacked.
bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
{
	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
}

// Wraps the expression string in a function call that converts the
// row_major matrix result of the expression to a column_major matrix.
// Base implementation uses the standard library transpose() function.
// Subclasses may override to use a different function.
string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
                                              bool /*is_packed*/, bool relaxed)
{
	strip_enclosed_expression(exp_str);
	if (!is_matrix(exp_type))
	{
		auto column_index = exp_str.find_last_of('[');
		if (column_index == string::npos)
			return exp_str;

		auto column_expr = exp_str.substr(column_index);
		exp_str.resize(column_index);

		auto end_deferred_index = column_expr.find_last_of(']');
		if (end_deferred_index != string::npos && end_deferred_index + 1 != column_expr.size())
		{
			// If we have any data member fixups, it must be transposed so that it refers to this index.
			// E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong,
			// and needs to be [1].data[0] instead.
			end_deferred_index++;
			column_expr = column_expr.substr(end_deferred_index) +
			              column_expr.substr(0, end_deferred_index);
		}

		auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";

		// Loading a column from a row-major matrix. Unroll the load.
		for (uint32_t c = 0; c < exp_type.vecsize; c++)
		{
			transposed_expr += join(exp_str, '[', c, ']', column_expr);
			if (c + 1 < exp_type.vecsize)
				transposed_expr += ", ";
		}

		transposed_expr += ")";
		return transposed_expr;
	}
	else if (options.version < 120)
	{
		// GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
		// these GLSL versions do not support non-square matrices.
		if (exp_type.vecsize == 2 && exp_type.columns == 2)
			require_polyfill(PolyfillTranspose2x2, relaxed);
		else if (exp_type.vecsize == 3 && exp_type.columns == 3)
			require_polyfill(PolyfillTranspose3x3, relaxed);
		else if (exp_type.vecsize == 4 && exp_type.columns == 4)
			require_polyfill(PolyfillTranspose4x4, relaxed);
		else
			SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
		return join("spvTranspose", (options.es && relaxed) ? "MP" : "", "(", exp_str, ")");
	}
	else
		return join("transpose(", exp_str, ")");
}

string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
{
	string type_name = type_to_glsl(type, id);
	remap_variable_type_name(type, name, type_name);
	return join(type_name, " ", name, type_to_array_glsl(type, id));
}

bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
{
	return var.storage == storage;
}

// Emit a structure member. Subclasses may override to modify output,
// or to dynamically add a padding member if needed.
void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
                                      const string &qualifier, uint32_t)
{
	auto &membertype = get<SPIRType>(member_type_id);

	Bitset memberflags;
	auto &memb = ir.meta[type.self].members;
	if (index < memb.size())
		memberflags = memb[index].decoration_flags;

	string qualifiers;
	bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
	                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);

	if (is_block)
		qualifiers = to_interpolation_qualifiers(memberflags);

	statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, 0, memberflags),
	          variable_decl(membertype, to_member_name(type, index)), ";");
}

void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
{
}
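
// Builds the qualifier string (precise, restrict, precision) for a type with the given
// decorations. For ES targets this also applies the default precision rules, e.g. a
// RelaxedPrecision float in a fragment shader whose default float precision is already
// mediump needs no explicit qualifier.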
string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, uint32_t id, const Bitset &flags)
{
	// GL_EXT_buffer_reference variables can be marked as restrict.
	if (flags.get(DecorationRestrictPointerEXT))
		return "restrict ";

	string qual;

	if (type_is_floating_point(type) &&
	    (flags.get(DecorationNoContraction) || (type.self && has_legacy_nocontract(type.self, id))) &&
	    backend.support_precise_qualifier)
	{
		qual = "precise ";
	}

	// Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp).
	bool type_supports_precision =
	    type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
	    type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
	    type.basetype == SPIRType::Sampler;

	if (!type_supports_precision)
		return qual;

	if (options.es)
	{
		auto &execution = get_entry_point();

		if (type.basetype == SPIRType::UInt && is_legacy_es())
		{
			// HACK: This is a bool. See comment in type_to_glsl().
			qual += "lowp ";
		}
		else if (flags.get(DecorationRelaxedPrecision))
		{
			bool implied_fmediump = type.basetype == SPIRType::Float &&
			                        options.fragment.default_float_precision == Options::Mediump &&
			                        execution.model == ExecutionModelFragment;

			bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
			                        options.fragment.default_int_precision == Options::Mediump &&
			                        execution.model == ExecutionModelFragment;

			qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
		}
		else
		{
			bool implied_fhighp =
			    type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
			                                          execution.model == ExecutionModelFragment) ||
			                                         (execution.model != ExecutionModelFragment));

			bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
			                      ((options.fragment.default_int_precision == Options::Highp &&
			                        execution.model == ExecutionModelFragment) ||
			                       (execution.model != ExecutionModelFragment));

			qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
		}
	}
	else if (backend.allow_precision_qualifiers)
	{
		// Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
		// The default is highp however, so only emit mediump in the rare case that a shader has these.
		if (flags.get(DecorationRelaxedPrecision))
			qual += "mediump ";
	}

	return qual;
}
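
// Precision qualifiers for a specific SSA id. Narrow image result types (< 32-bit)
// are forced to mediump since GLSL cannot declare 16-bit sampler or image types directly.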
string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
{
	auto &type = expression_type(id);
	bool use_precision_qualifiers = backend.allow_precision_qualifiers;
	if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
	{
		// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
		auto &result_type = get<SPIRType>(type.image.type);
		if (result_type.width < 32)
			return "mediump ";
	}
	return flags_to_qualifiers_glsl(type, id, ir.meta[id].decoration.decoration_flags);
}

void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var)
{
	// Works around weird behavior in glslangValidator where
	// a patch out block is translated to just block members getting the decoration.
	// To make glslang not complain when we compile again, we have to transform this back to a case where
	// the variable itself has Patch decoration, and not members.
	// Same for perprimitiveEXT.
	auto &type = get<SPIRType>(var.basetype);
	if (has_decoration(type.self, DecorationBlock))
	{
		uint32_t member_count = uint32_t(type.member_types.size());
		Decoration promoted_decoration = {};
		bool do_promote_decoration = false;
		for (uint32_t i = 0; i < member_count; i++)
		{
			if (has_member_decoration(type.self, i, DecorationPatch))
			{
				promoted_decoration = DecorationPatch;
				do_promote_decoration = true;
				break;
			}
			else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT))
			{
				promoted_decoration = DecorationPerPrimitiveEXT;
				do_promote_decoration = true;
				break;
			}
		}

		if (do_promote_decoration)
		{
			set_decoration(var.self, promoted_decoration);
			for (uint32_t i = 0; i < member_count; i++)
				unset_member_decoration(type.self, i, promoted_decoration);
		}
	}
}
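
// Collects all qualifiers for a variable or expression id: shared/taskPayloadSharedEXT,
// interpolation, storage, memory qualifiers for storage images and tensors
// (coherent/restrict/readonly/writeonly), and finally precision.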
string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
{
	auto &flags = get_decoration_bitset(id);
	string res;

	auto *var = maybe_get<SPIRVariable>(id);

	if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
		res += "shared ";
	else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied)
		res += "taskPayloadSharedEXT ";

	res += to_interpolation_qualifiers(flags);
	if (var)
		res += to_storage_qualifiers_glsl(*var);

	auto &type = expression_type(id);
	if (type.image.dim != DimSubpassData && type.image.sampled == 2)
	{
		if (flags.get(DecorationCoherent))
			res += "coherent ";
		if (flags.get(DecorationRestrict))
			res += "restrict ";

		if (flags.get(DecorationNonWritable))
			res += "readonly ";

		bool formatted_load = type.image.format == ImageFormatUnknown;
		if (flags.get(DecorationNonReadable))
		{
			res += "writeonly ";
			formatted_load = false;
		}

		if (formatted_load)
		{
			if (!options.es)
				require_extension_internal("GL_EXT_shader_image_load_formatted");
			else
				SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
		}
	}
	else if (type.basetype == SPIRType::Tensor)
	{
		if (flags.get(DecorationNonWritable))
			res += "readonly ";
		if (flags.get(DecorationNonReadable))
			res += "writeonly ";
	}

	res += to_precision_qualifiers_glsl(id);

	return res;
}

string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
{
	// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
	auto &type = expression_type(arg.id);
	const char *direction = "";

	if (is_pointer(type) &&
	    (type.storage == StorageClassFunction ||
	     type.storage == StorageClassPrivate ||
	     type.storage == StorageClassOutput))
	{
		// If we're passing block types to a function, we really mean reference in a pointer sense,
		// but DXC does not like inout for mesh blocks, so workaround that. out is technically not correct,
		// but it works in practice due to legalization. It's ... not great, but you gotta do what you gotta do.
		// GLSL will never hit this case since it's not valid.
		if (type.storage == StorageClassOutput && get_execution_model() == ExecutionModelMeshEXT &&
		    has_decoration(type.self, DecorationBlock) && is_builtin_type(type) && arg.write_count)
		{
			direction = "out ";
		}
		else if (arg.write_count && arg.read_count)
			direction = "inout ";
		else if (arg.write_count)
			direction = "out ";
	}

	return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
}

string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
{
	return to_unpacked_expression(var.initializer);
}

string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
{
#ifndef NDEBUG
	auto &type = get<SPIRType>(type_id);
	assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
	       type.storage == StorageClassGeneric);
#endif

	uint32_t id = ir.increase_bound_by(1);
	ir.make_constant_null(id, type_id, false);
	return constant_expression(get<SPIRConstant>(id));
}

bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
{
	if (type.pointer)
		return false;

	if (!type.array.empty() && options.flatten_multidimensional_arrays)
		return false;

	for (auto &literal : type.array_size_literal)
		if (!literal)
			return false;

	for (auto &memb : type.member_types)
		if (!type_can_zero_initialize(get<SPIRType>(memb)))
			return false;

	return true;
}
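
// Declares a variable including qualifiers and, where possible, an initializer.
// Loop variables use their static expression, regular variables their initializer,
// and Workgroup variables fall back to GL_EXT_null_initializer since they can only
// be null-initialized.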
string CompilerGLSL::variable_decl(const SPIRVariable &variable)
{
	// Ignore the pointer type since GLSL doesn't have pointers.
	auto &type = get_variable_data_type(variable);

	if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer)
		SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");

	auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));

	if (variable.loop_variable && variable.static_expression)
	{
		uint32_t expr = variable.static_expression;
		if (ir.ids[expr].get_type() != TypeUndef)
			res += join(" = ", to_unpacked_expression(variable.static_expression));
		else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
			res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
	}
	else if (variable.initializer)
	{
		if (!variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
		{
			uint32_t expr = variable.initializer;
			if (ir.ids[expr].get_type() != TypeUndef)
				res += join(" = ", to_initializer_expression(variable));
			else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
				res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
		}
		else
		{
			// Workgroup memory requires special handling: it can only be null-initialized.
			// GLSL handles this with a null initializer; other backends require more work after the declaration.
			require_extension_internal("GL_EXT_null_initializer");
			if (!backend.constant_null_initializer.empty())
				res += join(" = ", backend.constant_null_initializer);
		}
	}

	return res;
}

const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
{
	auto &flags = get_decoration_bitset(variable.self);
	if (flags.get(DecorationRelaxedPrecision))
		return "mediump ";
	else
		return "highp ";
}

string CompilerGLSL::pls_decl(const PlsRemap &var)
{
	auto &variable = get<SPIRVariable>(var.id);

	auto op_and_basetype = pls_format_to_basetype(var.format);

	SPIRType type { op_and_basetype.first };
	type.basetype = op_and_basetype.second;
	auto vecsize = pls_format_to_components(var.format);
	if (vecsize > 1)
	{
		type.op = OpTypeVector;
		type.vecsize = vecsize;
	}

	return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
	            to_name(variable.self));
}

uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
{
	return to_array_size_literal(type, uint32_t(type.array.size() - 1));
}

uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
{
	assert(type.array.size() == type.array_size_literal.size());

	if (type.array_size_literal[index])
	{
		return type.array[index];
	}
	else
	{
		// Use the default spec constant value.
		// This is the best we can do.
		return evaluate_constant_u32(type.array[index]);
	}
}

string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
{
	assert(type.array.size() == type.array_size_literal.size());

	auto &size = type.array[index];
	if (!type.array_size_literal[index])
		return to_expression(size);
	else if (size)
		return convert_to_string(size);
	else if (!backend.unsized_array_supported)
	{
		// For runtime-sized arrays, we can work around
		// lack of standard support for this by simply having
		// a single element array.
		//
		// Runtime length arrays must always be the last element
		// in an interface block.
		return "1";
	}
	else
		return "";
}

string CompilerGLSL::type_to_array_glsl(const SPIRType &type, uint32_t)
{
	if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer && type.basetype != SPIRType::Struct)
	{
		// We are using a wrapped pointer type, and we should not emit any array declarations here.
		return "";
	}

	if (type.array.empty())
		return "";

	if (options.flatten_multidimensional_arrays)
	{
		string res;
		res += "[";
		for (auto i = uint32_t(type.array.size()); i; i--)
		{
			res += enclose_expression(to_array_size(type, i - 1));
			if (i > 1)
				res += " * ";
		}
		res += "]";
		return res;
	}
	else
	{
		if (type.array.size() > 1)
		{
			if (!options.es && options.version < 430)
				require_extension_internal("GL_ARB_arrays_of_arrays");
			else if (options.es && options.version < 310)
				SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
				                  "Try using --flatten-multidimensional-arrays or set "
				                  "options.flatten_multidimensional_arrays to true.");
		}

		string res;
		for (auto i = uint32_t(type.array.size()); i; i--)
		{
			res += "[";
			res += to_array_size(type, i - 1);
			res += "]";
		}
		return res;
	}
}
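
// Builds the GLSL sampler/image/texture type name from the image type information,
// e.g. a sampled, depth-compared 2D image becomes "sampler2DShadow", and a storage
// image with signed integer components becomes "iimage2D". Legacy and ES targets
// pull in the extensions they need along the way.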
string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id, bool /*member*/)
{
	auto &imagetype = get<SPIRType>(type.image.type);
	string res;

	switch (imagetype.basetype)
	{
	case SPIRType::Int64:
		res = "i64";
		require_extension_internal("GL_EXT_shader_image_int64");
		break;
	case SPIRType::UInt64:
		res = "u64";
		require_extension_internal("GL_EXT_shader_image_int64");
		break;
	case SPIRType::Int:
	case SPIRType::Short:
	case SPIRType::SByte:
		res = "i";
		break;
	case SPIRType::UInt:
	case SPIRType::UShort:
	case SPIRType::UByte:
		res = "u";
		break;
	default:
		break;
	}

	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
	// We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter.

	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
		return res + "subpassInput" + (type.image.ms ? "MS" : "");
	else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
	         subpass_input_is_framebuffer_fetch(id))
	{
		SPIRType sampled_type = get<SPIRType>(type.image.type);
		sampled_type.vecsize = 4;
		return type_to_glsl(sampled_type);
	}

	// If we're emulating subpassInput with samplers, force sampler2D
	// so we don't have to specify format.
	if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
	{
		// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
		if (type.image.dim == DimBuffer && type.image.sampled == 1)
			res += "sampler";
		else
			res += type.image.sampled == 2 ? "image" : "texture";
	}
	else
		res += "sampler";

	switch (type.image.dim)
	{
	case Dim1D:
		// ES doesn't support 1D. Fake it with 2D.
		res += options.es ? "2D" : "1D";
		break;
	case Dim2D:
		res += "2D";
		break;
	case Dim3D:
		res += "3D";
		break;
	case DimCube:
		res += "Cube";
		break;
	case DimRect:
		if (options.es)
			SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");

		if (is_legacy_desktop())
			require_extension_internal("GL_ARB_texture_rectangle");

		res += "2DRect";
		break;
	case DimBuffer:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_texture_buffer");
		else if (!options.es && options.version < 140)
			require_extension_internal("GL_EXT_texture_buffer_object");
		res += "Buffer";
		break;
	case DimSubpassData:
		res += "2D";
		break;
	default:
		SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
	}

	if (type.image.ms)
		res += "MS";
	if (type.image.arrayed)
	{
		if (is_legacy_desktop())
			require_extension_internal("GL_EXT_texture_array");
		res += "Array";
	}

	// "Shadow" state in GLSL only exists for samplers and combined image samplers.
	if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
	    is_depth_image(type, id))
	{
		res += "Shadow";

		if (type.image.dim == DimCube && is_legacy())
		{
			if (!options.es)
				require_extension_internal("GL_EXT_gpu_shader4");
			else
			{
				require_extension_internal("GL_NV_shadow_samplers_cube");
				res += "NV";
			}
		}
	}

	return res;
}

string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
{
	if (backend.use_array_constructor && type.array.size() > 1)
	{
		if (options.flatten_multidimensional_arrays)
			SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
			                  "e.g. float[][]().");
		else if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_arrays_of_arrays");
		else if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
	}

	auto e = type_to_glsl(type);
	if (backend.use_array_constructor)
	{
		for (uint32_t i = 0; i < type.array.size(); i++)
			e += "[]";
	}
	return e;
}

// The optional id parameter indicates the object whose type we are trying
// to find the description for. Most type descriptions do not depend on a
// specific object's use of that type.
string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
{
	if (is_physical_pointer(type) && !is_physical_pointer_to_buffer_block(type))
	{
		// Need to create a magic type name which compacts the entire type information.
		auto *parent = &get_pointee_type(type);
		string name = type_to_glsl(*parent);

		uint32_t array_stride = get_decoration(type.parent_type, DecorationArrayStride);

		// Resolve all array dimensions in one go since once we lose the pointer type,
		// array information is left to type_to_array_glsl. The base type loses array information.
		while (is_array(*parent))
		{
			if (parent->array_size_literal.back())
				name += join(type.array.back(), "_");
			else
				name += join("id", type.array.back(), "_");

			name += "stride_" + std::to_string(array_stride);

			array_stride = get_decoration(parent->parent_type, DecorationArrayStride);
			parent = &get<SPIRType>(parent->parent_type);
		}

		name += "Pointer";
		return name;
	}

	switch (type.basetype)
	{
	case SPIRType::Struct:
		// Need OpName lookup here to get a "sensible" name for a struct.
		if (backend.explicit_struct_type)
			return join("struct ", to_name(type.self));
		else
			return to_name(type.self);

	case SPIRType::Image:
	case SPIRType::SampledImage:
		return image_type_glsl(type, id);

	case SPIRType::Sampler:
		// The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
		// this distinction into the type system.
		return comparison_ids.count(id) ? "samplerShadow" : "sampler";

	case SPIRType::AccelerationStructure:
		return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";

	case SPIRType::RayQuery:
		return "rayQueryEXT";

	case SPIRType::Tensor:
		if (type.ext.tensor.rank == 0)
			SPIRV_CROSS_THROW("GLSL tensors must have a Rank.");
		if (type.ext.tensor.shape != 0)
			SPIRV_CROSS_THROW("GLSL tensors cannot have a Shape.");
		return join("tensorARM<", type_to_glsl(get<SPIRType>(type.ext.tensor.type)), ", ",
		            to_expression(type.ext.tensor.rank), ">");

	case SPIRType::Void:
		return "void";

	default:
		break;
	}

	if (type.basetype == SPIRType::UInt && is_legacy())
	{
		if (options.es)
			// HACK: spirv-cross changes bools into uints and generates code which compares them to
			// zero. Input code will have already been validated as not to have contained any uints,
			// so any remaining uints must in fact be bools. However, simply returning "bool" here
			// will result in invalid code. Instead, return an int.
			return backend.basic_int_type;
		else
			require_extension_internal("GL_EXT_gpu_shader4");
	}

	if (type.basetype == SPIRType::AtomicCounter)
	{
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("At least ESSL 3.10 required for atomic counters.");
		else if (!options.es && options.version < 420)
			require_extension_internal("GL_ARB_shader_atomic_counters");
	}

	if (type.op == OpTypeCooperativeVectorNV)
	{
		require_extension_internal("GL_NV_cooperative_vector");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Cooperative vector NV only available in Vulkan.");
		std::string component_type_str = type_to_glsl(get<SPIRType>(type.ext.coopVecNV.component_type_id));
		return join("coopvecNV<", component_type_str, ", ", to_expression(type.ext.coopVecNV.component_count_id), ">");
	}

	const SPIRType *coop_type = &type;
	while (is_pointer(*coop_type) || is_array(*coop_type))
		coop_type = &get<SPIRType>(coop_type->parent_type);

	if (coop_type->op == OpTypeCooperativeMatrixKHR)
	{
		require_extension_internal("GL_KHR_cooperative_matrix");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Cooperative matrix only available in Vulkan.");

		// GLSL doesn't support this as spec constant, which makes sense ...
		uint32_t use_type = get<SPIRConstant>(coop_type->ext.cooperative.use_id).scalar();
		const char *use = nullptr;
		switch (use_type)
		{
		case CooperativeMatrixUseMatrixAKHR:
			use = "gl_MatrixUseA";
			break;
		case CooperativeMatrixUseMatrixBKHR:
			use = "gl_MatrixUseB";
			break;
		case CooperativeMatrixUseMatrixAccumulatorKHR:
			use = "gl_MatrixUseAccumulator";
			break;
		default:
			SPIRV_CROSS_THROW("Invalid matrix use.");
		}

		string scope_expr;
		if (const auto *scope = maybe_get<SPIRConstant>(coop_type->ext.cooperative.scope_id))
		{
			if (!scope->specialization)
			{
				require_extension_internal("GL_KHR_memory_scope_semantics");
				if (scope->scalar() == ScopeSubgroup)
					scope_expr = "gl_ScopeSubgroup";
				else if (scope->scalar() == ScopeWorkgroup)
					scope_expr = "gl_ScopeWorkgroup";
				else
					SPIRV_CROSS_THROW("Invalid scope for cooperative matrix.");
			}
		}

		if (scope_expr.empty())
			scope_expr = to_expression(coop_type->ext.cooperative.scope_id);

		return join("coopmat<", type_to_glsl(get<SPIRType>(coop_type->parent_type)), ", ",
		            scope_expr, ", ",
		            to_expression(coop_type->ext.cooperative.rows_id), ", ",
		            to_expression(coop_type->ext.cooperative.columns_id), ", ", use, ">");
	}
	if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return "bool";
		case SPIRType::SByte:
			return backend.basic_int8_type;
		case SPIRType::UByte:
			return backend.basic_uint8_type;
		case SPIRType::Short:
			return backend.basic_int16_type;
		case SPIRType::UShort:
			return backend.basic_uint16_type;
		case SPIRType::Int:
			return backend.basic_int_type;
		case SPIRType::UInt:
			return backend.basic_uint_type;
		case SPIRType::AtomicCounter:
			return "atomic_uint";
		case SPIRType::Half:
			return "float16_t";
		case SPIRType::BFloat16:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("bfloat16 requires Vulkan semantics.");
			require_extension_internal("GL_EXT_bfloat16");
			return "bfloat16_t";
		case SPIRType::FloatE4M3:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("floate4m3_t requires Vulkan semantics.");
			require_extension_internal("GL_EXT_float_e4m3");
			return "floate4m3_t";
		case SPIRType::FloatE5M2:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("floate5m2_t requires Vulkan semantics.");
			require_extension_internal("GL_EXT_float_e5m2");
			return "floate5m2_t";
		case SPIRType::Float:
			return "float";
		case SPIRType::Double:
			return "double";
		case SPIRType::Int64:
			return "int64_t";
		case SPIRType::UInt64:
			return "uint64_t";
		default:
			return "???";
		}
	}
	else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bvec", type.vecsize);
		case SPIRType::SByte:
			return join("i8vec", type.vecsize);
		case SPIRType::UByte:
			return join("u8vec", type.vecsize);
		case SPIRType::Short:
			return join("i16vec", type.vecsize);
		case SPIRType::UShort:
			return join("u16vec", type.vecsize);
		case SPIRType::Int:
			return join("ivec", type.vecsize);
		case SPIRType::UInt:
			return join("uvec", type.vecsize);
		case SPIRType::Half:
			return join("f16vec", type.vecsize);
		case SPIRType::BFloat16:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("bfloat16 requires Vulkan semantics.");
			require_extension_internal("GL_EXT_bfloat16");
			return join("bf16vec", type.vecsize);
		case SPIRType::FloatE4M3:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("floate4m3_t requires Vulkan semantics.");
			require_extension_internal("GL_EXT_float_e4m3");
			return join("fe4m3vec", type.vecsize);
		case SPIRType::FloatE5M2:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("floate5m2_t requires Vulkan semantics.");
			require_extension_internal("GL_EXT_float_e5m2");
			return join("fe5m2vec", type.vecsize);
		case SPIRType::Float:
			return join("vec", type.vecsize);
		case SPIRType::Double:
			return join("dvec", type.vecsize);
		case SPIRType::Int64:
			return join("i64vec", type.vecsize);
		case SPIRType::UInt64:
			return join("u64vec", type.vecsize);
		default:
			return "???";
		}
	}
	else if (type.vecsize == type.columns) // Simple Matrix builtin
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bmat", type.vecsize);
		case SPIRType::Int:
			return join("imat", type.vecsize);
		case SPIRType::UInt:
			return join("umat", type.vecsize);
		case SPIRType::Half:
			return join("f16mat", type.vecsize);
		case SPIRType::Float:
			return join("mat", type.vecsize);
		case SPIRType::Double:
			return join("dmat", type.vecsize);
		// Matrix types not supported for int64/uint64.
		default:
			return "???";
		}
	}
	else
	{
		switch (type.basetype)
		{
		case SPIRType::Boolean:
			return join("bmat", type.columns, "x", type.vecsize);
		case SPIRType::Int:
			return join("imat", type.columns, "x", type.vecsize);
		case SPIRType::UInt:
			return join("umat", type.columns, "x", type.vecsize);
		case SPIRType::Half:
			return join("f16mat", type.columns, "x", type.vecsize);
		case SPIRType::Float:
			return join("mat", type.columns, "x", type.vecsize);
		case SPIRType::Double:
			return join("dmat", type.columns, "x", type.vecsize);
		// Matrix types not supported for int64/uint64.
		default:
			return "???";
		}
	}
}

  15081. void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
  15082. const unordered_set<string> &variables_secondary, string &name)
  15083. {
  15084. if (name.empty())
  15085. return;
  15086. ParsedIR::sanitize_underscores(name);
  15087. if (ParsedIR::is_globally_reserved_identifier(name, true))
  15088. {
  15089. name.clear();
  15090. return;
  15091. }
  15092. update_name_cache(variables_primary, variables_secondary, name);
  15093. }
  15094. void CompilerGLSL::add_local_variable_name(uint32_t id)
  15095. {
  15096. add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
  15097. }
  15098. void CompilerGLSL::add_resource_name(uint32_t id)
  15099. {
  15100. add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
  15101. }
  15102. void CompilerGLSL::add_header_line(const std::string &line)
  15103. {
  15104. header_lines.push_back(line);
  15105. }
  15106. bool CompilerGLSL::has_extension(const std::string &ext) const
  15107. {
  15108. auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
  15109. return itr != end(forced_extensions);
  15110. }
  15111. void CompilerGLSL::require_extension(const std::string &ext)
  15112. {
  15113. if (!has_extension(ext))
  15114. forced_extensions.push_back(ext);
  15115. }
  15116. const SmallVector<std::string> &CompilerGLSL::get_required_extensions() const
  15117. {
  15118. return forced_extensions;
  15119. }
  15120. void CompilerGLSL::require_extension_internal(const string &ext)
  15121. {
  15122. if (backend.supports_extensions && !has_extension(ext))
  15123. {
  15124. forced_extensions.push_back(ext);
  15125. force_recompile();
  15126. }
  15127. }
  15128. void CompilerGLSL::flatten_buffer_block(VariableID id)
  15129. {
  15130. auto &var = get<SPIRVariable>(id);
  15131. auto &type = get<SPIRType>(var.basetype);
  15132. auto name = to_name(type.self, false);
  15133. auto &flags = get_decoration_bitset(type.self);
  15134. if (!type.array.empty())
  15135. SPIRV_CROSS_THROW(name + " is an array of UBOs.");
  15136. if (type.basetype != SPIRType::Struct)
  15137. SPIRV_CROSS_THROW(name + " is not a struct.");
  15138. if (!flags.get(DecorationBlock))
  15139. SPIRV_CROSS_THROW(name + " is not a block.");
  15140. if (type.member_types.empty())
  15141. SPIRV_CROSS_THROW(name + " is an empty struct.");
  15142. flattened_buffer_blocks.insert(id);
  15143. }
  15144. bool CompilerGLSL::builtin_translates_to_nonarray(BuiltIn /*builtin*/) const
  15145. {
  15146. return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
  15147. }
  15148. bool CompilerGLSL::is_user_type_structured(uint32_t /*id*/) const
  15149. {
  15150. return false; // GLSL itself does not have structured user type, but HLSL does with StructuredBuffer and RWStructuredBuffer resources.
  15151. }
bool CompilerGLSL::check_atomic_image(uint32_t id)
{
	auto &type = expression_type(id);
	if (type.storage == StorageClassImage)
	{
		if (options.es && options.version < 320)
			require_extension_internal("GL_OES_shader_image_atomic");
		auto *var = maybe_get_backing_variable(id);
		if (var)
		{
			if (has_decoration(var->self, DecorationNonWritable) || has_decoration(var->self, DecorationNonReadable))
			{
				unset_decoration(var->self, DecorationNonWritable);
				unset_decoration(var->self, DecorationNonReadable);
				force_recompile();
			}
		}
		return true;
	}
	else
		return false;
}
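// GLSL/HLSL do not allow two functions with the same name and parameter types,
// so hash the argument types and rename the function if we have already seen a
// conflicting overload with this name.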
void CompilerGLSL::add_function_overload(const SPIRFunction &func)
{
	Hasher hasher;
	for (auto &arg : func.arguments)
	{
		// Parameters can vary with pointer type or not,
		// but that will not change the signature in GLSL/HLSL,
		// so strip the pointer type before hashing.
		uint32_t type_id = get_pointee_type_id(arg.type);
		// Workaround glslang bug. It seems to only consider the base type when resolving overloads.
		if (get<SPIRType>(type_id).op == OpTypeCooperativeMatrixKHR)
			type_id = get<SPIRType>(type_id).parent_type;
		auto &type = get<SPIRType>(type_id);
		if (!combined_image_samplers.empty())
		{
			// If we have combined image samplers, we cannot really trust the image and sampler arguments
			// we pass down to callees, because they may be shuffled around.
			// Ignore these arguments, to make sure that functions need to differ in some other way
			// to be considered different overloads.
			if (type.basetype == SPIRType::SampledImage ||
			    (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
			{
				continue;
			}
		}
		hasher.u32(type_id);
	}
	uint64_t types_hash = hasher.get();
	auto function_name = to_name(func.self);
	auto itr = function_overloads.find(function_name);
	if (itr != end(function_overloads))
	{
		// There exists a function with this name already.
		auto &overloads = itr->second;
		if (overloads.count(types_hash) != 0)
		{
			// Overload conflict, assign a new name.
			add_resource_name(func.self);
			function_overloads[to_name(func.self)].insert(types_hash);
		}
		else
		{
			// Can reuse the name.
			overloads.insert(types_hash);
		}
	}
	else
	{
		// First time we see this function name.
		add_resource_name(func.self);
		function_overloads[to_name(func.self)].insert(types_hash);
	}
}
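// Emits the signature line for a function, renaming the entry point to main()
// (or spvMainInterlockedBody) and building the GLSL argument list.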
void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
{
	if (func.self != ir.default_entry_point)
		add_function_overload(func);
	// Avoid shadow declarations.
	local_variable_names = resource_names;
	string decl;
	auto &type = get<SPIRType>(func.return_type);
	decl += flags_to_qualifiers_glsl(type, 0, return_flags);
	decl += type_to_glsl(type);
	decl += type_to_array_glsl(type, 0);
	decl += " ";
	if (func.self == ir.default_entry_point)
	{
		// If we need complex fallback in GLSL, we just wrap main() in a function
		// and interlock the entire shader ...
		if (interlocked_is_complex)
			decl += "spvMainInterlockedBody";
		else
			decl += "main";
		processing_entry_point = true;
	}
	else
		decl += to_name(func.self);
	decl += "(";
	SmallVector<string> arglist;
	for (auto &arg : func.arguments)
	{
		// Do not pass in separate images or samplers if we're remapping
		// to combined image samplers.
		if (skip_argument(arg.id))
			continue;
		// Might change the variable name if it already exists in this function.
		// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use same name for variables.
		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
		add_local_variable_name(arg.id);
		arglist.push_back(argument_decl(arg));
		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
		auto *var = maybe_get<SPIRVariable>(arg.id);
		if (var)
			var->parameter = &arg;
	}
	for (auto &arg : func.shadow_arguments)
	{
		// Might change the variable name if it already exists in this function.
		// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use same name for variables.
		// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
		add_local_variable_name(arg.id);
		arglist.push_back(argument_decl(arg));
		// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
		auto *var = maybe_get<SPIRVariable>(arg.id);
		if (var)
			var->parameter = &arg;
	}
	decl += merge(arglist);
	decl += ")";
	statement(decl);
}
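// Emits a complete function. Called functions are emitted first so their definitions
// appear before any use.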
void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
{
	// Avoid potential cycles.
	if (func.active)
		return;
	func.active = true;
	// If we depend on a function, emit that function before we emit our own function.
	for (auto block : func.blocks)
	{
		auto &b = get<SPIRBlock>(block);
		for (auto &i : b.ops)
		{
			auto ops = stream(i);
			auto op = static_cast<Op>(i.op);
			if (op == OpFunctionCall)
			{
				// Recursively emit functions which are called.
				uint32_t id = ops[2];
				emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
			}
		}
	}
	if (func.entry_line.file_id != 0)
		emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
	emit_function_prototype(func, return_flags);
	begin_scope();
	if (func.self == ir.default_entry_point)
		emit_entry_point_declarations();
	current_function = &func;
	auto &entry_block = get<SPIRBlock>(func.entry_block);
	sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
	for (auto &array : func.constant_arrays_needed_on_stack)
	{
		auto &c = get<SPIRConstant>(array);
		auto &type = get<SPIRType>(c.constant_type);
		statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
	}
	for (auto &v : func.local_variables)
	{
		auto &var = get<SPIRVariable>(v);
		var.deferred_declaration = false;
		if (var.storage == StorageClassTaskPayloadWorkgroupEXT)
			continue;
		if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup))
		{
			// Special variable type which cannot have initializer,
			// need to be declared as standalone variables.
			// Comes from MSL which can push global variables as local variables in main function.
			add_local_variable_name(var.self);
			statement(variable_decl(var), ";");
			// "Real" workgroup variables in compute shaders needs extra caretaking.
			// They need to be initialized with an extra routine as they come in arbitrary form.
			if (var.storage == StorageClassWorkgroup && var.initializer)
				emit_workgroup_initialization(var);
			var.deferred_declaration = false;
		}
		else if (var.storage == StorageClassPrivate)
		{
			// These variables will not have had their CFG usage analyzed, so move it to the entry block.
			// Comes from MSL which can push global variables as local variables in main function.
			// We could just declare them right now, but we would miss out on an important initialization case which is
			// LUT declaration in MSL.
			// If we don't declare the variable when it is assigned we're forced to go through a helper function
			// which copies elements one by one.
			add_local_variable_name(var.self);
			if (var.initializer)
			{
				statement(variable_decl(var), ";");
				var.deferred_declaration = false;
			}
			else
			{
				auto &dominated = entry_block.dominated_variables;
				if (find(begin(dominated), end(dominated), var.self) == end(dominated))
					entry_block.dominated_variables.push_back(var.self);
				var.deferred_declaration = true;
			}
		}
		else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
		{
			// No need to declare this variable, it has a static expression.
			var.deferred_declaration = false;
		}
		else if (expression_is_lvalue(v))
		{
			add_local_variable_name(var.self);
			// Loop variables should never be declared early, they are explicitly emitted in a loop.
			if (var.initializer && !var.loop_variable)
				statement(variable_decl_function_local(var), ";");
			else
			{
				// Don't declare variable until first use to declutter the GLSL output quite a lot.
				// If we don't touch the variable before first branch,
				// declare it then since we need variable declaration to be in top scope.
				var.deferred_declaration = true;
			}
		}
		else
		{
			// HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
			// For these types (non-lvalue), we enforce forwarding through a shadowed variable.
			// This means that when we OpStore to these variables, we just write in the expression ID directly.
			// This breaks any kind of branching, since the variable must be statically assigned.
			// Branching on samplers and images would be pretty much impossible to fake in GLSL.
			var.statically_assigned = true;
		}
		var.loop_variable_enable = false;
		// Loop variables are never declared outside their for-loop, so block any implicit declaration.
		if (var.loop_variable)
		{
			var.deferred_declaration = false;
			// Need to reset the static expression so we can fallback to initializer if need be.
			var.static_expression = 0;
		}
	}
	// Enforce declaration order for regression testing purposes.
	for (auto &block_id : func.blocks)
	{
		auto &block = get<SPIRBlock>(block_id);
		sort(begin(block.dominated_variables), end(block.dominated_variables));
	}
	for (auto &line : current_function->fixup_hooks_in)
		line();
	emit_block_chain(entry_block);
	end_scope();
	processing_entry_point = false;
	statement("");
	// Make sure deferred declaration state for local variables is cleared when we are done with function.
	// We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
	for (auto &v : func.local_variables)
	{
		auto &var = get<SPIRVariable>(v);
		var.deferred_declaration = false;
	}
}
void CompilerGLSL::emit_fixup()
{
	if (is_vertex_like_shader())
	{
		if (options.vertex.fixup_clipspace)
		{
			const char *suffix = backend.float_literal_suffix ? "f" : "";
			statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
		}
		if (options.vertex.flip_vert_y)
			statement("gl_Position.y = -gl_Position.y;");
	}
}
void CompilerGLSL::emit_workgroup_initialization(const SPIRVariable &)
{
}
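// Emits the copies needed to realize OpPhi nodes when control transfers from block 'from' to block 'to'.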
void CompilerGLSL::flush_phi(BlockID from, BlockID to)
{
	auto &child = get<SPIRBlock>(to);
	if (child.ignore_phi_from_block == from)
		return;
	unordered_set<uint32_t> temporary_phi_variables;
	for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
	{
		auto &phi = *itr;
		if (phi.parent == from)
		{
			auto &var = get<SPIRVariable>(phi.function_variable);
			// A Phi variable might be a loop variable, so flush to static expression.
			if (var.loop_variable && !var.loop_variable_enable)
				var.static_expression = phi.local_variable;
			else
			{
				flush_variable_declaration(phi.function_variable);
				// Check if we are going to write to a Phi variable that another statement will read from
				// as part of another Phi node in our target block.
				// For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
				// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
				bool need_saved_temporary =
				    find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
					    return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
				    }) != end(child.phi_variables);
				if (need_saved_temporary)
				{
					// Need to make sure we declare the phi variable with a copy at the right scope.
					// We cannot safely declare a temporary here since we might be inside a continue block.
					if (!var.allocate_temporary_copy)
					{
						var.allocate_temporary_copy = true;
						force_recompile();
					}
					statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
					temporary_phi_variables.insert(phi.function_variable);
				}
				// This might be called in continue block, so make sure we
				// use this to emit ESSL 1.0 compliant increments/decrements.
				auto lhs = to_expression(phi.function_variable);
				string rhs;
				if (temporary_phi_variables.count(phi.local_variable))
					rhs = join("_", phi.local_variable, "_copy");
				else
					rhs = to_pointer_expression(phi.local_variable);
				if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
					statement(lhs, " = ", rhs, ";");
			}
			register_write(phi.function_variable);
		}
	}
}
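// Handles a branch into a loop's continue block. A complex continue block is emitted inline;
// otherwise a plain continue; is emitted unless control flow already terminates here.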
void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
{
	auto &to_block = get<SPIRBlock>(to);
	if (from == to)
		return;
	assert(is_continue(to));
	if (to_block.complex_continue)
	{
		// Just emit the whole block chain as is.
		auto usage_counts = expression_usage_counts;
		emit_block_chain(to_block);
		// Expression usage counts are moot after returning from the continue block.
		expression_usage_counts = usage_counts;
	}
	else
	{
		auto &from_block = get<SPIRBlock>(from);
		bool outside_control_flow = false;
		uint32_t loop_dominator = 0;
		// FIXME: Refactor this to not use the old loop_dominator tracking.
		if (from_block.merge_block)
		{
			// If we are a loop header, we don't set the loop dominator,
			// so just use "self" here.
			loop_dominator = from;
		}
		else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
		{
			loop_dominator = from_block.loop_dominator;
		}
		if (loop_dominator != 0)
		{
			auto &cfg = get_cfg_for_current_function();
			// For non-complex continue blocks, we implicitly branch to the continue block
			// by having the continue block be part of the loop header in for (; ; continue-block).
			outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
		}
		// Some simplification for for-loops. We always end up with a useless continue;
		// statement since we branch to a loop block.
		// Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block,
		// we can avoid writing out an explicit continue statement.
		// Similar optimization to return statements if we know we're outside flow control.
		if (!outside_control_flow)
			statement("continue;");
	}
}
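// Emits an unconditional branch, mapping SPIR-V structured control flow onto
// continue/break statements or direct emission of the target block chain.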
void CompilerGLSL::branch(BlockID from, BlockID to)
{
	flush_phi(from, to);
	flush_control_dependent_expressions(from);
	bool to_is_continue = is_continue(to);
	// This is only a continue if we branch to our loop dominator.
	if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
	{
		// This can happen if we had a complex continue block which was emitted.
		// Once the continue block tries to branch to the loop header, just emit continue;
		// and end the chain here.
		statement("continue;");
	}
	else if (from != to && is_break(to))
	{
		// We cannot break to ourselves, so check explicitly for from != to.
		// This case can trigger if a loop header is all three of these things:
		// - Continue block
		// - Loop header
		// - Break merge target all at once ...
		// Very dirty workaround.
		// Switch constructs are able to break, but they cannot break out of a loop at the same time,
		// yet SPIR-V allows it.
		// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
		// write to the ladder here, and defer the break.
		// The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
		if (is_loop_break(to))
		{
			for (size_t n = current_emitting_switch_stack.size(); n; n--)
			{
				auto *current_emitting_switch = current_emitting_switch_stack[n - 1];
				if (current_emitting_switch &&
				    current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
				    get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
				{
					if (!current_emitting_switch->need_ladder_break)
					{
						force_recompile();
						current_emitting_switch->need_ladder_break = true;
					}
					statement("_", current_emitting_switch->self, "_ladder_break = true;");
				}
				else
					break;
			}
		}
		statement("break;");
	}
	else if (to_is_continue || from == to)
	{
		// For from == to case can happen for a do-while loop which branches into itself.
		// We don't mark these cases as continue blocks, but the only possible way to branch into
		// ourselves is through means of continue blocks.
		// If we are merging to a continue block, there is no need to emit the block chain for continue here.
		// We can branch to the continue block after we merge execution.
		// Here we make use of structured control flow rules from spec:
		// 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
		//       - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
		// If we are branching to a merge block, we must be inside a construct which dominates the merge block.
		auto &block_meta = ir.block_meta[to];
		bool branching_to_merge =
		    (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
		                   ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
		if (!to_is_continue || !branching_to_merge)
			branch_to_continue(from, to);
	}
	else if (!is_conditional(to))
		emit_block_chain(get<SPIRBlock>(to));
	// It is important that we check for break before continue.
	// A block might serve two purposes, a break block for the inner scope, and
	// a continue block in the outer scope.
	// Inner scope always takes precedence.
}
void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
{
	auto &from_block = get<SPIRBlock>(from);
	BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
	// If we branch directly to our selection merge target, we don't need a code path.
	bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
	bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);
	if (!true_block_needs_code && !false_block_needs_code)
		return;
	// We might have a loop merge here. Only consider selection flattening constructs.
	// Loop hints are handled explicitly elsewhere.
	if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
		emit_block_hints(from_block);
	if (true_block_needs_code)
	{
		statement("if (", to_expression(cond), ")");
		begin_scope();
		branch(from, true_block);
		end_scope();
		if (false_block_needs_code)
		{
			statement("else");
			begin_scope();
			branch(from, false_block);
			end_scope();
		}
	}
	else if (false_block_needs_code)
	{
		// Only need false path, use negative conditional.
		statement("if (!", to_enclosed_expression(cond), ")");
		begin_scope();
		branch(from, false_block);
		end_scope();
	}
}
// FIXME: This currently cannot handle complex continue blocks
// as in do-while.
// This should be seen as a "trivial" continue block.
string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
{
	auto *block = &get<SPIRBlock>(continue_block);
	// While emitting the continue block, declare_temporary will check this
	// if we have to emit temporaries.
	current_continue_block = block;
	SmallVector<string> statements;
	// Capture all statements into our list.
	auto *old = redirect_statement;
	redirect_statement = &statements;
	// Stamp out all blocks one after each other.
	while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
	{
		// Write out all instructions we have in this block.
		emit_block_instructions(*block);
		// For plain branchless for/while continue blocks.
		if (block->next_block)
		{
			flush_phi(continue_block, block->next_block);
			block = &get<SPIRBlock>(block->next_block);
		}
		// For do while blocks. The last block will be a select block.
		else if (block->true_block && follow_true_block)
		{
			flush_phi(continue_block, block->true_block);
			block = &get<SPIRBlock>(block->true_block);
		}
		else if (block->false_block && follow_false_block)
		{
			flush_phi(continue_block, block->false_block);
			block = &get<SPIRBlock>(block->false_block);
		}
		else
		{
			SPIRV_CROSS_THROW("Invalid continue block detected!");
		}
	}
	// Restore old pointer.
	redirect_statement = old;
	// Somewhat ugly, strip off the last ';' since we use ',' instead.
	// Ideally, we should select this behavior in statement().
	for (auto &s : statements)
	{
		if (!s.empty() && s.back() == ';')
			s.erase(s.size() - 1, 1);
	}
	current_continue_block = nullptr;
	return merge(statements);
}
void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
{
	// While loops do not take initializers, so declare all of them outside.
	for (auto &loop_var : block.loop_variables)
	{
		auto &var = get<SPIRVariable>(loop_var);
		statement(variable_decl(var), ";");
	}
}
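// Builds the initializer clause of a for-loop header from the block's loop variables.
// Returns an empty string when the variables have to be declared before the loop instead.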
string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
{
	if (block.loop_variables.empty())
		return "";
	bool same_types = for_loop_initializers_are_same_type(block);
	// We can only declare for loop initializers if all variables are of same type.
	// If we cannot do this, declare individual variables before the loop header.
	// We might have a loop variable candidate which was not assigned to for some reason.
	uint32_t missing_initializers = 0;
	for (auto &variable : block.loop_variables)
	{
		uint32_t expr = get<SPIRVariable>(variable).static_expression;
		// Sometimes loop variables are initialized with OpUndef, but we can just declare
		// a plain variable without initializer in this case.
		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
			missing_initializers++;
	}
	if (block.loop_variables.size() == 1 && missing_initializers == 0)
	{
		return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
	}
	else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
	{
		for (auto &loop_var : block.loop_variables)
			statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
		return "";
	}
	else
	{
		// We have a mix of loop variables, either ones with a clear initializer, or ones without.
		// Separate the two streams.
		string expr;
		for (auto &loop_var : block.loop_variables)
		{
			uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
			if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
			{
				statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
			}
			else
			{
				auto &var = get<SPIRVariable>(loop_var);
				auto &type = get_variable_data_type(var);
				if (expr.empty())
				{
					// For loop initializers are of the form <type id = value, id = value, id = value, etc ...
					expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
				}
				else
				{
					expr += ", ";
					// In MSL, being based on C++, the asterisk marking a pointer
					// binds to the identifier, not the type.
					if (type.pointer)
						expr += "* ";
				}
				expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
			}
		}
		return expr;
	}
}
bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
{
	if (block.loop_variables.size() <= 1)
		return true;
	uint32_t expected = 0;
	Bitset expected_flags;
	for (auto &var : block.loop_variables)
	{
		// Don't care about uninitialized variables as they will not be part of the initializers.
		uint32_t expr = get<SPIRVariable>(var).static_expression;
		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
			continue;
		if (expected == 0)
		{
			expected = get<SPIRVariable>(var).basetype;
			expected_flags = get_decoration_bitset(var);
		}
		else if (expected != get<SPIRVariable>(var).basetype)
			return false;
		// Precision flags and things like that must also match.
		if (expected_flags != get_decoration_bitset(var))
			return false;
	}
	return true;
}
void CompilerGLSL::emit_block_instructions_with_masked_debug(SPIRBlock &block)
{
	// Have to block debug instructions such as OpLine here, since it will be treated as a statement otherwise,
	// which breaks loop optimizations.
	// Any line directive would be declared outside the loop body, which would just be confusing either way.
	bool old_block_debug_directives = block_debug_directives;
	block_debug_directives = true;
	emit_block_instructions(block);
	block_debug_directives = old_block_debug_directives;
}
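// Tries to emit an idiomatic for/while loop header for the given merge method.
// Returns false (and forces a recompile with block optimization disabled) if the
// block cannot be expressed that way.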
bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
{
	SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
	{
		uint32_t current_count = statement_count;
		// If we're trying to create a true for loop,
		// we need to make sure that all opcodes before branch statement do not actually emit any code.
		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
		emit_block_instructions_with_masked_debug(block);
		bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
		bool flushes_phi = flush_phi_required(block.self, block.true_block) ||
		                   flush_phi_required(block.self, block.false_block);
		// This can work! We only did trivial things which could be forwarded in block body!
		if (!flushes_phi && current_count == statement_count && condition_is_temporary)
		{
			switch (continue_type)
			{
			case SPIRBlock::ForLoop:
			{
				// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
				flush_undeclared_variables(block);
				// Important that we do this in this order because
				// emitting the continue block can invalidate the condition expression.
				auto initializer = emit_for_loop_initializers(block);
				auto condition = to_expression(block.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
					condition = join("!", enclose_expression(condition));
				emit_block_hints(block);
				if (method != SPIRBlock::MergeToSelectContinueForLoop)
				{
					auto continue_block = emit_continue_block(block.continue_block, false, false);
					statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
				}
				else
					statement("for (", initializer, "; ", condition, "; )");
				break;
			}
			case SPIRBlock::WhileLoop:
			{
				// This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
				flush_undeclared_variables(block);
				emit_while_loop_initializers(block);
				emit_block_hints(block);
				auto condition = to_expression(block.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
					condition = join("!", enclose_expression(condition));
				statement("while (", condition, ")");
				break;
			}
			default:
				block.disable_block_optimization = true;
				force_recompile();
				begin_scope(); // We'll see an end_scope() later.
				return false;
			}
			begin_scope();
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile();
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else if (method == SPIRBlock::MergeToDirectForLoop)
	{
		auto &child = get<SPIRBlock>(block.next_block);
		// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
		flush_undeclared_variables(child);
		uint32_t current_count = statement_count;
		// If we're trying to create a true for loop,
		// we need to make sure that all opcodes before branch statement do not actually emit any code.
		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
		emit_block_instructions_with_masked_debug(child);
		bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
		bool flushes_phi = flush_phi_required(child.self, child.true_block) ||
		                   flush_phi_required(child.self, child.false_block);
		if (!flushes_phi && current_count == statement_count && condition_is_temporary)
		{
			uint32_t target_block = child.true_block;
			switch (continue_type)
			{
			case SPIRBlock::ForLoop:
			{
				// Important that we do this in this order because
				// emitting the continue block can invalidate the condition expression.
				auto initializer = emit_for_loop_initializers(block);
				auto condition = to_expression(child.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
				{
					condition = join("!", enclose_expression(condition));
					target_block = child.false_block;
				}
				auto continue_block = emit_continue_block(block.continue_block, false, false);
				emit_block_hints(block);
				statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
				break;
			}
			case SPIRBlock::WhileLoop:
			{
				emit_while_loop_initializers(block);
				emit_block_hints(block);
				auto condition = to_expression(child.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
				{
					condition = join("!", enclose_expression(condition));
					target_block = child.false_block;
				}
				statement("while (", condition, ")");
				break;
			}
			default:
				block.disable_block_optimization = true;
				force_recompile();
				begin_scope(); // We'll see an end_scope() later.
				return false;
			}
			begin_scope();
			branch(child.self, target_block);
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile();
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else
		return false;
}
void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
{
	for (auto &v : block.dominated_variables)
		flush_variable_declaration(v);
}
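// Declares temporaries which must be hoisted out of their block, e.g. because they
// are accessed from a continue block or read outside the loop body they were created in.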
void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
{
	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
	// Need to sort these to ensure that reference output is stable.
	sort(begin(temporaries), end(temporaries),
	     [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
	for (auto &tmp : temporaries)
	{
		auto &type = get<SPIRType>(tmp.first);
		// There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries.
		// This should be ignored unless we're doing actual variable pointers and backend supports it.
		// Access chains cannot normally be lowered to temporaries in GLSL and HLSL.
		if (type.pointer && !backend.native_pointers)
			continue;
		add_local_variable_name(tmp.second);
		auto &flags = get_decoration_bitset(tmp.second);
		// Not all targets support pointer literals, so don't bother with that case.
		string initializer;
		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
			initializer = join(" = ", to_zero_initialized_expression(tmp.first));
		statement(flags_to_qualifiers_glsl(type, tmp.second, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");
		hoisted_temporaries.insert(tmp.second);
		forced_temporaries.insert(tmp.second);
		// The temporary might be read from before it's assigned, set up the expression now.
		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
		// If we have hoisted temporaries in multi-precision contexts, emit that here too ...
		// We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here.
		auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(tmp.second);
		if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end())
		{
			uint32_t mirror_id = mirrored_precision_itr->second;
			auto &mirror_flags = get_decoration_bitset(mirror_id);
			statement(flags_to_qualifiers_glsl(type, mirror_id, mirror_flags),
			          variable_decl(type, to_name(mirror_id)),
			          initializer, ";");
			// The temporary might be read from before it's assigned, set up the expression now.
			set<SPIRExpression>(mirror_id, to_name(mirror_id), tmp.first, true);
			hoisted_temporaries.insert(mirror_id);
		}
	}
}
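// Drives emission of a chain of blocks: emit each block in order, then run the
// per-block cleanup in reverse order.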
void CompilerGLSL::emit_block_chain(SPIRBlock &block)
{
	SmallVector<BlockID> cleanup_stack;
	BlockID next_block = emit_block_chain_inner(block);
	while (next_block != 0)
	{
		cleanup_stack.push_back(next_block);
		next_block = emit_block_chain_inner(get<SPIRBlock>(next_block));
	}
	while (!cleanup_stack.empty())
	{
		emit_block_chain_cleanup(get<SPIRBlock>(cleanup_stack.back()));
		cleanup_stack.pop_back();
	}
	emit_block_chain_cleanup(block);
}
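// Emits a single block along with any structured construct it starts (loop, selection or switch).
// Returns the next block in the chain, or 0 when the chain ends.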
BlockID CompilerGLSL::emit_block_chain_inner(SPIRBlock &block)
{
	bool select_branch_to_true_block = false;
	bool select_branch_to_false_block = false;
	bool skip_direct_branch = false;
	bool emitted_loop_header_variables = false;
	bool force_complex_continue_block = false;
	ValueSaver<uint32_t> loop_level_saver(current_loop_level);
	if (block.merge == SPIRBlock::MergeLoop)
		add_loop_level();
	// If we're emitting PHI variables with precision aliases, we have to emit them as hoisted temporaries.
	for (auto var_id : block.dominated_variables)
	{
		auto &var = get<SPIRVariable>(var_id);
		if (var.phi_variable)
		{
			auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(var_id);
			if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end() &&
			    find_if(block.declare_temporary.begin(), block.declare_temporary.end(),
			            [mirrored_precision_itr](const std::pair<TypeID, VariableID> &p) {
				            return p.second == mirrored_precision_itr->second;
			            }) == block.declare_temporary.end())
			{
				block.declare_temporary.push_back({ var.basetype, mirrored_precision_itr->second });
			}
		}
	}
	emit_hoisted_temporaries(block.declare_temporary);
	SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
	if (block.continue_block)
	{
		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
		// If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
		if (continue_type == SPIRBlock::ComplexLoop)
			block.complex_continue = true;
	}
	// If we have loop variables, stop masking out access to the variable now.
	for (auto var_id : block.loop_variables)
	{
		auto &var = get<SPIRVariable>(var_id);
		var.loop_variable_enable = true;
		// We're not going to declare the variable directly, so emit a copy here.
		emit_variable_temporary_copies(var);
	}
	// Remember deferred declaration state. We will restore it before returning.
	assert(block.rearm_dominated_variables.empty());
	block.rearm_dominated_variables.resize(block.dominated_variables.size());
	for (size_t i = 0; i < block.dominated_variables.size(); i++)
	{
		uint32_t var_id = block.dominated_variables[i];
		auto &var = get<SPIRVariable>(var_id);
		block.rearm_dominated_variables[i] = var.deferred_declaration;
	}
	// This is the method often used by spirv-opt to implement loops.
	// The loop header goes straight into the continue block.
	// However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
	// it *MUST* be used in the continue block. This loop method will not work.
	if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
		{
			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
				select_branch_to_false_block = true;
			else
				select_branch_to_true_block = true;
			emitted_loop_header_variables = true;
			force_complex_continue_block = true;
		}
	}
	// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
	else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
		{
			// The body of while, is actually just the true (or false) block, so always branch there unconditionally.
			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
				select_branch_to_false_block = true;
			else
				select_branch_to_true_block = true;
			emitted_loop_header_variables = true;
		}
	}
	// This is the newer loop behavior in glslang which branches from Loop header directly to
	// a new block, which in turn has a OpBranchSelection without a selection merge.
	else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
		{
			skip_direct_branch = true;
			emitted_loop_header_variables = true;
		}
	}
	else if (continue_type == SPIRBlock::DoWhileLoop)
	{
		flush_undeclared_variables(block);
		emit_while_loop_initializers(block);
		emitted_loop_header_variables = true;
		// We have some temporaries where the loop header is the dominator.
		// We risk a case where we have code like:
		// for (;;) { create-temporary; break; } consume-temporary;
		// so force-declare temporaries here.
		emit_hoisted_temporaries(block.potential_declare_temporary);
		statement("do");
		begin_scope();
		emit_block_instructions(block);
	}
	else if (block.merge == SPIRBlock::MergeLoop)
	{
		flush_undeclared_variables(block);
		emit_while_loop_initializers(block);
		emitted_loop_header_variables = true;
		// We have a generic loop without any distinguishable pattern like for, while or do while.
		get<SPIRBlock>(block.continue_block).complex_continue = true;
		continue_type = SPIRBlock::ComplexLoop;
		// We have some temporaries where the loop header is the dominator.
		// We risk a case where we have code like:
		// for (;;) { create-temporary; break; } consume-temporary;
		// so force-declare temporaries here.
		emit_hoisted_temporaries(block.potential_declare_temporary);
		emit_block_hints(block);
		statement("for (;;)");
		begin_scope();
		emit_block_instructions(block);
	}
	else
	{
		emit_block_instructions(block);
	}
	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
	// as writes to said loop variables might have been masked out, we need a recompile.
	if (!emitted_loop_header_variables && !block.loop_variables.empty())
	{
		force_recompile_guarantee_forward_progress();
		for (auto var : block.loop_variables)
			get<SPIRVariable>(var).loop_variable = false;
		block.loop_variables.clear();
	}
	flush_undeclared_variables(block);
	bool emit_next_block = true;
	// Handle end of block.
	switch (block.terminator)
	{
	case SPIRBlock::Direct:
		// True when emitting complex continue block.
		if (block.loop_dominator == block.next_block)
		{
			branch(block.self, block.next_block);
			emit_next_block = false;
		}
		// True if MergeToDirectForLoop succeeded.
		else if (skip_direct_branch)
			emit_next_block = false;
		else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
		{
			branch(block.self, block.next_block);
			emit_next_block = false;
		}
		break;
	case SPIRBlock::Select:
		// True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
		if (select_branch_to_true_block)
		{
			if (force_complex_continue_block)
			{
				assert(block.true_block == block.continue_block);
				// We're going to emit a continue block directly here, so make sure it's marked as complex.
				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
				bool old_complex = complex_continue;
				complex_continue = true;
				branch(block.self, block.true_block);
				complex_continue = old_complex;
			}
			else
				branch(block.self, block.true_block);
		}
		else if (select_branch_to_false_block)
		{
			if (force_complex_continue_block)
			{
				assert(block.false_block == block.continue_block);
				// We're going to emit a continue block directly here, so make sure it's marked as complex.
				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
				bool old_complex = complex_continue;
				complex_continue = true;
				branch(block.self, block.false_block);
				complex_continue = old_complex;
			}
			else
				branch(block.self, block.false_block);
		}
		else
			branch(block.self, block.condition, block.true_block, block.false_block);
		break;
  16201. case SPIRBlock::MultiSelect:
  16202. {
  16203. auto &type = expression_type(block.condition);
  16204. bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort ||
  16205. type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64;
  16206. if (block.merge == SPIRBlock::MergeNone)
  16207. SPIRV_CROSS_THROW("Switch statement is not structured");
  16208. if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64))
  16209. {
  16210. // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
  16211. SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
  16212. }
  16213. const char *label_suffix = "";
  16214. if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
  16215. label_suffix = "u";
  16216. else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch)
  16217. label_suffix = "l";
  16218. else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch)
  16219. label_suffix = "ul";
  16220. else if (type.basetype == SPIRType::UShort)
  16221. label_suffix = backend.uint16_t_literal_suffix;
  16222. else if (type.basetype == SPIRType::Short)
  16223. label_suffix = backend.int16_t_literal_suffix;
  16224. current_emitting_switch_stack.push_back(&block);
  16225. if (block.need_ladder_break)
  16226. statement("bool _", block.self, "_ladder_break = false;");
  16227. // Find all unique case constructs.
  16228. unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs;
  16229. SmallVector<uint32_t> block_declaration_order;
  16230. SmallVector<uint64_t> literals_to_merge;
  16231. // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
  16232. // and let the default: block handle it.
  16233. // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
  16234. // We only need to consider possible fallthrough if order[i] branches to order[i + 1].
  16235. auto &cases = get_case_list(block);
  16236. for (auto &c : cases)
  16237. {
  16238. if (c.block != block.next_block && c.block != block.default_block)
  16239. {
  16240. if (!case_constructs.count(c.block))
  16241. block_declaration_order.push_back(c.block);
  16242. case_constructs[c.block].push_back(c.value);
  16243. }
  16244. else if (c.block == block.next_block && block.default_block != block.next_block)
  16245. {
  16246. // We might have to flush phi inside specific case labels.
  16247. // If we can piggyback on default:, do so instead.
  16248. literals_to_merge.push_back(c.value);
  16249. }
  16250. }
  16251. // Empty literal array -> default.
  16252. if (block.default_block != block.next_block)
  16253. {
  16254. auto &default_block = get<SPIRBlock>(block.default_block);
  16255. // We need to slide in the default block somewhere in this chain
  16256. // if there are fall-through scenarios since the default is declared separately in OpSwitch.
  16257. // Only consider trivial fall-through cases here.
  16258. size_t num_blocks = block_declaration_order.size();
  16259. bool injected_block = false;
  16260. for (size_t i = 0; i < num_blocks; i++)
  16261. {
  16262. auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
  16263. if (execution_is_direct_branch(case_block, default_block))
  16264. {
  16265. // Fallthrough to default block, we must inject the default block here.
  16266. block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
  16267. injected_block = true;
  16268. break;
  16269. }
  16270. else if (execution_is_direct_branch(default_block, case_block))
  16271. {
  16272. // Default case is falling through to another case label, we must inject the default block here.
  16273. block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
  16274. injected_block = true;
  16275. break;
  16276. }
  16277. }
  16278. // Order does not matter.
  16279. if (!injected_block)
  16280. block_declaration_order.push_back(block.default_block);
  16281. else if (is_legacy_es())
  16282. SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
  16283. case_constructs[block.default_block] = {};
  16284. }
  16285. size_t num_blocks = block_declaration_order.size();
  16286. const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string
  16287. {
  16288. if (is_unsigned_case)
  16289. return convert_to_string(literal);
  16290. // For smaller cases, the literals are compiled as 32 bit wide
  16291. // literals so we don't need to care for all sizes specifically.
  16292. if (width <= 32)
  16293. {
  16294. return convert_to_string(int64_t(int32_t(literal)));
  16295. }
  16296. return convert_to_string(int64_t(literal));
  16297. };
  16298. const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels,
  16299. const char *suffix) -> string {
  16300. string ret;
  16301. size_t count = labels.size();
  16302. for (size_t i = 0; i < count; i++)
  16303. {
  16304. if (i)
  16305. ret += " || ";
  16306. ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
  16307. count > 1 ? ")" : "");
  16308. }
  16309. return ret;
  16310. };
  16311. // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
  16312. // we need to flush phi nodes outside the switch block in a branch,
  16313. // and skip any Phi handling inside the case label to make fall-through work as expected.
  16314. // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
  16315. // inside the case label if at all possible.
  16316. for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
  16317. {
  16318. if (flush_phi_required(block.self, block_declaration_order[i]) &&
  16319. flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
  16320. {
  16321. uint32_t target_block = block_declaration_order[i];
  16322. // Make sure we flush Phi, it might have been marked to be ignored earlier.
  16323. get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
  16324. auto &literals = case_constructs[target_block];
  16325. if (literals.empty())
  16326. {
  16327. // Oh boy, gotta make a complete negative test instead! o.o
  16328. // Find all possible literals that would *not* make us enter the default block.
  16329. // If none of those literals match, we flush Phi ...
  16330. SmallVector<string> conditions;
  16331. for (size_t j = 0; j < num_blocks; j++)
  16332. {
  16333. auto &negative_literals = case_constructs[block_declaration_order[j]];
  16334. for (auto &case_label : negative_literals)
  16335. conditions.push_back(join(to_enclosed_expression(block.condition),
  16336. " != ", to_case_label(case_label, type.width, unsigned_case)));
  16337. }
  16338. statement("if (", merge(conditions, " && "), ")");
  16339. begin_scope();
  16340. flush_phi(block.self, target_block);
  16341. end_scope();
  16342. }
  16343. else
  16344. {
  16345. SmallVector<string> conditions;
  16346. conditions.reserve(literals.size());
  16347. for (auto &case_label : literals)
  16348. conditions.push_back(join(to_enclosed_expression(block.condition),
  16349. " == ", to_case_label(case_label, type.width, unsigned_case)));
  16350. statement("if (", merge(conditions, " || "), ")");
  16351. begin_scope();
  16352. flush_phi(block.self, target_block);
  16353. end_scope();
  16354. }
  16355. // Mark the block so that we don't flush Phi from header to case label.
  16356. get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
  16357. }
  16358. }
  16359. // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
  16360. // non-structured exits with the help of a switch block.
  16361. // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
  16362. bool block_like_switch = cases.empty();
  16363. // If this is true, the switch is completely meaningless, and we should just avoid it.
  16364. bool collapsed_switch = block_like_switch && block.default_block == block.next_block;
  16365. if (!collapsed_switch)
  16366. {
  16367. if (block_like_switch || is_legacy())
  16368. {
  16369. // ESSL 1.0 is not guaranteed to support do/while.
  16370. if (is_legacy_es())
  16371. {
  16372. uint32_t counter = statement_count;
  16373. statement("for (int spvDummy", counter, " = 0; spvDummy", counter, " < 1; spvDummy", counter,
  16374. "++)");
  16375. }
  16376. else
  16377. statement("do");
  16378. }
  16379. else
  16380. {
  16381. emit_block_hints(block);
  16382. statement("switch (", to_unpacked_expression(block.condition), ")");
  16383. }
  16384. begin_scope();
  16385. }
  16386. for (size_t i = 0; i < num_blocks; i++)
  16387. {
  16388. uint32_t target_block = block_declaration_order[i];
  16389. auto &literals = case_constructs[target_block];
  16390. if (literals.empty())
  16391. {
  16392. // Default case.
  16393. if (!block_like_switch)
  16394. {
  16395. if (is_legacy())
  16396. statement("else");
  16397. else
  16398. statement("default:");
  16399. }
  16400. }
  16401. else
  16402. {
  16403. if (is_legacy())
  16404. {
  16405. statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
  16406. ")");
  16407. }
  16408. else
  16409. {
  16410. for (auto &case_literal : literals)
  16411. {
  16412. // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
  16413. statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");
  16414. }
  16415. }
  16416. }
  16417. auto &case_block = get<SPIRBlock>(target_block);
  16418. if (backend.support_case_fallthrough && i + 1 < num_blocks &&
  16419. execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
  16420. {
  16421. // We will fall through here, so just terminate the block chain early.
  16422. // We still need to deal with Phi potentially.
  16423. // No need for a stack-like thing here since we only do fall-through when there is a
16424. // single trivial branch to the fall-through target.
  16425. current_emitting_switch_fallthrough = true;
  16426. }
  16427. else
  16428. current_emitting_switch_fallthrough = false;
  16429. if (!block_like_switch)
  16430. begin_scope();
  16431. branch(block.self, target_block);
  16432. if (!block_like_switch)
  16433. end_scope();
  16434. current_emitting_switch_fallthrough = false;
  16435. }
  16436. // Might still have to flush phi variables if we branch from loop header directly to merge target.
  16437. // This is supposed to emit all cases where we branch from header to merge block directly.
16438. // There are two main scenarios where we cannot rely on default fallthrough.
  16439. // - There is an explicit default: label already.
  16440. // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block.
  16441. // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there.
  16442. bool header_merge_requires_phi = flush_phi_required(block.self, block.next_block);
  16443. bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty();
  16444. if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty()))
  16445. {
  16446. for (auto &case_literal : literals_to_merge)
  16447. statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");
  16448. if (block.default_block == block.next_block)
  16449. {
  16450. if (is_legacy())
  16451. statement("else");
  16452. else
  16453. statement("default:");
  16454. }
  16455. begin_scope();
  16456. flush_phi(block.self, block.next_block);
  16457. statement("break;");
  16458. end_scope();
  16459. }
  16460. if (!collapsed_switch)
  16461. {
  16462. if ((block_like_switch || is_legacy()) && !is_legacy_es())
  16463. end_scope_decl("while(false)");
  16464. else
  16465. end_scope();
  16466. }
  16467. else
  16468. flush_phi(block.self, block.next_block);
  16469. if (block.need_ladder_break)
  16470. {
  16471. statement("if (_", block.self, "_ladder_break)");
  16472. begin_scope();
  16473. statement("break;");
  16474. end_scope();
  16475. }
  16476. current_emitting_switch_stack.pop_back();
  16477. break;
  16478. }
  16479. case SPIRBlock::Return:
  16480. {
  16481. for (auto &line : current_function->fixup_hooks_out)
  16482. line();
  16483. if (processing_entry_point)
  16484. emit_fixup();
  16485. auto &cfg = get_cfg_for_current_function();
  16486. if (block.return_value)
  16487. {
  16488. auto &type = expression_type(block.return_value);
  16489. if (!type.array.empty() && !backend.can_return_array)
  16490. {
  16491. // If we cannot return arrays, we will have a special out argument we can write to instead.
16492. // The backend is responsible for setting this up and redirecting the return values as appropriate.
  16493. if (ir.ids[block.return_value].get_type() != TypeUndef)
  16494. {
  16495. emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
  16496. get_expression_effective_storage_class(block.return_value));
  16497. }
  16498. if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
  16499. block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  16500. {
  16501. statement("return;");
  16502. }
  16503. }
  16504. else
  16505. {
  16506. // OpReturnValue can return Undef, so don't emit anything for this case.
  16507. if (ir.ids[block.return_value].get_type() != TypeUndef)
  16508. statement("return ", to_unpacked_expression(block.return_value), ";");
  16509. }
  16510. }
  16511. else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
  16512. block.loop_dominator != BlockID(SPIRBlock::NoDominator))
  16513. {
  16514. // If this block is the very final block and not called from control flow,
  16515. // we do not need an explicit return which looks out of place. Just end the function here.
  16516. // In the very weird case of for(;;) { return; } executing return is unconditional,
  16517. // but we actually need a return here ...
  16518. statement("return;");
  16519. }
  16520. break;
  16521. }
  16522. // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement.
  16523. case SPIRBlock::Kill:
  16524. statement(backend.discard_literal, ";");
  16525. if (block.return_value)
  16526. statement("return ", to_unpacked_expression(block.return_value), ";");
  16527. break;
  16528. case SPIRBlock::Unreachable:
  16529. {
  16530. // If the entry point ends with unreachable and has a return value, insert a return
  16531. // statement to avoid potential compiler errors from non-void functions without a return value.
  16532. if (block.return_value)
  16533. {
  16534. statement("return ", to_unpacked_expression(block.return_value), ";");
  16535. break;
  16536. }
  16537. // Avoid emitting false fallthrough, which can happen for
  16538. // if (cond) break; else discard; inside a case label.
  16539. // Discard is not always implementable as a terminator.
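// Illustrative sketch: in that situation the generated case body may roughly end as
//   discard;
//   break; // unreachable workaround
// so strict GLSL front-ends do not diagnose an implicit fall-through.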
  16540. auto &cfg = get_cfg_for_current_function();
  16541. bool inner_dominator_is_switch = false;
  16542. ID id = block.self;
  16543. while (id)
  16544. {
  16545. auto &iter_block = get<SPIRBlock>(id);
  16546. if (iter_block.terminator == SPIRBlock::MultiSelect ||
  16547. iter_block.merge == SPIRBlock::MergeLoop)
  16548. {
  16549. ID next_block = iter_block.merge == SPIRBlock::MergeLoop ?
  16550. iter_block.merge_block : iter_block.next_block;
  16551. bool outside_construct = next_block && cfg.find_common_dominator(next_block, block.self) == next_block;
  16552. if (!outside_construct)
  16553. {
  16554. inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect;
  16555. break;
  16556. }
  16557. }
  16558. if (cfg.get_preceding_edges(id).empty())
  16559. break;
  16560. id = cfg.get_immediate_dominator(id);
  16561. }
  16562. if (inner_dominator_is_switch)
  16563. statement("break; // unreachable workaround");
  16564. emit_next_block = false;
  16565. break;
  16566. }
  16567. case SPIRBlock::IgnoreIntersection:
  16568. statement("ignoreIntersectionEXT;");
  16569. break;
  16570. case SPIRBlock::TerminateRay:
  16571. statement("terminateRayEXT;");
  16572. break;
  16573. case SPIRBlock::EmitMeshTasks:
  16574. emit_mesh_tasks(block);
  16575. break;
  16576. default:
  16577. SPIRV_CROSS_THROW("Unimplemented block terminator.");
  16578. }
  16579. BlockID trailing_block_id = 0;
  16580. if (block.next_block && emit_next_block)
  16581. {
  16582. // If we hit this case, we're dealing with an unconditional branch, which means we will output
  16583. // that block after this. If we had selection merge, we already flushed phi variables.
  16584. if (block.merge != SPIRBlock::MergeSelection)
  16585. {
  16586. flush_phi(block.self, block.next_block);
  16587. // For a direct branch, need to remember to invalidate expressions in the next linear block instead.
  16588. get<SPIRBlock>(block.next_block).invalidate_expressions.clear();
  16589. std::swap(get<SPIRBlock>(block.next_block).invalidate_expressions, block.invalidate_expressions);
  16590. }
  16591. // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
  16592. if (!current_emitting_switch_fallthrough)
  16593. {
  16594. // For merge selects we might have ignored the fact that a merge target
  16595. // could have been a break; or continue;
  16596. // We will need to deal with it here.
  16597. if (is_loop_break(block.next_block))
  16598. {
  16599. // Cannot check for just break, because switch statements will also use break.
  16600. assert(block.merge == SPIRBlock::MergeSelection);
  16601. statement("break;");
  16602. }
  16603. else if (is_continue(block.next_block))
  16604. {
  16605. assert(block.merge == SPIRBlock::MergeSelection);
  16606. branch_to_continue(block.self, block.next_block);
  16607. }
  16608. else if (BlockID(block.self) != block.next_block)
  16609. {
  16610. // Recursing here is quite scary since it's quite easy to stack overflow if
  16611. // the SPIR-V is constructed a particular way.
  16612. // We have to simulate the tail call ourselves.
  16613. if (block.merge != SPIRBlock::MergeLoop)
  16614. trailing_block_id = block.next_block;
  16615. else
  16616. emit_block_chain(get<SPIRBlock>(block.next_block));
  16617. }
  16618. }
  16619. }
  16620. if (block.merge == SPIRBlock::MergeLoop)
  16621. {
  16622. if (continue_type == SPIRBlock::DoWhileLoop)
  16623. {
  16624. // Make sure that we run the continue block to get the expressions set, but this
  16625. // should become an empty string.
  16626. // We have no fallbacks if we cannot forward everything to temporaries ...
  16627. const auto &continue_block = get<SPIRBlock>(block.continue_block);
  16628. bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
  16629. get<SPIRBlock>(continue_block.loop_dominator));
  16630. uint32_t current_count = statement_count;
  16631. auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
  16632. if (statement_count != current_count)
  16633. {
  16634. // The DoWhile block has side effects, force ComplexLoop pattern next pass.
  16635. get<SPIRBlock>(block.continue_block).complex_continue = true;
  16636. force_recompile();
  16637. }
  16638. // Might have to invert the do-while test here.
  16639. auto condition = to_expression(continue_block.condition);
  16640. if (!positive_test)
  16641. condition = join("!", enclose_expression(condition));
  16642. end_scope_decl(join("while (", condition, ")"));
  16643. }
  16644. else
  16645. end_scope();
  16646. loop_level_saver.release();
  16647. // We cannot break out of two loops at once, so don't check for break; here.
  16648. // Using block.self as the "from" block isn't quite right, but it has the same scope
  16649. // and dominance structure, so it's fine.
  16650. if (is_continue(block.merge_block))
  16651. branch_to_continue(block.self, block.merge_block);
  16652. else
  16653. trailing_block_id = block.merge_block;
  16654. }
  16655. return trailing_block_id;
  16656. }
  16657. void CompilerGLSL::emit_block_chain_cleanup(SPIRBlock &block)
  16658. {
  16659. // Forget about control dependent expressions now.
  16660. block.invalidate_expressions.clear();
  16661. // After we return, we must be out of scope, so if we somehow have to re-emit this block,
  16662. // re-declare variables if necessary.
  16663. // We only need one array here for rearm_dominated_variables,
16664. // since it should be impossible for the same block to be re-emitted in the same chain twice.
  16665. assert(block.rearm_dominated_variables.size() == block.dominated_variables.size());
  16666. for (size_t i = 0; i < block.dominated_variables.size(); i++)
  16667. {
  16668. uint32_t var = block.dominated_variables[i];
  16669. get<SPIRVariable>(var).deferred_declaration = block.rearm_dominated_variables[i];
  16670. }
  16671. block.rearm_dominated_variables.clear();
  16672. // Just like for deferred declaration, we need to forget about loop variable enable
  16673. // if our block chain is reinstantiated later.
  16674. for (auto &var_id : block.loop_variables)
  16675. get<SPIRVariable>(var_id).loop_variable_enable = false;
  16676. }
  16677. void CompilerGLSL::begin_scope()
  16678. {
  16679. statement("{");
  16680. indent++;
  16681. }
  16682. void CompilerGLSL::end_scope()
  16683. {
  16684. if (!indent)
  16685. SPIRV_CROSS_THROW("Popping empty indent stack.");
  16686. indent--;
  16687. statement("}");
  16688. }
  16689. void CompilerGLSL::end_scope(const string &trailer)
  16690. {
  16691. if (!indent)
  16692. SPIRV_CROSS_THROW("Popping empty indent stack.");
  16693. indent--;
  16694. statement("}", trailer);
  16695. }
  16696. void CompilerGLSL::end_scope_decl()
  16697. {
  16698. if (!indent)
  16699. SPIRV_CROSS_THROW("Popping empty indent stack.");
  16700. indent--;
  16701. statement("};");
  16702. }
  16703. void CompilerGLSL::end_scope_decl(const string &decl)
  16704. {
  16705. if (!indent)
  16706. SPIRV_CROSS_THROW("Popping empty indent stack.");
  16707. indent--;
  16708. statement("} ", decl, ";");
  16709. }
  16710. void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
  16711. {
  16712. // If our variable is remapped, and we rely on type-remapping information as
  16713. // well, then we cannot pass the variable as a function parameter.
  16714. // Fixing this is non-trivial without stamping out variants of the same function,
  16715. // so for now warn about this and suggest workarounds instead.
  16716. for (uint32_t i = 0; i < length; i++)
  16717. {
  16718. auto *var = maybe_get<SPIRVariable>(args[i]);
  16719. if (!var || !var->remapped_variable)
  16720. continue;
  16721. auto &type = get<SPIRType>(var->basetype);
  16722. if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
  16723. {
  16724. SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
  16725. "This will not work correctly because type-remapping information is lost. "
16726. "To work around this, please consider not passing the subpass input as a function parameter, "
  16727. "or use in/out variables instead which do not need type remapping information.");
  16728. }
  16729. }
  16730. }
  16731. const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
  16732. {
  16733. // FIXME: This is kind of hacky. There should be a cleaner way.
  16734. auto offset = uint32_t(&instr - current_emitting_block->ops.data());
  16735. if ((offset + 1) < current_emitting_block->ops.size())
  16736. return &current_emitting_block->ops[offset + 1];
  16737. else
  16738. return nullptr;
  16739. }
  16740. uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
  16741. {
  16742. return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
  16743. MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
  16744. MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
  16745. }
  16746. bool CompilerGLSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass, StorageClass)
  16747. {
  16748. string lhs;
  16749. if (expr)
  16750. lhs = expr;
  16751. else
  16752. lhs = to_expression(lhs_id);
  16753. statement(lhs, " = ", to_expression(rhs_id), ";");
  16754. return true;
  16755. }
  16756. bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
  16757. {
  16758. if (!backend.force_gl_in_out_block)
  16759. return false;
  16760. // This path is only relevant for GL backends.
  16761. auto *var = maybe_get<SPIRVariable>(target_id);
  16762. if (!var || var->storage != StorageClassOutput)
  16763. return false;
  16764. if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask)
  16765. return false;
  16766. auto &type = expression_type(source_id);
  16767. string array_expr;
  16768. if (type.array_size_literal.back())
  16769. {
  16770. array_expr = convert_to_string(type.array.back());
  16771. if (type.array.back() == 0)
  16772. SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
  16773. }
  16774. else
  16775. array_expr = to_expression(type.array.back());
  16776. SPIRType target_type { OpTypeInt };
  16777. target_type.basetype = SPIRType::Int;
  16778. statement("for (int i = 0; i < int(", array_expr, "); i++)");
  16779. begin_scope();
  16780. statement(to_expression(target_id), "[i] = ",
  16781. bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")),
  16782. ";");
  16783. end_scope();
  16784. return true;
  16785. }
  16786. void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
  16787. {
  16788. if (!backend.force_gl_in_out_block)
  16789. return;
  16790. // This path is only relevant for GL backends.
  16791. auto *var = maybe_get<SPIRVariable>(source_id);
  16792. if (!var)
  16793. return;
  16794. if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
  16795. return;
  16796. auto &type = get_variable_data_type(*var);
  16797. if (type.array.empty())
  16798. return;
  16799. auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
  16800. bool is_builtin = is_builtin_variable(*var) &&
  16801. (builtin == BuiltInPointSize ||
  16802. builtin == BuiltInPosition ||
  16803. builtin == BuiltInSampleMask);
  16804. bool is_tess = is_tessellation_shader();
  16805. bool is_patch = has_decoration(var->self, DecorationPatch);
  16806. bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;
16807. // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from them.
  16808. // We must unroll the array load.
  16809. // For builtins, we couldn't catch this case normally,
  16810. // because this is resolved in the OpAccessChain in most cases.
  16811. // If we load the entire array, we have no choice but to unroll here.
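// Illustrative sketch (identifiers made up): loading e.g. gl_Position for all
// tessellation control points then becomes
//   vec4 _42_unrolled[N];
//   for (int i = 0; i < int(N); i++)
//       _42_unrolled[i] = gl_in[i].gl_Position;
// instead of a direct copy of the unsized input array.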
  16812. if (!is_patch && (is_builtin || is_tess))
  16813. {
  16814. auto new_expr = join("_", target_id, "_unrolled");
  16815. statement(variable_decl(type, new_expr, target_id), ";");
  16816. string array_expr;
  16817. if (type.array_size_literal.back())
  16818. {
  16819. array_expr = convert_to_string(type.array.back());
  16820. if (type.array.back() == 0)
  16821. SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
  16822. }
  16823. else
  16824. array_expr = to_expression(type.array.back());
  16825. // The array size might be a specialization constant, so use a for-loop instead.
  16826. statement("for (int i = 0; i < int(", array_expr, "); i++)");
  16827. begin_scope();
  16828. if (is_builtin && !is_sample_mask)
  16829. statement(new_expr, "[i] = gl_in[i].", expr, ";");
  16830. else if (is_sample_mask)
  16831. {
  16832. SPIRType target_type { OpTypeInt };
  16833. target_type.basetype = SPIRType::Int;
  16834. statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";");
  16835. }
  16836. else
  16837. statement(new_expr, "[i] = ", expr, "[i];");
  16838. end_scope();
  16839. expr = std::move(new_expr);
  16840. }
  16841. }
  16842. void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
  16843. {
  16844. // We will handle array cases elsewhere.
  16845. if (!expr_type.array.empty())
  16846. return;
  16847. auto *var = maybe_get_backing_variable(source_id);
  16848. if (var)
  16849. source_id = var->self;
  16850. // Only interested in standalone builtin variables.
  16851. if (!has_decoration(source_id, DecorationBuiltIn))
  16852. {
  16853. // Except for int attributes in legacy GLSL, which are cast from float.
  16854. if (is_legacy() && expr_type.basetype == SPIRType::Int && var && var->storage == StorageClassInput)
  16855. expr = join(type_to_glsl(expr_type), "(", expr, ")");
  16856. return;
  16857. }
  16858. auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
  16859. auto expected_type = expr_type.basetype;
  16860. // TODO: Fill in for more builtins.
  16861. switch (builtin)
  16862. {
  16863. case BuiltInLayer:
  16864. case BuiltInPrimitiveId:
  16865. case BuiltInViewportIndex:
  16866. case BuiltInInstanceId:
  16867. case BuiltInInstanceIndex:
  16868. case BuiltInVertexId:
  16869. case BuiltInVertexIndex:
  16870. case BuiltInSampleId:
  16871. case BuiltInBaseVertex:
  16872. case BuiltInBaseInstance:
  16873. case BuiltInDrawIndex:
  16874. case BuiltInFragStencilRefEXT:
  16875. case BuiltInInstanceCustomIndexNV:
  16876. case BuiltInSampleMask:
  16877. case BuiltInPrimitiveShadingRateKHR:
  16878. case BuiltInShadingRateKHR:
  16879. expected_type = SPIRType::Int;
  16880. break;
  16881. case BuiltInGlobalInvocationId:
  16882. case BuiltInLocalInvocationId:
  16883. case BuiltInWorkgroupId:
  16884. case BuiltInLocalInvocationIndex:
  16885. case BuiltInWorkgroupSize:
  16886. case BuiltInNumWorkgroups:
  16887. case BuiltInIncomingRayFlagsNV:
  16888. case BuiltInLaunchIdNV:
  16889. case BuiltInLaunchSizeNV:
  16890. case BuiltInPrimitiveTriangleIndicesEXT:
  16891. case BuiltInPrimitiveLineIndicesEXT:
  16892. case BuiltInPrimitivePointIndicesEXT:
  16893. expected_type = SPIRType::UInt;
  16894. break;
  16895. default:
  16896. break;
  16897. }
  16898. if (expected_type != expr_type.basetype)
  16899. expr = bitcast_expression(expr_type, expected_type, expr);
  16900. }
  16901. SPIRType::BaseType CompilerGLSL::get_builtin_basetype(BuiltIn builtin, SPIRType::BaseType default_type)
  16902. {
  16903. // TODO: Fill in for more builtins.
  16904. switch (builtin)
  16905. {
  16906. case BuiltInLayer:
  16907. case BuiltInPrimitiveId:
  16908. case BuiltInViewportIndex:
  16909. case BuiltInFragStencilRefEXT:
  16910. case BuiltInSampleMask:
  16911. case BuiltInPrimitiveShadingRateKHR:
  16912. case BuiltInShadingRateKHR:
  16913. return SPIRType::Int;
  16914. default:
  16915. return default_type;
  16916. }
  16917. }
  16918. void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
  16919. {
  16920. auto *var = maybe_get_backing_variable(target_id);
  16921. if (var)
  16922. target_id = var->self;
  16923. // Only interested in standalone builtin variables.
  16924. if (!has_decoration(target_id, DecorationBuiltIn))
  16925. return;
  16926. auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
  16927. auto expected_type = get_builtin_basetype(builtin, expr_type.basetype);
  16928. if (expected_type != expr_type.basetype)
  16929. {
  16930. auto type = expr_type;
  16931. type.basetype = expected_type;
  16932. expr = bitcast_expression(type, expr_type.basetype, expr);
  16933. }
  16934. }
  16935. void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
  16936. {
  16937. if (*backend.nonuniform_qualifier == '\0')
  16938. return;
  16939. auto *var = maybe_get_backing_variable(ptr_id);
  16940. if (!var)
  16941. return;
  16942. if (var->storage != StorageClassUniformConstant &&
  16943. var->storage != StorageClassStorageBuffer &&
  16944. var->storage != StorageClassUniform)
  16945. return;
  16946. auto &backing_type = get<SPIRType>(var->basetype);
  16947. if (backing_type.array.empty())
  16948. return;
  16949. // If we get here, we know we're accessing an arrayed resource which
  16950. // might require nonuniform qualifier.
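// For example (illustrative): with backend.nonuniform_qualifier == "nonuniformEXT",
//   uSamplers[index]  ->  uSamplers[nonuniformEXT(index)]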
  16951. auto start_array_index = expr.find_first_of('[');
  16952. if (start_array_index == string::npos)
  16953. return;
  16954. // We've opened a bracket, track expressions until we can close the bracket.
  16955. // This must be our resource index.
  16956. size_t end_array_index = string::npos;
  16957. unsigned bracket_count = 1;
  16958. for (size_t index = start_array_index + 1; index < expr.size(); index++)
  16959. {
  16960. if (expr[index] == ']')
  16961. {
  16962. if (--bracket_count == 0)
  16963. {
  16964. end_array_index = index;
  16965. break;
  16966. }
  16967. }
  16968. else if (expr[index] == '[')
  16969. bracket_count++;
  16970. }
  16971. assert(bracket_count == 0);
  16972. // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
  16973. // nothing we can do here to express that.
  16974. if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
  16975. return;
  16976. start_array_index++;
  16977. expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
  16978. expr.substr(start_array_index, end_array_index - start_array_index), ")",
  16979. expr.substr(end_array_index, string::npos));
  16980. }
  16981. void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
  16982. {
  16983. if ((options.es && options.version < 310) || (!options.es && options.version < 140))
  16984. return;
  16985. switch (block.hint)
  16986. {
  16987. case SPIRBlock::HintFlatten:
  16988. require_extension_internal("GL_EXT_control_flow_attributes");
  16989. statement("SPIRV_CROSS_FLATTEN");
  16990. break;
  16991. case SPIRBlock::HintDontFlatten:
  16992. require_extension_internal("GL_EXT_control_flow_attributes");
  16993. statement("SPIRV_CROSS_BRANCH");
  16994. break;
  16995. case SPIRBlock::HintUnroll:
  16996. require_extension_internal("GL_EXT_control_flow_attributes");
  16997. statement("SPIRV_CROSS_UNROLL");
  16998. break;
  16999. case SPIRBlock::HintDontUnroll:
  17000. require_extension_internal("GL_EXT_control_flow_attributes");
  17001. statement("SPIRV_CROSS_LOOP");
  17002. break;
  17003. default:
  17004. break;
  17005. }
  17006. }
  17007. void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
  17008. {
  17009. preserved_aliases[id] = get_name(id);
  17010. }
  17011. void CompilerGLSL::reset_name_caches()
  17012. {
  17013. for (auto &preserved : preserved_aliases)
  17014. set_name(preserved.first, preserved.second);
  17015. preserved_aliases.clear();
  17016. resource_names.clear();
  17017. block_input_names.clear();
  17018. block_output_names.clear();
  17019. block_ubo_names.clear();
  17020. block_ssbo_names.clear();
  17021. block_names.clear();
  17022. function_overloads.clear();
  17023. }
  17024. void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set<uint32_t> &visited, const SPIRType &type)
  17025. {
  17026. if (visited.count(type.self))
  17027. return;
  17028. visited.insert(type.self);
  17029. for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
  17030. {
  17031. auto &mbr_type = get<SPIRType>(type.member_types[i]);
  17032. if (mbr_type.basetype == SPIRType::Struct)
  17033. {
  17034. // If there are multiple aliases, the output might be somewhat unpredictable,
  17035. // but the only real alternative in that case is to do nothing, which isn't any better.
  17036. // This check should be fine in practice.
  17037. if (get_name(mbr_type.self).empty() && !get_member_name(type.self, i).empty())
  17038. {
  17039. auto anon_name = join("anon_", get_member_name(type.self, i));
  17040. ParsedIR::sanitize_underscores(anon_name);
  17041. set_name(mbr_type.self, anon_name);
  17042. }
  17043. fixup_anonymous_struct_names(visited, mbr_type);
  17044. }
  17045. }
  17046. }
  17047. void CompilerGLSL::fixup_anonymous_struct_names()
  17048. {
  17049. // HLSL codegen can often end up emitting anonymous structs inside blocks, which
  17050. // breaks GL linking since all names must match ...
  17051. // Try to emit sensible code, so attempt to find such structs and emit anon_$member.
  17052. // Breaks exponential explosion with weird type trees.
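// Illustrative example: an unnamed struct behind a block member named "Light" would be
// given the name "anon_Light", so both linked stages agree on the type name.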
  17053. std::unordered_set<uint32_t> visited;
  17054. ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
  17055. if (type.basetype == SPIRType::Struct &&
  17056. (has_decoration(type.self, DecorationBlock) ||
  17057. has_decoration(type.self, DecorationBufferBlock)))
  17058. {
  17059. fixup_anonymous_struct_names(visited, type);
  17060. }
  17061. });
  17062. }
  17063. void CompilerGLSL::fixup_type_alias()
  17064. {
  17065. // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
  17066. ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
  17067. if (!type.type_alias)
  17068. return;
  17069. if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
  17070. {
  17071. // Top-level block types should never alias anything else.
  17072. type.type_alias = 0;
  17073. }
  17074. else if (type_is_block_like(type) && type.self == ID(self))
  17075. {
  17076. // A block-like type is any type which contains Offset decoration, but not top-level blocks,
  17077. // i.e. blocks which are placed inside buffers.
  17078. // Become the master.
  17079. ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
  17080. if (other_id == self)
  17081. return;
  17082. if (other_type.type_alias == type.type_alias)
  17083. other_type.type_alias = self;
  17084. });
  17085. this->get<SPIRType>(type.type_alias).type_alias = self;
  17086. type.type_alias = 0;
  17087. }
  17088. });
  17089. }
  17090. void CompilerGLSL::reorder_type_alias()
  17091. {
  17092. // Reorder declaration of types so that the master of the type alias is always emitted first.
17093. // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type ABuffer, which
  17094. // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here.
  17095. auto loop_lock = ir.create_loop_hard_lock();
  17096. auto &type_ids = ir.ids_for_type[TypeType];
  17097. for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
  17098. {
  17099. auto &type = get<SPIRType>(*alias_itr);
  17100. if (type.type_alias != TypeID(0) &&
  17101. !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
  17102. {
  17103. // We will skip declaring this type, so make sure the type_alias type comes before.
  17104. auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
  17105. assert(master_itr != end(type_ids));
  17106. if (alias_itr < master_itr)
  17107. {
  17108. // Must also swap the type order for the constant-type joined array.
  17109. auto &joined_types = ir.ids_for_constant_undef_or_type;
  17110. auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
  17111. auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
  17112. assert(alt_alias_itr != end(joined_types));
  17113. assert(alt_master_itr != end(joined_types));
  17114. swap(*alias_itr, *master_itr);
  17115. swap(*alt_alias_itr, *alt_master_itr);
  17116. }
  17117. }
  17118. }
  17119. }
  17120. void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
  17121. {
  17122. // If we are redirecting statements, ignore the line directive.
  17123. // Common case here is continue blocks.
  17124. if (redirect_statement)
  17125. return;
  17126. // If we're emitting code in a sensitive context such as condition blocks in for loops, don't emit
  17127. // any line directives, because it's not possible.
  17128. if (block_debug_directives)
  17129. return;
  17130. if (options.emit_line_directives)
  17131. {
  17132. require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
  17133. statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
  17134. }
  17135. }
  17136. void CompilerGLSL::emit_non_semantic_shader_debug_info(uint32_t, uint32_t result_id, uint32_t eop,
  17137. const uint32_t *args, uint32_t)
  17138. {
  17139. if (!options.emit_line_directives)
  17140. return;
  17141. switch (eop)
  17142. {
  17143. case SPIRExtension::DebugLine:
  17144. {
  17145. // We're missing line end and columns here, but I don't think we can emit those in any meaningful way.
  17146. emit_line_directive(args[0], get<SPIRConstant>(args[1]).scalar());
  17147. break;
  17148. }
  17149. case SPIRExtension::DebugSource:
  17150. {
  17151. // Forward the string declaration here. We ignore the optional text operand.
  17152. auto &str = get<SPIRString>(args[0]).str;
  17153. set<SPIRString>(result_id, str);
  17154. break;
  17155. }
  17156. default:
  17157. break;
  17158. }
  17159. }
  17160. void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
  17161. SmallVector<uint32_t> chain)
  17162. {
  17163. // Fully unroll all member/array indices one by one.
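// Illustrative sketch: for a struct such as { float a; vec2 b[2]; }, this recursion ends
// up emitting per-element stores along the lines of
//   dst.a = src.a; dst.b[0] = src.b[0]; dst.b[1] = src.b[1];
// via the access-chain + emit_store_statement path at the bottom of the function.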
  17164. auto &lhs_type = get<SPIRType>(lhs_type_id);
  17165. auto &rhs_type = get<SPIRType>(rhs_type_id);
  17166. if (!lhs_type.array.empty())
  17167. {
  17168. // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
  17169. // and this is a rather obscure opcode anyways, keep it simple unless we are forced to.
  17170. uint32_t array_size = to_array_size_literal(lhs_type);
  17171. chain.push_back(0);
  17172. for (uint32_t i = 0; i < array_size; i++)
  17173. {
  17174. chain.back() = i;
  17175. emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
  17176. }
  17177. }
  17178. else if (lhs_type.basetype == SPIRType::Struct)
  17179. {
  17180. chain.push_back(0);
  17181. uint32_t member_count = uint32_t(lhs_type.member_types.size());
  17182. for (uint32_t i = 0; i < member_count; i++)
  17183. {
  17184. chain.back() = i;
  17185. emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
  17186. }
  17187. }
  17188. else
  17189. {
  17190. // Need to handle unpack/packing fixups since this can differ wildly between the logical types,
  17191. // particularly in MSL.
  17192. // To deal with this, we emit access chains and go through emit_store_statement
  17193. // to deal with all the special cases we can encounter.
  17194. AccessChainMeta lhs_meta, rhs_meta;
  17195. auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
  17196. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
  17197. auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
  17198. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);
  17199. uint32_t id = ir.increase_bound_by(2);
  17200. lhs_id = id;
  17201. rhs_id = id + 1;
  17202. {
  17203. auto &lhs_expr = set<SPIRExpression>(lhs_id, std::move(lhs), lhs_type_id, true);
  17204. lhs_expr.need_transpose = lhs_meta.need_transpose;
  17205. if (lhs_meta.storage_is_packed)
  17206. set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
  17207. if (lhs_meta.storage_physical_type != 0)
  17208. set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);
  17209. forwarded_temporaries.insert(lhs_id);
  17210. suppressed_usage_tracking.insert(lhs_id);
  17211. }
  17212. {
  17213. auto &rhs_expr = set<SPIRExpression>(rhs_id, std::move(rhs), rhs_type_id, true);
  17214. rhs_expr.need_transpose = rhs_meta.need_transpose;
  17215. if (rhs_meta.storage_is_packed)
  17216. set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
  17217. if (rhs_meta.storage_physical_type != 0)
  17218. set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);
  17219. forwarded_temporaries.insert(rhs_id);
  17220. suppressed_usage_tracking.insert(rhs_id);
  17221. }
  17222. emit_store_statement(lhs_id, rhs_id);
  17223. }
  17224. }
  17225. bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
  17226. {
  17227. if (!has_decoration(id, DecorationInputAttachmentIndex))
  17228. return false;
  17229. uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
  17230. for (auto &remap : subpass_to_framebuffer_fetch_attachment)
  17231. if (remap.first == input_attachment_index)
  17232. return true;
  17233. return false;
  17234. }
  17235. const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
  17236. {
  17237. const SPIRVariable *ret = nullptr;
  17238. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
  17239. if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
  17240. get_decoration(var.self, DecorationInputAttachmentIndex) == index)
  17241. {
  17242. ret = &var;
  17243. }
  17244. });
  17245. return ret;
  17246. }
  17247. const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
  17248. {
  17249. const SPIRVariable *ret = nullptr;
  17250. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
  17251. if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
  17252. ret = &var;
  17253. });
  17254. return ret;
  17255. }
  17256. void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
  17257. {
  17258. for (auto &remap : subpass_to_framebuffer_fetch_attachment)
  17259. {
  17260. auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
  17261. auto *output_var = find_color_output_by_location(remap.second);
  17262. if (!subpass_var)
  17263. continue;
  17264. if (!output_var)
  17265. SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
  17266. "to read from it.");
  17267. if (is_array(get<SPIRType>(output_var->basetype)))
  17268. SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");
  17269. auto &func = get<SPIRFunction>(get_entry_point().self);
  17270. func.fixup_hooks_in.push_back([=]() {
  17271. if (is_legacy())
  17272. {
  17273. statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
  17274. get_decoration(output_var->self, DecorationLocation), "];");
  17275. }
  17276. else
  17277. {
  17278. uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
  17279. statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
  17280. to_expression(output_var->self), ";");
  17281. }
  17282. });
  17283. }
  17284. }
  17285. bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
  17286. {
  17287. return is_depth_image(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
  17288. }
  17289. const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
  17290. {
  17291. static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
  17292. "GL_KHR_shader_subgroup_basic",
  17293. "GL_KHR_shader_subgroup_vote",
  17294. "GL_KHR_shader_subgroup_arithmetic",
  17295. "GL_NV_gpu_shader_5",
  17296. "GL_NV_shader_thread_group",
  17297. "GL_NV_shader_thread_shuffle",
  17298. "GL_ARB_shader_ballot",
  17299. "GL_ARB_shader_group_vote",
  17300. "GL_AMD_gcn_shader" };
  17301. return retval[c];
  17302. }
  17303. SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
  17304. {
  17305. switch (c)
  17306. {
  17307. case ARB_shader_ballot:
  17308. return { "GL_ARB_shader_int64" };
  17309. case AMD_gcn_shader:
  17310. return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
  17311. default:
  17312. return {};
  17313. }
  17314. }
  17315. const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
  17316. {
  17317. switch (c)
  17318. {
  17319. case ARB_shader_ballot:
  17320. return "defined(GL_ARB_shader_int64)";
  17321. case AMD_gcn_shader:
  17322. return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
  17323. default:
  17324. return "";
  17325. }
  17326. }
  17327. CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
  17328. get_feature_dependencies(Feature feature)
  17329. {
  17330. switch (feature)
  17331. {
  17332. case SubgroupAllEqualT:
  17333. return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
  17334. case SubgroupElect:
  17335. return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
  17336. case SubgroupInverseBallot_InclBitCount_ExclBitCout:
  17337. return { SubgroupMask };
  17338. case SubgroupBallotBitCount:
  17339. return { SubgroupBallot };
  17340. case SubgroupArithmeticIAddReduce:
  17341. case SubgroupArithmeticIAddInclusiveScan:
  17342. case SubgroupArithmeticFAddReduce:
  17343. case SubgroupArithmeticFAddInclusiveScan:
  17344. case SubgroupArithmeticIMulReduce:
  17345. case SubgroupArithmeticIMulInclusiveScan:
  17346. case SubgroupArithmeticFMulReduce:
  17347. case SubgroupArithmeticFMulInclusiveScan:
  17348. return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract };
  17349. case SubgroupArithmeticIAddExclusiveScan:
  17350. case SubgroupArithmeticFAddExclusiveScan:
  17351. case SubgroupArithmeticIMulExclusiveScan:
  17352. case SubgroupArithmeticFMulExclusiveScan:
  17353. return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount,
  17354. SubgroupMask, SubgroupElect, SubgroupBallotBitExtract };
  17355. default:
  17356. return {};
  17357. }
  17358. }
  17359. CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
  17360. get_feature_dependency_mask(Feature feature)
  17361. {
  17362. return build_mask(get_feature_dependencies(feature));
  17363. }
  17364. bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
  17365. {
  17366. static const bool retval[FeatureCount] = {
  17367. false, false, false, false, false, false,
17368. true, // SubgroupBallotFindLSB_MSB
  17369. false, false, false, false,
  17370. true, // SubgroupMemBarrier - replaced with workgroup memory barriers
  17371. false, false, true, false,
  17372. false, false, false, false, false, false, // iadd, fadd
17373. false, false, false, false, false, false, // imul, fmul
  17374. };
  17375. return retval[feature];
  17376. }
  17377. CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
  17378. get_KHR_extension_for_feature(Feature feature)
  17379. {
  17380. static const Candidate extensions[FeatureCount] = {
  17381. KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
  17382. KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
  17383. KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
  17384. KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot,
  17385. KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
  17386. KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
  17387. KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
  17388. KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
  17389. };
  17390. return extensions[feature];
  17391. }
  17392. void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
  17393. {
  17394. feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
  17395. }
  17396. bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
  17397. {
  17398. return (feature_mask & (1u << feature)) != 0;
  17399. }
  17400. CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
  17401. {
  17402. Result res;
  17403. for (uint32_t i = 0u; i < FeatureCount; ++i)
  17404. {
  17405. if (feature_mask & (1u << i))
  17406. {
  17407. auto feature = static_cast<Feature>(i);
  17408. std::unordered_set<uint32_t> unique_candidates;
  17409. auto candidates = get_candidates_for_feature(feature);
  17410. unique_candidates.insert(candidates.begin(), candidates.end());
  17411. auto deps = get_feature_dependencies(feature);
  17412. for (Feature d : deps)
  17413. {
  17414. candidates = get_candidates_for_feature(d);
  17415. if (!candidates.empty())
  17416. unique_candidates.insert(candidates.begin(), candidates.end());
  17417. }
  17418. for (uint32_t c : unique_candidates)
  17419. ++res.weights[static_cast<Candidate>(c)];
  17420. }
  17421. }
  17422. return res;
  17423. }
  17424. CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
  17425. get_candidates_for_feature(Feature ft, const Result &r)
  17426. {
  17427. auto c = get_candidates_for_feature(ft);
  17428. auto cmp = [&r](Candidate a, Candidate b) {
  17429. if (r.weights[a] == r.weights[b])
  17430. return a < b; // Prefer candidates with lower enum value
  17431. return r.weights[a] > r.weights[b];
  17432. };
  17433. std::sort(c.begin(), c.end(), cmp);
  17434. return c;
  17435. }
  17436. CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
  17437. get_candidates_for_feature(Feature feature)
  17438. {
  17439. switch (feature)
  17440. {
  17441. case SubgroupMask:
  17442. return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
  17443. case SubgroupSize:
  17444. return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
  17445. case SubgroupInvocationID:
  17446. return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
  17447. case SubgroupID:
  17448. return { KHR_shader_subgroup_basic, NV_shader_thread_group };
  17449. case NumSubgroups:
  17450. return { KHR_shader_subgroup_basic, NV_shader_thread_group };
  17451. case SubgroupBroadcast_First:
  17452. return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
  17453. case SubgroupBallotFindLSB_MSB:
  17454. return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
  17455. case SubgroupAll_Any_AllEqualBool:
  17456. return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
  17457. case SubgroupAllEqualT:
  17458. return {}; // depends on other features only
  17459. case SubgroupElect:
  17460. return {}; // depends on other features only
  17461. case SubgroupBallot:
  17462. return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
  17463. case SubgroupBarrier:
  17464. return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
  17465. case SubgroupMemBarrier:
  17466. return { KHR_shader_subgroup_basic };
  17467. case SubgroupInverseBallot_InclBitCount_ExclBitCout:
  17468. return {};
  17469. case SubgroupBallotBitExtract:
  17470. return { NV_shader_thread_group };
  17471. case SubgroupBallotBitCount:
  17472. return {};
  17473. case SubgroupArithmeticIAddReduce:
  17474. case SubgroupArithmeticIAddExclusiveScan:
  17475. case SubgroupArithmeticIAddInclusiveScan:
  17476. case SubgroupArithmeticFAddReduce:
  17477. case SubgroupArithmeticFAddExclusiveScan:
  17478. case SubgroupArithmeticFAddInclusiveScan:
  17479. case SubgroupArithmeticIMulReduce:
  17480. case SubgroupArithmeticIMulExclusiveScan:
  17481. case SubgroupArithmeticIMulInclusiveScan:
  17482. case SubgroupArithmeticFMulReduce:
  17483. case SubgroupArithmeticFMulExclusiveScan:
  17484. case SubgroupArithmeticFMulInclusiveScan:
  17485. return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle };
  17486. default:
  17487. return {};
  17488. }
  17489. }
  17490. CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
  17491. const SmallVector<Feature> &features)
  17492. {
  17493. FeatureMask mask = 0;
  17494. for (Feature f : features)
  17495. mask |= FeatureMask(1) << f;
  17496. return mask;
  17497. }
  17498. CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
  17499. {
  17500. for (auto &weight : weights)
  17501. weight = 0;
17502. // Make sure KHR_shader_subgroup extensions are always preferred.
  17503. const uint32_t big_num = FeatureCount;
  17504. weights[KHR_shader_subgroup_ballot] = big_num;
  17505. weights[KHR_shader_subgroup_basic] = big_num;
  17506. weights[KHR_shader_subgroup_vote] = big_num;
  17507. weights[KHR_shader_subgroup_arithmetic] = big_num;
  17508. }
  17509. void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
  17510. {
  17511. // Must be ordered to maintain deterministic output, so vector is appropriate.
  17512. if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
  17513. end(workaround_ubo_load_overload_types))
  17514. {
  17515. force_recompile();
  17516. workaround_ubo_load_overload_types.push_back(id);
  17517. }
  17518. }
  17519. void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
  17520. {
  17521. // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
17522. // To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
  17523. // ensure row_major decoration is actually respected.
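// Illustrative result (the UBO and member names are made up):
//   spvWorkaroundRowMajor(ubo.someRowMajorMatrix)
// with an "MP" suffix appended for the relaxed-precision (mediump) overload.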
  17524. auto *var = maybe_get_backing_variable(ptr);
  17525. if (!var)
  17526. return;
  17527. auto &backing_type = get<SPIRType>(var->basetype);
  17528. bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
  17529. has_decoration(backing_type.self, DecorationBlock);
  17530. if (!is_ubo)
  17531. return;
  17532. auto *type = &get<SPIRType>(loaded_type);
  17533. bool rewrite = false;
  17534. bool relaxed = options.es;
  17535. if (is_matrix(*type))
  17536. {
  17537. // To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
  17538. // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
  17539. // If there is any row-major action going on, we apply the workaround.
  17540. // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
17541. // If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need the workaround.
  17542. type = &backing_type;
  17543. }
  17544. else
  17545. {
  17546. // If we're loading a composite, we don't have overloads like these.
  17547. relaxed = false;
  17548. }
  17549. if (type->basetype == SPIRType::Struct)
  17550. {
  17551. // If we're loading a struct where any member is a row-major matrix, apply the workaround.
  17552. for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
  17553. {
  17554. auto decorations = combined_decoration_for_member(*type, i);
  17555. if (decorations.get(DecorationRowMajor))
  17556. rewrite = true;
  17557. // Since we decide on a per-struct basis, only use mediump wrapper if all candidates are mediump.
  17558. if (!decorations.get(DecorationRelaxedPrecision))
  17559. relaxed = false;
  17560. }
  17561. }
  17562. if (rewrite)
  17563. {
  17564. request_workaround_wrapper_overload(loaded_type);
  17565. expr = join("spvWorkaroundRowMajor", (relaxed ? "MP" : ""), "(", expr, ")");
  17566. }
  17567. }
  17568. void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
  17569. {
  17570. masked_output_locations.insert({ location, component });
  17571. }
  17572. void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
  17573. {
  17574. masked_output_builtins.insert(builtin);
  17575. }
  17576. bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
  17577. {
  17578. auto &type = get<SPIRType>(var.basetype);
  17579. bool is_block = has_decoration(type.self, DecorationBlock);
  17580. // Blocks by themselves are never masked. Must be masked per-member.
  17581. if (is_block)
  17582. return false;
  17583. bool is_builtin = has_decoration(var.self, DecorationBuiltIn);
  17584. if (is_builtin)
  17585. {
  17586. return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn)));
  17587. }
  17588. else
  17589. {
  17590. if (!has_decoration(var.self, DecorationLocation))
  17591. return false;
  17592. return is_stage_output_location_masked(
  17593. get_decoration(var.self, DecorationLocation),
  17594. get_decoration(var.self, DecorationComponent));
  17595. }
  17596. }
  17597. bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
  17598. {
  17599. auto &type = get<SPIRType>(var.basetype);
  17600. bool is_block = has_decoration(type.self, DecorationBlock);
  17601. if (!is_block)
  17602. return false;
  17603. BuiltIn builtin = BuiltInMax;
  17604. if (is_member_builtin(type, index, &builtin))
  17605. {
  17606. return is_stage_output_builtin_masked(builtin);
  17607. }
  17608. else
  17609. {
  17610. uint32_t location = get_declared_member_location(var, index, strip_array);
  17611. uint32_t component = get_member_decoration(type.self, index, DecorationComponent);
  17612. return is_stage_output_location_masked(location, component);
  17613. }
  17614. }
  17615. bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const
  17616. {
  17617. if (has_decoration(var.self, DecorationPerPrimitiveEXT))
  17618. return true;
  17619. auto &type = get<SPIRType>(var.basetype);
  17620. if (!has_decoration(type.self, DecorationBlock))
  17621. return false;
  17622. for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++)
  17623. if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT))
  17624. return false;
  17625. return true;
  17626. }
  17627. bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
  17628. {
  17629. return masked_output_locations.count({ location, component }) != 0;
  17630. }
  17631. bool CompilerGLSL::is_stage_output_builtin_masked(BuiltIn builtin) const
  17632. {
  17633. return masked_output_builtins.count(builtin) != 0;
  17634. }
  17635. uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
  17636. {
  17637. auto &block_type = get<SPIRType>(var.basetype);
  17638. if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation))
  17639. return get_member_decoration(block_type.self, mbr_idx, DecorationLocation);
  17640. else
  17641. return get_accumulated_member_location(var, mbr_idx, strip_array);
  17642. }
  17643. uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
  17644. {
  17645. auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
  17646. uint32_t location = get_decoration(var.self, DecorationLocation);
  17647. for (uint32_t i = 0; i < mbr_idx; i++)
  17648. {
  17649. auto &mbr_type = get<SPIRType>(type.member_types[i]);
  17650. // Start counting from any place we have a new location decoration.
17651. if (has_member_decoration(type.self, i, DecorationLocation))
17652. location = get_member_decoration(type.self, i, DecorationLocation);
  17653. uint32_t location_count = type_to_location_count(mbr_type);
  17654. location += location_count;
  17655. }
  17656. return location;
  17657. }

StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
{
	auto *var = maybe_get_backing_variable(ptr);

	// If the expression has been lowered to a temporary, we need to use the Generic storage class.
	// We're looking for the effective storage class of a given expression.
	// An access chain or forwarded OpLoads from such access chains
	// will generally have the storage class of the underlying variable, but if the load was not forwarded
	// we have lost any address space qualifiers.
	bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
	                        (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);

	if (var && !forced_temporary)
	{
		if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
			return StorageClassWorkgroup;
		if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
			return StorageClassStorageBuffer;

		// Normalize SSBOs to StorageBuffer here.
		if (var->storage == StorageClassUniform &&
		    has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
			return StorageClassStorageBuffer;
		else
			return var->storage;
	}
	else
		return expression_type(ptr).storage;
}
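
// Number of locations a type consumes when used as a stage input/output.
// Structs sum up their members, matrices take one location per column,
// and arrays multiply by every array dimension.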
uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
{
	uint32_t count;
	if (type.basetype == SPIRType::Struct)
	{
		uint32_t mbr_count = uint32_t(type.member_types.size());
		count = 0;
		for (uint32_t i = 0; i < mbr_count; i++)
			count += type_to_location_count(get<SPIRType>(type.member_types[i]));
	}
	else
	{
		count = type.columns > 1 ? type.columns : 1;
	}

	uint32_t dim_count = uint32_t(type.array.size());
	for (uint32_t i = 0; i < dim_count; i++)
		count *= to_array_size_literal(type, i);

	return count;
}
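
// Floating-point literal formatting can be overridden by a user-provided
// formatter; otherwise fall back to the locale-aware default conversion.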
std::string CompilerGLSL::format_float(float value) const
{
	if (float_formatter)
		return float_formatter->format_float(value);

	// default behavior
	return convert_to_string(value, current_locale_radix_character);
}

std::string CompilerGLSL::format_double(double value) const
{
	if (float_formatter)
		return float_formatter->format_double(value);

	// default behavior
	return convert_to_string(value, current_locale_radix_character);
}
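
// If the ID is a non-specialization integer constant with a known "pretty" name
// in the provided mapping table, emit that alias; otherwise emit int(<expr>).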
std::string CompilerGLSL::to_pretty_expression_if_int_constant(
		uint32_t id,
		const GlslConstantNameMapping *mapping_start, const GlslConstantNameMapping *mapping_end,
		bool register_expression_read)
{
	auto *c = maybe_get<SPIRConstant>(id);
	if (c && !c->specialization)
	{
		auto value = c->scalar();
		auto pretty_name = std::find_if(mapping_start, mapping_end,
		                                [value](const GlslConstantNameMapping &mapping) { return mapping.value == value; });
		if (pretty_name != mapping_end)
			return pretty_name->alias;
	}

	return join("int(", to_expression(id, register_expression_read), ")");
}
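
// Resolves the effective FPFastMathMode mask for an operation, taking into account
// per-operation decorations, SignedZeroInfNanPreserve / NoContraction (float_controls1)
// and FPFastMathDefault execution modes (SPV_KHR_float_controls2).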
uint32_t CompilerGLSL::get_fp_fast_math_flags_for_op(uint32_t result_type, uint32_t id) const
{
	uint32_t fp_flags = ~0u;

	if (!type_is_floating_point(get<SPIRType>(result_type)))
		return fp_flags;

	auto &ep = get_entry_point();

	// Per-operation flag supersedes all defaults.
	if (id != 0 && has_decoration(id, DecorationFPFastMathMode))
		return get_decoration(id, DecorationFPFastMathMode);

	// Handle float_controls1 execution modes.
	uint32_t width = get<SPIRType>(result_type).width;
	bool szinp = false;
	switch (width)
	{
	case 8:
		szinp = ep.signed_zero_inf_nan_preserve_8;
		break;
	case 16:
		szinp = ep.signed_zero_inf_nan_preserve_16;
		break;
	case 32:
		szinp = ep.signed_zero_inf_nan_preserve_32;
		break;
	case 64:
		szinp = ep.signed_zero_inf_nan_preserve_64;
		break;
	default:
		break;
	}

	if (szinp)
		fp_flags &= ~(FPFastMathModeNSZMask | FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask);

	// Legacy NoContraction deals with any kind of transform to the expression.
	if (id != 0 && has_decoration(id, DecorationNoContraction))
		fp_flags &= ~(FPFastMathModeAllowContractMask | FPFastMathModeAllowTransformMask | FPFastMathModeAllowReassocMask);

	// Handle float_controls2 execution modes.
	bool found_default = false;
	for (auto &fp_pair : ep.fp_fast_math_defaults)
	{
		if (get<SPIRType>(fp_pair.first).width == width && fp_pair.second)
		{
			fp_flags &= get<SPIRConstant>(fp_pair.second).scalar();
			found_default = true;
		}
	}

	// From SPV_KHR_float_controls2:
	// "This definition implies that, if the entry point set any FPFastMathDefault execution mode
	// then any type for which a default is not set uses no fast math flags
	// (although this can still be overridden on a per-operation basis).
	// Modules must not mix setting fast math modes explicitly using this extension and relying on older API defaults."
	if (!found_default && !ep.fp_fast_math_defaults.empty())
		fp_flags = 0;

	return fp_flags;
}
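
// True if the resolved fast-math flags forbid any of the transforms that
// legacy NoContraction would have disallowed (contraction, reassociation, etc.).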
bool CompilerGLSL::has_legacy_nocontract(uint32_t result_type, uint32_t id) const
{
	const auto fp_flags = FPFastMathModeAllowContractMask |
	                      FPFastMathModeAllowTransformMask |
	                      FPFastMathModeAllowReassocMask;
	return (get_fp_fast_math_flags_for_op(result_type, id) & fp_flags) != fp_flags;
}