spirv_glsl.cpp 599 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
8575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614
/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>
#include <array>

#ifndef _WIN32
#include <langinfo.h>
#endif
#include <locale.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;
enum ExtraSubExpressionType
{
	// Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
	EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
	EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};

static bool is_unsigned_opcode(Op op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case OpShiftRightLogical:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUConvert:
	case OpUDiv:
	case OpUMod:
	case OpUMulExtended:
	case OpConvertUToF:
	case OpConvertFToU:
		return true;

	default:
		return false;
	}
}
static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case GLSLstd450UClamp:
	case GLSLstd450UMin:
	case GLSLstd450UMax:
	case GLSLstd450FindUMsb:
		return true;

	default:
		return false;
	}
}
static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
	case BufferPackingStd140:
	case BufferPackingStd140EnhancedLayout:
		return true;

	default:
		return false;
	}
}

static bool packing_is_hlsl(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
		return true;

	default:
		return false;
	}
}

static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140:
	case BufferPackingStd430:
	case BufferPackingScalar:
	case BufferPackingHLSLCbuffer:
		return false;

	default:
		return true;
	}
}

static bool packing_is_scalar(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingScalar:
	case BufferPackingScalarEnhancedLayout:
		return true;

	default:
		return false;
	}
}

static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140EnhancedLayout:
		return BufferPackingStd140;
	case BufferPackingStd430EnhancedLayout:
		return BufferPackingStd430;
	case BufferPackingHLSLCbufferPackOffset:
		return BufferPackingHLSLCbuffer;
	case BufferPackingScalarEnhancedLayout:
		return BufferPackingScalar;
	default:
		return packing;
	}
}
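// For illustration: the "EnhancedLayout" packings carry explicit member offsets, which GLSL can
// presumably only express on top-level block members, so a nested struct degrades to the plain
// rule set, e.g.
//   packing_to_substruct_packing(BufferPackingStd430EnhancedLayout) == BufferPackingStd430.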
void CompilerGLSL::init()
{
	if (ir.source.known)
	{
		options.es = ir.source.es;
		options.version = ir.source.version;
	}

	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
	// tricky.
#ifdef _WIN32
	// On Windows, localeconv uses thread-local storage, so it should be fine.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#elif defined(__ANDROID__) && __ANDROID_API__ < 26
	// nl_langinfo is not supported on this platform, fall back to the worse alternative.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#else
	// localeconv, the portable function is not MT safe ...
	const char *decimal_point = nl_langinfo(RADIXCHAR);
	if (decimal_point && *decimal_point != '\0')
		current_locale_radix_character = *decimal_point;
#endif
}
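// For illustration: under a comma-as-decimal locale such as de_DE, the radix character queried
// above is ',' rather than '.', and float-to-string conversion later substitutes '.' so that
// emitted literals like "1.5" stay valid GLSL (the fixup itself presumably lives in the shared
// string-conversion helpers, not in this file).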
static const char *to_pls_layout(PlsFormat format)
{
	switch (format)
	{
	case PlsR11FG11FB10F:
		return "layout(r11f_g11f_b10f) ";
	case PlsR32F:
		return "layout(r32f) ";
	case PlsRG16F:
		return "layout(rg16f) ";
	case PlsRGB10A2:
		return "layout(rgb10_a2) ";
	case PlsRGBA8:
		return "layout(rgba8) ";
	case PlsRG16:
		return "layout(rg16) ";
	case PlsRGBA8I:
		return "layout(rgba8i) ";
	case PlsRG16I:
		return "layout(rg16i) ";
	case PlsRGB10A2UI:
		return "layout(rgb10_a2ui) ";
	case PlsRGBA8UI:
		return "layout(rgba8ui) ";
	case PlsRG16UI:
		return "layout(rg16ui) ";
	case PlsR32UI:
		return "layout(r32ui) ";
	default:
		return "";
	}
}
static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR11FG11FB10F:
	case PlsR32F:
	case PlsRG16F:
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRG16:
		return SPIRType::Float;

	case PlsRGBA8I:
	case PlsRG16I:
		return SPIRType::Int;

	case PlsRGB10A2UI:
	case PlsRGBA8UI:
	case PlsRG16UI:
	case PlsR32UI:
		return SPIRType::UInt;
	}
}

static uint32_t pls_format_to_components(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR32F:
	case PlsR32UI:
		return 1;

	case PlsRG16F:
	case PlsRG16:
	case PlsRG16UI:
	case PlsRG16I:
		return 2;

	case PlsR11FG11FB10F:
		return 3;

	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRGBA8I:
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
		return 4;
	}
}
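// For illustration: these three helpers drive pixel-local-storage declarations. Each remapped
// PLS member gets the qualifier string from to_pls_layout() prepended to a vector whose base
// type and component count come from pls_format_to_basetype() / pls_format_to_components(),
// e.g. PlsRG16F maps to "layout(rg16f) " plus a 2-component float vector.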
const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
{
	static const char *const swizzle[4][4] = {
		{ ".x", ".y", ".z", ".w" },
		{ ".xy", ".yz", ".zw", nullptr },
		{ ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
		// This array ends up being compiled as all nullptrs, tripping the assertions below.
		{ "", nullptr, nullptr, "$" },
#else
		{ "", nullptr, nullptr, nullptr },
#endif
	};

	assert(vecsize >= 1 && vecsize <= 4);
	assert(index >= 0 && index < 4);
	assert(swizzle[vecsize - 1][index]);
	return swizzle[vecsize - 1][index];
}
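// For illustration: vector_swizzle(1, 1) yields ".y", vector_swizzle(3, 0) yields ".xyz", and
// vector_swizzle(4, 0) yields "" since extracting a full vec4 from a vec4 needs no swizzle.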
void CompilerGLSL::reset(uint32_t iteration_count)
{
	// Sanity check the iteration count to be robust against a certain class of bugs where
	// we keep forcing recompilations without making clear forward progress.
	// In buggy situations we will loop forever, or loop for an unbounded number of iterations.
	// Certain types of recompilations are considered to make forward progress,
	// but in almost all situations, we'll never see more than 3 iterations.
	// It is highly context-sensitive when we need to force recompilation,
	// and it is not practical with the current architecture
	// to resolve everything up front.
	if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress)
		SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!");

	// We do some speculative optimizations which should pretty much always work out,
	// but just in case the SPIR-V is rather weird, recompile until it's happy.
	// This typically only means one extra pass.
	clear_force_recompile();

	// Clear invalid expression tracking.
	invalid_expressions.clear();
	composite_insert_overwritten.clear();
	current_function = nullptr;

	// Clear temporary usage tracking.
	expression_usage_counts.clear();
	forwarded_temporaries.clear();
	suppressed_usage_tracking.clear();

	// Ensure that we declare phi-variable copies even if the original declaration isn't deferred
	flushed_phi_variables.clear();

	current_emitting_switch_stack.clear();

	reset_name_caches();

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.active = false;
		func.flush_undeclared = true;
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

	ir.reset_all_of_type<SPIRExpression>();
	ir.reset_all_of_type<SPIRAccessChain>();

	statement_count = 0;
	indent = 0;
	current_loop_level = 0;
}
void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}

void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
{
	subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
	inout_color_attachments.push_back({ color_location, coherent });
}

bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location;
	                    }) != end(inout_color_attachments);
}

bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
{
	return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
	                    [&](const std::pair<uint32_t, bool> &elem) {
		                    return elem.first == location && !elem.second;
	                    }) != end(inout_color_attachments);
}
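// For illustration: a client that wants subpass input 0 to alias color attachment 0 via
// GL_EXT_shader_framebuffer_fetch would call something like
//   compiler.remap_ext_framebuffer_fetch(0, 0, true /* coherent */);
// before compile(); find_static_extensions() below then pulls in the matching extension.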
void CompilerGLSL::find_static_extensions()
{
	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
		if (type.basetype == SPIRType::Double)
		{
			if (options.es)
				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader_fp64");
		}
		else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
		{
			if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
			require_extension_internal("GL_ARB_gpu_shader_int64");
		}
		else if (type.basetype == SPIRType::Half)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
		else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_8bit_storage");
		}
		else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
	});

	auto &execution = get_entry_point();
	switch (execution.model)
	{
	case ExecutionModelGLCompute:
		if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_compute_shader");
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
		break;

	case ExecutionModelGeometry:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_geometry_shader");
		if (!options.es && options.version < 150)
			require_extension_internal("GL_ARB_geometry_shader4");

		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
		{
			// Instanced GS is part of 400 core or this extension.
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader5");
		}
		break;

	case ExecutionModelTessellationEvaluation:
	case ExecutionModelTessellationControl:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_tessellation_shader");
		if (!options.es && options.version < 400)
			require_extension_internal("GL_ARB_tessellation_shader");
		break;

	case ExecutionModelRayGenerationKHR:
	case ExecutionModelIntersectionKHR:
	case ExecutionModelAnyHitKHR:
	case ExecutionModelClosestHitKHR:
	case ExecutionModelMissKHR:
	case ExecutionModelCallableKHR:
		// NV enums are aliases.
		if (options.es || options.version < 460)
			SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");

		// Need to figure out if we should target KHR or NV extension based on capabilities.
		for (auto &cap : ir.declared_capabilities)
		{
			if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
			    cap == CapabilityRayTraversalPrimitiveCullingKHR)
			{
				ray_tracing_is_khr = true;
				break;
			}
		}

		if (ray_tracing_is_khr)
		{
			// In KHR ray tracing we pass payloads by pointer instead of location,
			// so make sure we assign locations properly.
			ray_tracing_khr_fixup_locations();
			require_extension_internal("GL_EXT_ray_tracing");
		}
		else
			require_extension_internal("GL_NV_ray_tracing");
		break;

	case ExecutionModelMeshEXT:
	case ExecutionModelTaskEXT:
		if (options.es || options.version < 450)
			SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above.");
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics.");
		require_extension_internal("GL_EXT_mesh_shader");
		break;

	default:
		break;
	}
	if (!pls_inputs.empty() || !pls_outputs.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
		require_extension_internal("GL_EXT_shader_pixel_local_storage");
	}

	if (!inout_color_attachments.empty())
	{
		if (execution.model != ExecutionModelFragment)
			SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
		if (options.vulkan_semantics)
			SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");

		bool has_coherent = false;
		bool has_incoherent = false;

		for (auto &att : inout_color_attachments)
		{
			if (att.second)
				has_coherent = true;
			else
				has_incoherent = true;
		}

		if (has_coherent)
			require_extension_internal("GL_EXT_shader_framebuffer_fetch");
		if (has_incoherent)
			require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent");
	}

	if (options.separate_shader_objects && !options.es && options.version < 410)
		require_extension_internal("GL_ARB_separate_shader_objects");

	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
	{
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
		if (options.es && options.version < 320)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
		else if (!options.es && options.version < 450)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
		require_extension_internal("GL_EXT_buffer_reference");
	}
	else if (ir.addressing_model != AddressingModelLogical)
	{
		SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
	}

	// Check for nonuniform qualifier and passthrough.
	// Instead of looping over all decorations to find this, just look at capabilities.
	for (auto &cap : ir.declared_capabilities)
	{
		switch (cap)
		{
		case CapabilityShaderNonUniformEXT:
			if (!options.vulkan_semantics)
				require_extension_internal("GL_NV_gpu_shader5");
			else
				require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;

		case CapabilityRuntimeDescriptorArrayEXT:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
			require_extension_internal("GL_EXT_nonuniform_qualifier");
			break;

		case CapabilityGeometryShaderPassthroughNV:
			if (execution.model == ExecutionModelGeometry)
			{
				require_extension_internal("GL_NV_geometry_shader_passthrough");
				execution.geometry_passthrough = true;
			}
			break;

		case CapabilityVariablePointers:
		case CapabilityVariablePointersStorageBuffer:
			SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");

		case CapabilityMultiView:
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_multiview");
			else
			{
				require_extension_internal("GL_OVR_multiview2");
				if (options.ovr_multiview_view_count == 0)
					SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
				if (get_execution_model() != ExecutionModelVertex)
					SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
			}
			break;

		case CapabilityRayQueryKHR:
			if (options.es || options.version < 460 || !options.vulkan_semantics)
				SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
			require_extension_internal("GL_EXT_ray_query");
			ray_tracing_is_khr = true;
			break;

		case CapabilityRayTraversalPrimitiveCullingKHR:
			if (options.es || options.version < 460 || !options.vulkan_semantics)
				SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
			require_extension_internal("GL_EXT_ray_flags_primitive_culling");
			ray_tracing_is_khr = true;
			break;

		default:
			break;
		}
	}

	if (options.ovr_multiview_view_count)
	{
		if (options.vulkan_semantics)
			SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
		if (get_execution_model() != ExecutionModelVertex)
			SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
		require_extension_internal("GL_OVR_multiview2");
	}

	// KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR.
	for (auto &ext : ir.declared_extensions)
		if (ext == "SPV_NV_fragment_shader_barycentric")
			barycentric_is_nv = true;
}
void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed)
{
	uint32_t &polyfills = (relaxed && options.es) ? required_polyfills_relaxed : required_polyfills;

	if ((polyfills & polyfill) == 0)
	{
		polyfills |= polyfill;
		force_recompile();
	}
}

void CompilerGLSL::ray_tracing_khr_fixup_locations()
{
	uint32_t location = 0;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
		// Incoming payload storage can also be used for tracing.
		if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
		    var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
			return;
		if (is_hidden_variable(var))
			return;
		set_decoration(var.self, DecorationLocation, location++);
	});
}
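// For illustration: after the fixup above, each visible payload/callable-data variable carries a
// sequential Location decoration, which is what allows the emitted GLSL to declare e.g.
//   layout(location = 0) rayPayloadEXT MyPayload payload;
// (hypothetical type and variable names; only the numbering scheme comes from the code above).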
string CompilerGLSL::compile()
{
	ir.fixup_reserved_names();

	if (!options.vulkan_semantics)
	{
		// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
		backend.nonuniform_qualifier = "";
		backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround;
	}
	backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
	backend.force_gl_in_out_block = true;
	backend.supports_extensions = true;
	backend.use_array_constructor = true;
	backend.workgroup_size_is_hidden = true;
	backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics;
	backend.support_precise_qualifier =
	    (!options.es && options.version >= 400) || (options.es && options.version >= 320);

	if (is_legacy_es())
		backend.support_case_fallthrough = false;

	// Scan the SPIR-V to find trivial uses of extensions.
	fixup_anonymous_struct_names();
	fixup_type_alias();
	reorder_type_alias();
	build_function_control_flow_graphs_and_analyze();
	find_static_extensions();
	fixup_image_load_store_access();
	update_active_builtins();
	analyze_image_and_sampler_usage();
	analyze_interlocked_resource_usage();
	if (!inout_color_attachments.empty())
		emit_inout_fragment_outputs_copy_to_subpass_inputs();

	// Shaders might cast unrelated data to pointers of non-block types.
	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		analyze_non_block_pointer_types();

	uint32_t pass_count = 0;
	do
	{
		reset(pass_count);

		buffer.reset();

		emit_header();
		emit_resources();
		emit_extension_workarounds(get_execution_model());

		if (required_polyfills != 0)
			emit_polyfills(required_polyfills, false);
		if (options.es && required_polyfills_relaxed != 0)
			emit_polyfills(required_polyfills_relaxed, true);

		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

		pass_count++;
	} while (is_forcing_recompilation());

	// Implement the interlocked wrapper function at the end.
	// The body was implemented in lieu of main().
	if (interlocked_is_complex)
	{
		statement("void main()");
		begin_scope();
		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
		statement("SPIRV_Cross_beginInvocationInterlock();");
		statement("spvMainInterlockedBody();");
		statement("SPIRV_Cross_endInvocationInterlock();");
		end_scope();
	}

	// Entry point in GLSL is always main().
	get_entry_point().name = "main";

	return buffer.str();
}
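// Typical client-side usage of the compiler above (an illustrative sketch; "spirv_words" is a
// hypothetical buffer the caller has already filled with a valid SPIR-V module):
//
//   spirv_cross::CompilerGLSL glsl(std::move(spirv_words));
//   spirv_cross::CompilerGLSL::Options opts;
//   opts.version = 310;
//   opts.es = true;
//   glsl.set_common_options(opts);
//   std::string glsl_source = glsl.compile();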
std::string CompilerGLSL::get_partial_source()
{
	return buffer.str();
}

void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
{
	auto &execution = get_entry_point();
	bool builtin_workgroup = execution.workgroup_size.constant != 0;
	bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId);

	if (wg_x.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
		else
			arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_x)
		arguments.push_back(join("local_size_x = ", get<SPIRConstant>(execution.workgroup_size.id_x).scalar()));
	else
		arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));

	if (wg_y.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
		else
			arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_y)
		arguments.push_back(join("local_size_y = ", get<SPIRConstant>(execution.workgroup_size.id_y).scalar()));
	else
		arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));

	if (wg_z.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
		else
			arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
	}
	else if (use_local_size_id && execution.workgroup_size.id_z)
		arguments.push_back(join("local_size_z = ", get<SPIRConstant>(execution.workgroup_size.id_z).scalar()));
	else
		arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
}
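// For illustration: the arguments built above are later joined into the workgroup-size layout
// declaration, yielding plain sizes such as
//   layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// or, when the sizes are specialization constants under Vulkan semantics,
//   layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
// (example numbers only; the actual values come from the entry point and its spec constants).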
void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
{
	if (options.vulkan_semantics)
	{
		auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
		require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
	}
	else
	{
		if (!shader_subgroup_supporter.is_feature_requested(feature))
			force_recompile();
		shader_subgroup_supporter.request_feature(feature);
	}
}
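// For illustration: under Vulkan semantics a subgroup feature maps directly to its KHR extension
// (e.g. GL_KHR_shader_subgroup_ballot), while on plain GLSL the feature is only recorded and a
// recompile is forced so emit_extension_workarounds() can emit vendor fallbacks in the header on
// the next pass (the extension name is an example, not taken from this function).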
void CompilerGLSL::emit_header()
{
	auto &execution = get_entry_point();
	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

	if (!options.es && options.version < 420)
	{
		// Needed for binding = # on UBOs, etc.
		if (options.enable_420pack_extension)
		{
			statement("#ifdef GL_ARB_shading_language_420pack");
			statement("#extension GL_ARB_shading_language_420pack : require");
			statement("#endif");
		}
		// Needed for: layout(early_fragment_tests) in;
		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			require_extension_internal("GL_ARB_shader_image_load_store");
	}

	// Needed for: layout(post_depth_coverage) in;
	if (execution.flags.get(ExecutionModePostDepthCoverage))
		require_extension_internal("GL_ARB_post_depth_coverage");

	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
	bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
	                      execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
	                      execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
	                      execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT);

	if (interlock_used)
	{
		if (options.es)
		{
			if (options.version < 310)
				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
			require_extension_internal("GL_NV_fragment_shader_interlock");
		}
		else
		{
			if (options.version < 420)
				require_extension_internal("GL_ARB_shader_image_load_store");
			require_extension_internal("GL_ARB_fragment_shader_interlock");
		}
	}

	for (auto &ext : forced_extensions)
	{
		if (ext == "GL_ARB_gpu_shader_int64")
		{
			statement("#if defined(GL_ARB_gpu_shader_int64)");
			statement("#extension GL_ARB_gpu_shader_int64 : require");
			if (!options.vulkan_semantics || options.es)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			statement("#else");
			statement("#error No extension available for 64-bit integers.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
		{
			// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
			// GL_AMD_gpu_shader_half_float is a superset, so try that first.
			statement("#if defined(GL_AMD_gpu_shader_half_float)");
			statement("#extension GL_AMD_gpu_shader_half_float : require");
			if (!options.vulkan_semantics)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			else
			{
				statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
			}
			statement("#else");
			statement("#error No extension available for FP16.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
			else
			{
				statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require");
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
				statement("#else");
				statement("#error No extension available for Int8.");
				statement("#endif");
			}
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
			else
			{
				statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
				statement("#elif defined(GL_AMD_gpu_shader_int16)");
				statement("#extension GL_AMD_gpu_shader_int16 : require");
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
				statement("#else");
				statement("#error No extension available for Int16.");
				statement("#endif");
			}
		}
  817. else if (ext == "GL_ARB_post_depth_coverage")
  818. {
  819. if (options.es)
  820. statement("#extension GL_EXT_post_depth_coverage : require");
  821. else
  822. {
  823. statement("#if defined(GL_ARB_post_depth_coverge)");
  824. statement("#extension GL_ARB_post_depth_coverage : require");
  825. statement("#else");
  826. statement("#extension GL_EXT_post_depth_coverage : require");
  827. statement("#endif");
  828. }
  829. }
  830. else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
  831. {
  832. // Soft-enable this extension on plain GLSL.
  833. statement("#ifdef ", ext);
  834. statement("#extension ", ext, " : enable");
  835. statement("#endif");
  836. }
  837. else if (ext == "GL_EXT_control_flow_attributes")
  838. {
  839. // These are just hints so we can conditionally enable and fallback in the shader.
  840. statement("#if defined(GL_EXT_control_flow_attributes)");
  841. statement("#extension GL_EXT_control_flow_attributes : require");
  842. statement("#define SPIRV_CROSS_FLATTEN [[flatten]]");
  843. statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
  844. statement("#define SPIRV_CROSS_UNROLL [[unroll]]");
  845. statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]");
  846. statement("#else");
  847. statement("#define SPIRV_CROSS_FLATTEN");
  848. statement("#define SPIRV_CROSS_BRANCH");
  849. statement("#define SPIRV_CROSS_UNROLL");
  850. statement("#define SPIRV_CROSS_LOOP");
  851. statement("#endif");
  852. }
  853. else if (ext == "GL_NV_fragment_shader_interlock")
  854. {
  855. statement("#extension GL_NV_fragment_shader_interlock : require");
  856. statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()");
  857. statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()");
  858. }
  859. else if (ext == "GL_ARB_fragment_shader_interlock")
  860. {
  861. statement("#ifdef GL_ARB_fragment_shader_interlock");
  862. statement("#extension GL_ARB_fragment_shader_interlock : enable");
  863. statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()");
  864. statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()");
  865. statement("#elif defined(GL_INTEL_fragment_shader_ordering)");
  866. statement("#extension GL_INTEL_fragment_shader_ordering : enable");
  867. statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()");
  868. statement("#define SPIRV_Cross_endInvocationInterlock()");
  869. statement("#endif");
  870. }
  871. else
  872. statement("#extension ", ext, " : require");
  873. }
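// In plain (non-Vulkan) GLSL there is no single core subgroup feature, so each requested subgroup
// feature is mapped to an #if/#elif chain over the candidate extensions resolved by
// ShaderSubgroupSupportHelper, with an #error when the feature cannot be emulated at all.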
  874. if (!options.vulkan_semantics)
  875. {
  876. using Supp = ShaderSubgroupSupportHelper;
  877. auto result = shader_subgroup_supporter.resolve();
  878. for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
  879. {
  880. auto feature = static_cast<Supp::Feature>(feature_index);
  881. if (!shader_subgroup_supporter.is_feature_requested(feature))
  882. continue;
  883. auto exts = Supp::get_candidates_for_feature(feature, result);
  884. if (exts.empty())
  885. continue;
  886. statement("");
  887. for (auto &ext : exts)
  888. {
  889. const char *name = Supp::get_extension_name(ext);
  890. const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
  891. auto extra_names = Supp::get_extra_required_extension_names(ext);
  892. statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
  893. (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
  894. for (const auto &e : extra_names)
  895. statement("#extension ", e, " : enable");
  896. statement("#extension ", name, " : require");
  897. }
  898. if (!Supp::can_feature_be_implemented_without_extensions(feature))
  899. {
  900. statement("#else");
  901. statement("#error No extensions available to emulate requested subgroup feature.");
  902. }
  903. statement("#endif");
  904. }
  905. }
  906. for (auto &header : header_lines)
  907. statement(header);
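// Gather the layout() qualifiers for the entry point. Everything pushed to 'inputs' is emitted as a
// single "layout(...) in;" statement and 'outputs' as "layout(...) out;" at the end of this function.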
  908. SmallVector<string> inputs;
  909. SmallVector<string> outputs;
  910. switch (execution.model)
  911. {
  912. case ExecutionModelVertex:
  913. if (options.ovr_multiview_view_count)
  914. inputs.push_back(join("num_views = ", options.ovr_multiview_view_count));
  915. break;
  916. case ExecutionModelGeometry:
  917. if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
  918. inputs.push_back(join("invocations = ", execution.invocations));
  919. if (execution.flags.get(ExecutionModeInputPoints))
  920. inputs.push_back("points");
  921. if (execution.flags.get(ExecutionModeInputLines))
  922. inputs.push_back("lines");
  923. if (execution.flags.get(ExecutionModeInputLinesAdjacency))
  924. inputs.push_back("lines_adjacency");
  925. if (execution.flags.get(ExecutionModeTriangles))
  926. inputs.push_back("triangles");
  927. if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
  928. inputs.push_back("triangles_adjacency");
  929. if (!execution.geometry_passthrough)
  930. {
// For passthrough, these are implied and cannot be declared in the shader.
  932. outputs.push_back(join("max_vertices = ", execution.output_vertices));
  933. if (execution.flags.get(ExecutionModeOutputTriangleStrip))
  934. outputs.push_back("triangle_strip");
  935. if (execution.flags.get(ExecutionModeOutputPoints))
  936. outputs.push_back("points");
  937. if (execution.flags.get(ExecutionModeOutputLineStrip))
  938. outputs.push_back("line_strip");
  939. }
  940. break;
  941. case ExecutionModelTessellationControl:
  942. if (execution.flags.get(ExecutionModeOutputVertices))
  943. outputs.push_back(join("vertices = ", execution.output_vertices));
  944. break;
  945. case ExecutionModelTessellationEvaluation:
  946. if (execution.flags.get(ExecutionModeQuads))
  947. inputs.push_back("quads");
  948. if (execution.flags.get(ExecutionModeTriangles))
  949. inputs.push_back("triangles");
  950. if (execution.flags.get(ExecutionModeIsolines))
  951. inputs.push_back("isolines");
  952. if (execution.flags.get(ExecutionModePointMode))
  953. inputs.push_back("point_mode");
  954. if (!execution.flags.get(ExecutionModeIsolines))
  955. {
  956. if (execution.flags.get(ExecutionModeVertexOrderCw))
  957. inputs.push_back("cw");
  958. if (execution.flags.get(ExecutionModeVertexOrderCcw))
  959. inputs.push_back("ccw");
  960. }
  961. if (execution.flags.get(ExecutionModeSpacingFractionalEven))
  962. inputs.push_back("fractional_even_spacing");
  963. if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
  964. inputs.push_back("fractional_odd_spacing");
  965. if (execution.flags.get(ExecutionModeSpacingEqual))
  966. inputs.push_back("equal_spacing");
  967. break;
  968. case ExecutionModelGLCompute:
  969. case ExecutionModelTaskEXT:
  970. case ExecutionModelMeshEXT:
  971. {
  972. if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))
  973. {
  974. SpecializationConstant wg_x, wg_y, wg_z;
  975. get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
// If there are any spec constants on legacy GLSL, defer the declaration; we need to set up macro
// declarations before we can emit the work group size.
  978. if (options.vulkan_semantics ||
  979. ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
  980. build_workgroup_size(inputs, wg_x, wg_y, wg_z);
  981. }
  982. else
  983. {
  984. inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
  985. inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
  986. inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
  987. }
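// Mesh shaders additionally declare their output limits and primitive topology,
// e.g. "layout(max_vertices = N, max_primitives = M, triangles) out;".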
  988. if (execution.model == ExecutionModelMeshEXT)
  989. {
  990. outputs.push_back(join("max_vertices = ", execution.output_vertices));
  991. outputs.push_back(join("max_primitives = ", execution.output_primitives));
  992. if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
  993. outputs.push_back("triangles");
  994. else if (execution.flags.get(ExecutionModeOutputLinesEXT))
  995. outputs.push_back("lines");
  996. else if (execution.flags.get(ExecutionModeOutputPoints))
  997. outputs.push_back("points");
  998. }
  999. break;
  1000. }
  1001. case ExecutionModelFragment:
  1002. if (options.es)
  1003. {
  1004. switch (options.fragment.default_float_precision)
  1005. {
  1006. case Options::Lowp:
  1007. statement("precision lowp float;");
  1008. break;
  1009. case Options::Mediump:
  1010. statement("precision mediump float;");
  1011. break;
  1012. case Options::Highp:
  1013. statement("precision highp float;");
  1014. break;
  1015. default:
  1016. break;
  1017. }
  1018. switch (options.fragment.default_int_precision)
  1019. {
  1020. case Options::Lowp:
  1021. statement("precision lowp int;");
  1022. break;
  1023. case Options::Mediump:
  1024. statement("precision mediump int;");
  1025. break;
  1026. case Options::Highp:
  1027. statement("precision highp int;");
  1028. break;
  1029. default:
  1030. break;
  1031. }
  1032. }
  1033. if (execution.flags.get(ExecutionModeEarlyFragmentTests))
  1034. inputs.push_back("early_fragment_tests");
  1035. if (execution.flags.get(ExecutionModePostDepthCoverage))
  1036. inputs.push_back("post_depth_coverage");
  1037. if (interlock_used)
  1038. statement("#if defined(GL_ARB_fragment_shader_interlock)");
  1039. if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
  1040. statement("layout(pixel_interlock_ordered) in;");
  1041. else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
  1042. statement("layout(pixel_interlock_unordered) in;");
  1043. else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
  1044. statement("layout(sample_interlock_ordered) in;");
  1045. else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
  1046. statement("layout(sample_interlock_unordered) in;");
  1047. if (interlock_used)
  1048. {
  1049. statement("#elif !defined(GL_INTEL_fragment_shader_ordering)");
  1050. statement("#error Fragment Shader Interlock/Ordering extension missing!");
  1051. statement("#endif");
  1052. }
  1053. if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
  1054. statement("layout(depth_greater) out float gl_FragDepth;");
  1055. else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
  1056. statement("layout(depth_less) out float gl_FragDepth;");
  1057. break;
  1058. default:
  1059. break;
  1060. }
  1061. for (auto &cap : ir.declared_capabilities)
  1062. if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
  1063. statement("layout(primitive_culling);");
  1064. if (!inputs.empty())
  1065. statement("layout(", merge(inputs), ") in;");
  1066. if (!outputs.empty())
  1067. statement("layout(", merge(outputs), ") out;");
  1068. statement("");
  1069. }
  1070. bool CompilerGLSL::type_is_empty(const SPIRType &type)
  1071. {
  1072. return type.basetype == SPIRType::Struct && type.member_types.empty();
  1073. }
  1074. void CompilerGLSL::emit_struct(SPIRType &type)
  1075. {
  1076. // Struct types can be stamped out multiple times
  1077. // with just different offsets, matrix layouts, etc ...
  1078. // Type-punning with these types is legal, which complicates things
  1079. // when we are storing struct and array types in an SSBO for example.
  1080. // If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
  1081. if (type.type_alias != TypeID(0) &&
  1082. !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
  1083. return;
  1084. add_resource_name(type.self);
  1085. auto name = type_to_glsl(type);
  1086. statement(!backend.explicit_struct_type ? "struct " : "", name);
  1087. begin_scope();
  1088. type.member_name_cache.clear();
  1089. uint32_t i = 0;
  1090. bool emitted = false;
  1091. for (auto &member : type.member_types)
  1092. {
  1093. add_member_name(type, i);
  1094. emit_struct_member(type, member, i);
  1095. i++;
  1096. emitted = true;
  1097. }
  1098. // Don't declare empty structs in GLSL, this is not allowed.
  1099. if (type_is_empty(type) && !backend.supports_empty_struct)
  1100. {
  1101. statement("int empty_struct_member;");
  1102. emitted = true;
  1103. }
  1104. if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
  1105. emit_struct_padding_target(type);
  1106. end_scope_decl();
  1107. if (emitted)
  1108. statement("");
  1109. }
  1110. string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
  1111. {
  1112. string res;
  1113. //if (flags & (1ull << DecorationSmooth))
  1114. // res += "smooth ";
  1115. if (flags.get(DecorationFlat))
  1116. res += "flat ";
  1117. if (flags.get(DecorationNoPerspective))
  1118. {
  1119. if (options.es)
  1120. {
  1121. if (options.version < 300)
  1122. SPIRV_CROSS_THROW("noperspective requires ESSL 300.");
  1123. require_extension_internal("GL_NV_shader_noperspective_interpolation");
  1124. }
  1125. else if (is_legacy_desktop())
  1126. require_extension_internal("GL_EXT_gpu_shader4");
  1127. res += "noperspective ";
  1128. }
  1129. if (flags.get(DecorationCentroid))
  1130. res += "centroid ";
  1131. if (flags.get(DecorationPatch))
  1132. res += "patch ";
  1133. if (flags.get(DecorationSample))
  1134. {
  1135. if (options.es)
  1136. {
  1137. if (options.version < 300)
  1138. SPIRV_CROSS_THROW("sample requires ESSL 300.");
  1139. else if (options.version < 320)
  1140. require_extension_internal("GL_OES_shader_multisample_interpolation");
  1141. }
  1142. res += "sample ";
  1143. }
  1144. if (flags.get(DecorationInvariant) && (options.es || options.version >= 120))
  1145. res += "invariant ";
  1146. if (flags.get(DecorationPerPrimitiveEXT))
  1147. {
  1148. res += "perprimitiveEXT ";
  1149. require_extension_internal("GL_EXT_mesh_shader");
  1150. }
  1151. if (flags.get(DecorationExplicitInterpAMD))
  1152. {
  1153. require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
  1154. res += "__explicitInterpAMD ";
  1155. }
  1156. if (flags.get(DecorationPerVertexKHR))
  1157. {
  1158. if (options.es && options.version < 320)
  1159. SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320.");
  1160. else if (!options.es && options.version < 450)
  1161. SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450.");
  1162. if (barycentric_is_nv)
  1163. {
  1164. require_extension_internal("GL_NV_fragment_shader_barycentric");
  1165. res += "pervertexNV ";
  1166. }
  1167. else
  1168. {
  1169. require_extension_internal("GL_EXT_fragment_shader_barycentric");
  1170. res += "pervertexEXT ";
  1171. }
  1172. }
  1173. return res;
  1174. }
  1175. string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
  1176. {
  1177. if (is_legacy())
  1178. return "";
  1179. bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
  1180. if (!is_block)
  1181. return "";
  1182. auto &memb = ir.meta[type.self].members;
  1183. if (index >= memb.size())
  1184. return "";
  1185. auto &dec = memb[index];
  1186. SmallVector<string> attr;
  1187. if (has_member_decoration(type.self, index, DecorationPassthroughNV))
  1188. attr.push_back("passthrough");
  1189. // We can only apply layouts on members in block interfaces.
  1190. // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
  1191. // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
  1192. // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
  1193. //
  1194. // We would like to go from (SPIR-V style):
  1195. //
  1196. // struct Foo { layout(row_major) mat4 matrix; };
  1197. // buffer UBO { Foo foo; };
  1198. //
  1199. // to
  1200. //
  1201. // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
  1202. // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
  1203. auto flags = combined_decoration_for_member(type, index);
  1204. if (flags.get(DecorationRowMajor))
  1205. attr.push_back("row_major");
  1206. // We don't emit any global layouts, so column_major is default.
  1207. //if (flags & (1ull << DecorationColMajor))
  1208. // attr.push_back("column_major");
  1209. if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
  1210. attr.push_back(join("location = ", dec.location));
  1211. // Can only declare component if we can declare location.
  1212. if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
  1213. {
  1214. if (!options.es)
  1215. {
  1216. if (options.version < 440 && options.version >= 140)
  1217. require_extension_internal("GL_ARB_enhanced_layouts");
  1218. else if (options.version < 140)
  1219. SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
  1220. attr.push_back(join("component = ", dec.component));
  1221. }
  1222. else
  1223. SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
  1224. }
// SPIRVCrossDecorationExplicitOffset is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
  1226. // This is only done selectively in GLSL as needed.
  1227. if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
  1228. dec.decoration_flags.get(DecorationOffset))
  1229. attr.push_back(join("offset = ", dec.offset));
  1230. else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
  1231. attr.push_back(join("xfb_offset = ", dec.offset));
  1232. if (attr.empty())
  1233. return "";
  1234. string res = "layout(";
  1235. res += merge(attr);
  1236. res += ") ";
  1237. return res;
  1238. }
  1239. const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
  1240. {
  1241. if (options.es && is_desktop_only_format(format))
  1242. SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
  1243. switch (format)
  1244. {
  1245. case ImageFormatRgba32f:
  1246. return "rgba32f";
  1247. case ImageFormatRgba16f:
  1248. return "rgba16f";
  1249. case ImageFormatR32f:
  1250. return "r32f";
  1251. case ImageFormatRgba8:
  1252. return "rgba8";
  1253. case ImageFormatRgba8Snorm:
  1254. return "rgba8_snorm";
  1255. case ImageFormatRg32f:
  1256. return "rg32f";
  1257. case ImageFormatRg16f:
  1258. return "rg16f";
  1259. case ImageFormatRgba32i:
  1260. return "rgba32i";
  1261. case ImageFormatRgba16i:
  1262. return "rgba16i";
  1263. case ImageFormatR32i:
  1264. return "r32i";
  1265. case ImageFormatRgba8i:
  1266. return "rgba8i";
  1267. case ImageFormatRg32i:
  1268. return "rg32i";
  1269. case ImageFormatRg16i:
  1270. return "rg16i";
  1271. case ImageFormatRgba32ui:
  1272. return "rgba32ui";
  1273. case ImageFormatRgba16ui:
  1274. return "rgba16ui";
  1275. case ImageFormatR32ui:
  1276. return "r32ui";
  1277. case ImageFormatRgba8ui:
  1278. return "rgba8ui";
  1279. case ImageFormatRg32ui:
  1280. return "rg32ui";
  1281. case ImageFormatRg16ui:
  1282. return "rg16ui";
  1283. case ImageFormatR11fG11fB10f:
  1284. return "r11f_g11f_b10f";
  1285. case ImageFormatR16f:
  1286. return "r16f";
  1287. case ImageFormatRgb10A2:
  1288. return "rgb10_a2";
  1289. case ImageFormatR8:
  1290. return "r8";
  1291. case ImageFormatRg8:
  1292. return "rg8";
  1293. case ImageFormatR16:
  1294. return "r16";
  1295. case ImageFormatRg16:
  1296. return "rg16";
  1297. case ImageFormatRgba16:
  1298. return "rgba16";
  1299. case ImageFormatR16Snorm:
  1300. return "r16_snorm";
  1301. case ImageFormatRg16Snorm:
  1302. return "rg16_snorm";
  1303. case ImageFormatRgba16Snorm:
  1304. return "rgba16_snorm";
  1305. case ImageFormatR8Snorm:
  1306. return "r8_snorm";
  1307. case ImageFormatRg8Snorm:
  1308. return "rg8_snorm";
  1309. case ImageFormatR8ui:
  1310. return "r8ui";
  1311. case ImageFormatRg8ui:
  1312. return "rg8ui";
  1313. case ImageFormatR16ui:
  1314. return "r16ui";
  1315. case ImageFormatRgb10a2ui:
  1316. return "rgb10_a2ui";
  1317. case ImageFormatR8i:
  1318. return "r8i";
  1319. case ImageFormatRg8i:
  1320. return "rg8i";
  1321. case ImageFormatR16i:
  1322. return "r16i";
  1323. case ImageFormatR64i:
  1324. return "r64i";
  1325. case ImageFormatR64ui:
  1326. return "r64ui";
  1327. default:
  1328. case ImageFormatUnknown:
  1329. return nullptr;
  1330. }
  1331. }
  1332. uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
  1333. {
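// Size in bytes of a single scalar component; vector, matrix and array sizes build on this.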
  1334. switch (type.basetype)
  1335. {
  1336. case SPIRType::Double:
  1337. case SPIRType::Int64:
  1338. case SPIRType::UInt64:
  1339. return 8;
  1340. case SPIRType::Float:
  1341. case SPIRType::Int:
  1342. case SPIRType::UInt:
  1343. return 4;
  1344. case SPIRType::Half:
  1345. case SPIRType::Short:
  1346. case SPIRType::UShort:
  1347. return 2;
  1348. case SPIRType::SByte:
  1349. case SPIRType::UByte:
  1350. return 1;
  1351. default:
  1352. SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
  1353. }
  1354. }
  1355. uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
  1356. BufferPackingStandard packing)
  1357. {
  1358. // If using PhysicalStorageBufferEXT storage class, this is a pointer,
  1359. // and is 64-bit.
  1360. if (type_is_top_level_physical_pointer(type))
  1361. {
  1362. if (!type.pointer)
  1363. SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
  1364. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
  1365. {
  1366. if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
  1367. return 16;
  1368. else
  1369. return 8;
  1370. }
  1371. else
  1372. SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
  1373. }
  1374. else if (type_is_top_level_array(type))
  1375. {
  1376. uint32_t minimum_alignment = 1;
  1377. if (packing_is_vec4_padded(packing))
  1378. minimum_alignment = 16;
  1379. auto *tmp = &get<SPIRType>(type.parent_type);
  1380. while (!tmp->array.empty())
  1381. tmp = &get<SPIRType>(tmp->parent_type);
  1382. // Get the alignment of the base type, then maybe round up.
  1383. return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
  1384. }
  1385. if (type.basetype == SPIRType::Struct)
  1386. {
  1387. // Rule 9. Structs alignments are maximum alignment of its members.
  1388. uint32_t alignment = 1;
  1389. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1390. {
  1391. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1392. alignment =
  1393. max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
  1394. }
  1395. // In std140, struct alignment is rounded up to 16.
  1396. if (packing_is_vec4_padded(packing))
  1397. alignment = max<uint32_t>(alignment, 16u);
  1398. return alignment;
  1399. }
  1400. else
  1401. {
  1402. const uint32_t base_alignment = type_to_packed_base_size(type, packing);
  1403. // Alignment requirement for scalar block layout is always the alignment for the most basic component.
  1404. if (packing_is_scalar(packing))
  1405. return base_alignment;
// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
// a vec4; this is handled outside since that part knows our current offset.
  1408. if (type.columns == 1 && packing_is_hlsl(packing))
  1409. return base_alignment;
  1410. // From 7.6.2.2 in GL 4.5 core spec.
  1411. // Rule 1
  1412. if (type.vecsize == 1 && type.columns == 1)
  1413. return base_alignment;
  1414. // Rule 2
  1415. if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
  1416. return type.vecsize * base_alignment;
  1417. // Rule 3
  1418. if (type.vecsize == 3 && type.columns == 1)
  1419. return 4 * base_alignment;
  1420. // Rule 4 implied. Alignment does not change in std430.
  1421. // Rule 5. Column-major matrices are stored as arrays of
  1422. // vectors.
  1423. if (flags.get(DecorationColMajor) && type.columns > 1)
  1424. {
  1425. if (packing_is_vec4_padded(packing))
  1426. return 4 * base_alignment;
  1427. else if (type.vecsize == 3)
  1428. return 4 * base_alignment;
  1429. else
  1430. return type.vecsize * base_alignment;
  1431. }
  1432. // Rule 6 implied.
  1433. // Rule 7.
  1434. if (flags.get(DecorationRowMajor) && type.vecsize > 1)
  1435. {
  1436. if (packing_is_vec4_padded(packing))
  1437. return 4 * base_alignment;
  1438. else if (type.columns == 3)
  1439. return 4 * base_alignment;
  1440. else
  1441. return type.columns * base_alignment;
  1442. }
  1443. // Rule 8 implied.
  1444. }
  1445. SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
  1446. }
  1447. uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
  1448. BufferPackingStandard packing)
  1449. {
  1450. // Array stride is equal to aligned size of the underlying type.
  1451. uint32_t parent = type.parent_type;
  1452. assert(parent);
  1453. auto &tmp = get<SPIRType>(parent);
  1454. uint32_t size = type_to_packed_size(tmp, flags, packing);
  1455. uint32_t alignment = type_to_packed_alignment(type, flags, packing);
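// Round the element size up to its alignment (alignment is always a power of two here).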
  1456. return (size + alignment - 1) & ~(alignment - 1);
  1457. }
  1458. uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
  1459. {
  1460. // If using PhysicalStorageBufferEXT storage class, this is a pointer,
  1461. // and is 64-bit.
  1462. if (type_is_top_level_physical_pointer(type))
  1463. {
  1464. if (!type.pointer)
  1465. SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
  1466. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
  1467. return 8;
  1468. else
  1469. SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
  1470. }
  1471. else if (type_is_top_level_array(type))
  1472. {
  1473. uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
  1474. // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
  1475. // so that it is possible to pack other vectors into the last element.
  1476. if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
  1477. packed_size -= (4 - type.vecsize) * (type.width / 8);
  1478. return packed_size;
  1479. }
  1480. uint32_t size = 0;
  1481. if (type.basetype == SPIRType::Struct)
  1482. {
  1483. uint32_t pad_alignment = 1;
  1484. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1485. {
  1486. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1487. auto &member_type = get<SPIRType>(type.member_types[i]);
  1488. uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
  1489. uint32_t alignment = max(packed_alignment, pad_alignment);
  1490. // The next member following a struct member is aligned to the base alignment of the struct that came before.
  1491. // GL 4.5 spec, 7.6.2.2.
  1492. if (member_type.basetype == SPIRType::Struct)
  1493. pad_alignment = packed_alignment;
  1494. else
  1495. pad_alignment = 1;
  1496. size = (size + alignment - 1) & ~(alignment - 1);
  1497. size += type_to_packed_size(member_type, member_flags, packing);
  1498. }
  1499. }
  1500. else
  1501. {
  1502. const uint32_t base_alignment = type_to_packed_base_size(type, packing);
  1503. if (packing_is_scalar(packing))
  1504. {
  1505. size = type.vecsize * type.columns * base_alignment;
  1506. }
  1507. else
  1508. {
  1509. if (type.columns == 1)
  1510. size = type.vecsize * base_alignment;
  1511. if (flags.get(DecorationColMajor) && type.columns > 1)
  1512. {
  1513. if (packing_is_vec4_padded(packing))
  1514. size = type.columns * 4 * base_alignment;
  1515. else if (type.vecsize == 3)
  1516. size = type.columns * 4 * base_alignment;
  1517. else
  1518. size = type.columns * type.vecsize * base_alignment;
  1519. }
  1520. if (flags.get(DecorationRowMajor) && type.vecsize > 1)
  1521. {
  1522. if (packing_is_vec4_padded(packing))
  1523. size = type.vecsize * 4 * base_alignment;
  1524. else if (type.columns == 3)
  1525. size = type.vecsize * 4 * base_alignment;
  1526. else
  1527. size = type.vecsize * type.columns * base_alignment;
  1528. }
  1529. // For matrices in HLSL, the last element has a size which depends on its vector size,
  1530. // so that it is possible to pack other vectors into the last element.
  1531. if (packing_is_hlsl(packing) && type.columns > 1)
  1532. size -= (4 - type.vecsize) * (type.width / 8);
  1533. }
  1534. }
  1535. return size;
  1536. }
  1537. bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
  1538. uint32_t *failed_validation_index, uint32_t start_offset,
  1539. uint32_t end_offset)
  1540. {
  1541. // This is very tricky and error prone, but try to be exhaustive and correct here.
  1542. // SPIR-V doesn't directly say if we're using std430 or std140.
  1543. // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
  1544. // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
  1545. // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
  1546. //
  1547. // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
  1548. // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
  1549. //
  1550. // The only two differences between std140 and std430 are related to padding alignment/array stride
  1551. // in arrays and structs. In std140 they take minimum vec4 alignment.
  1552. // std430 only removes the vec4 requirement.
  1553. uint32_t offset = 0;
  1554. uint32_t pad_alignment = 1;
  1555. bool is_top_level_block =
  1556. has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
  1557. for (uint32_t i = 0; i < type.member_types.size(); i++)
  1558. {
  1559. auto &memb_type = get<SPIRType>(type.member_types[i]);
  1560. auto member_flags = ir.meta[type.self].members[i].decoration_flags;
  1561. // Verify alignment rules.
  1562. uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
  1563. // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
  1564. // layout(constant_id = 0) const int s = 10;
  1565. // const int S = s + 5; // SpecConstantOp
  1566. // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
  1567. // we would need full implementation of compile-time constant folding. :(
  1568. // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
  1569. // for our analysis (e.g. unsized arrays).
  1570. // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
  1571. // Querying size of this member will fail, so just don't call it unless we have to.
  1572. //
  1573. // This is likely "best effort" we can support without going into unacceptably complicated workarounds.
  1574. bool member_can_be_unsized =
  1575. is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
  1576. uint32_t packed_size = 0;
  1577. if (!member_can_be_unsized || packing_is_hlsl(packing))
  1578. packed_size = type_to_packed_size(memb_type, member_flags, packing);
  1579. // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
  1580. uint32_t actual_offset = type_struct_member_offset(type, i);
  1581. if (packing_is_hlsl(packing))
  1582. {
  1583. // If a member straddles across a vec4 boundary, alignment is actually vec4.
  1584. uint32_t begin_word = actual_offset / 16;
  1585. uint32_t end_word = (actual_offset + packed_size - 1) / 16;
  1586. if (begin_word != end_word)
  1587. packed_alignment = max<uint32_t>(packed_alignment, 16u);
  1588. }
  1589. // Field is not in the specified range anymore and we can ignore any further fields.
  1590. if (actual_offset >= end_offset)
  1591. break;
  1592. uint32_t alignment = max(packed_alignment, pad_alignment);
  1593. offset = (offset + alignment - 1) & ~(alignment - 1);
  1594. // The next member following a struct member is aligned to the base alignment of the struct that came before.
  1595. // GL 4.5 spec, 7.6.2.2.
  1596. if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
  1597. pad_alignment = packed_alignment;
  1598. else
  1599. pad_alignment = 1;
  1600. // Only care about packing if we are in the given range
  1601. if (actual_offset >= start_offset)
  1602. {
  1603. // We only care about offsets in std140, std430, etc ...
  1604. // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
  1605. if (!packing_has_flexible_offset(packing))
  1606. {
  1607. if (actual_offset != offset) // This cannot be the packing we're looking for.
  1608. {
  1609. if (failed_validation_index)
  1610. *failed_validation_index = i;
  1611. return false;
  1612. }
  1613. }
  1614. else if ((actual_offset & (alignment - 1)) != 0)
  1615. {
  1616. // We still need to verify that alignment rules are observed, even if we have explicit offset.
  1617. if (failed_validation_index)
  1618. *failed_validation_index = i;
  1619. return false;
  1620. }
  1621. // Verify array stride rules.
  1622. if (type_is_top_level_array(memb_type) &&
  1623. type_to_packed_array_stride(memb_type, member_flags, packing) !=
  1624. type_struct_member_array_stride(type, i))
  1625. {
  1626. if (failed_validation_index)
  1627. *failed_validation_index = i;
  1628. return false;
  1629. }
  1630. // Verify that sub-structs also follow packing rules.
  1631. // We cannot use enhanced layouts on substructs, so they better be up to spec.
  1632. auto substruct_packing = packing_to_substruct_packing(packing);
  1633. if (!memb_type.pointer && !memb_type.member_types.empty() &&
  1634. !buffer_is_packing_standard(memb_type, substruct_packing))
  1635. {
  1636. if (failed_validation_index)
  1637. *failed_validation_index = i;
  1638. return false;
  1639. }
  1640. }
  1641. // Bump size.
  1642. offset = actual_offset + packed_size;
  1643. }
  1644. return true;
  1645. }
  1646. bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
  1647. {
// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
  1649. // Be very explicit here about how to solve the issue.
  1650. if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
  1651. (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
  1652. {
  1653. uint32_t minimum_desktop_version = block ? 440 : 410;
  1654. // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
  1655. if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
  1656. return false;
  1657. else if (options.es && options.version < 310)
  1658. return false;
  1659. }
  1660. if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
  1661. (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
  1662. {
  1663. if (options.es && options.version < 300)
  1664. return false;
  1665. else if (!options.es && options.version < 330)
  1666. return false;
  1667. }
  1668. if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
  1669. {
  1670. if (options.es && options.version < 310)
  1671. return false;
  1672. else if (!options.es && options.version < 430)
  1673. return false;
  1674. }
  1675. return true;
  1676. }
  1677. string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
  1678. {
  1679. // FIXME: Come up with a better solution for when to disable layouts.
  1680. // Having layouts depend on extensions as well as which types
  1681. // of layouts are used. For now, the simple solution is to just disable
  1682. // layouts for legacy versions.
  1683. if (is_legacy())
  1684. return "";
  1685. if (subpass_input_is_framebuffer_fetch(var.self))
  1686. return "";
  1687. SmallVector<string> attr;
  1688. auto &type = get<SPIRType>(var.basetype);
  1689. auto &flags = get_decoration_bitset(var.self);
  1690. auto &typeflags = get_decoration_bitset(type.self);
  1691. if (flags.get(DecorationPassthroughNV))
  1692. attr.push_back("passthrough");
  1693. if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
  1694. attr.push_back("push_constant");
  1695. else if (var.storage == StorageClassShaderRecordBufferKHR)
  1696. attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
  1697. if (flags.get(DecorationRowMajor))
  1698. attr.push_back("row_major");
  1699. if (flags.get(DecorationColMajor))
  1700. attr.push_back("column_major");
  1701. if (options.vulkan_semantics)
  1702. {
  1703. if (flags.get(DecorationInputAttachmentIndex))
  1704. attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
  1705. }
  1706. bool is_block = has_decoration(type.self, DecorationBlock);
  1707. if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
  1708. {
  1709. Bitset combined_decoration;
  1710. for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
  1711. combined_decoration.merge_or(combined_decoration_for_member(type, i));
  1712. // If our members have location decorations, we don't need to
  1713. // emit location decorations at the top as well (looks weird).
  1714. if (!combined_decoration.get(DecorationLocation))
  1715. attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
  1716. }
  1717. if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput &&
  1718. location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation)))
  1719. {
  1720. attr.push_back("noncoherent");
  1721. }
  1722. // Transform feedback
  1723. bool uses_enhanced_layouts = false;
  1724. if (is_block && var.storage == StorageClassOutput)
  1725. {
  1726. // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
  1727. // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
  1728. // is the xfb_offset.
  1729. uint32_t member_count = uint32_t(type.member_types.size());
  1730. bool have_xfb_buffer_stride = false;
  1731. bool have_any_xfb_offset = false;
  1732. bool have_geom_stream = false;
  1733. uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
  1734. if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
  1735. {
  1736. have_xfb_buffer_stride = true;
  1737. xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
  1738. xfb_stride = get_decoration(var.self, DecorationXfbStride);
  1739. }
  1740. if (flags.get(DecorationStream))
  1741. {
  1742. have_geom_stream = true;
  1743. geom_stream = get_decoration(var.self, DecorationStream);
  1744. }
  1745. // Verify that none of the members violate our assumption.
  1746. for (uint32_t i = 0; i < member_count; i++)
  1747. {
  1748. if (has_member_decoration(type.self, i, DecorationStream))
  1749. {
  1750. uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
  1751. if (have_geom_stream && member_geom_stream != geom_stream)
  1752. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  1753. have_geom_stream = true;
  1754. geom_stream = member_geom_stream;
  1755. }
  1756. // Only members with an Offset decoration participate in XFB.
  1757. if (!has_member_decoration(type.self, i, DecorationOffset))
  1758. continue;
  1759. have_any_xfb_offset = true;
  1760. if (has_member_decoration(type.self, i, DecorationXfbBuffer))
  1761. {
  1762. uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
  1763. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  1764. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
  1765. have_xfb_buffer_stride = true;
  1766. xfb_buffer = buffer_index;
  1767. }
  1768. if (has_member_decoration(type.self, i, DecorationXfbStride))
  1769. {
  1770. uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
  1771. if (have_xfb_buffer_stride && stride != xfb_stride)
  1772. SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  1773. have_xfb_buffer_stride = true;
  1774. xfb_stride = stride;
  1775. }
  1776. }
  1777. if (have_xfb_buffer_stride && have_any_xfb_offset)
  1778. {
  1779. attr.push_back(join("xfb_buffer = ", xfb_buffer));
  1780. attr.push_back(join("xfb_stride = ", xfb_stride));
  1781. uses_enhanced_layouts = true;
  1782. }
  1783. if (have_geom_stream)
  1784. {
  1785. if (get_execution_model() != ExecutionModelGeometry)
  1786. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  1787. if (options.es)
  1788. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  1789. if (options.version < 400)
  1790. require_extension_internal("GL_ARB_transform_feedback3");
  1791. attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
  1792. }
  1793. }
  1794. else if (var.storage == StorageClassOutput)
  1795. {
  1796. if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
  1797. {
  1798. // XFB for standalone variables, we can emit all decorations.
  1799. attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
  1800. attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
  1801. attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
  1802. uses_enhanced_layouts = true;
  1803. }
  1804. if (flags.get(DecorationStream))
  1805. {
  1806. if (get_execution_model() != ExecutionModelGeometry)
  1807. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  1808. if (options.es)
  1809. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  1810. if (options.version < 400)
  1811. require_extension_internal("GL_ARB_transform_feedback3");
  1812. attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
  1813. }
  1814. }
  1815. // Can only declare Component if we can declare location.
  1816. if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
  1817. {
  1818. uses_enhanced_layouts = true;
  1819. attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
  1820. }
  1821. if (uses_enhanced_layouts)
  1822. {
  1823. if (!options.es)
  1824. {
  1825. if (options.version < 440 && options.version >= 140)
  1826. require_extension_internal("GL_ARB_enhanced_layouts");
  1827. else if (options.version < 140)
  1828. SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
  1829. if (!options.es && options.version < 440)
  1830. require_extension_internal("GL_ARB_enhanced_layouts");
  1831. }
  1832. else if (options.es)
  1833. SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
  1834. }
  1835. if (flags.get(DecorationIndex))
  1836. attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
  1837. // Do not emit set = decoration in regular GLSL output, but
  1838. // we need to preserve it in Vulkan GLSL mode.
  1839. if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
  1840. {
  1841. if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
  1842. attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
  1843. }
  1844. bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
  1845. bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
  1846. (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
  1847. bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
  1848. bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
  1849. // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
  1850. bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
// Pretend there are no UBOs when the options say so.
  1852. if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
  1853. can_use_buffer_blocks = false;
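// binding = N on uniforms/buffers needs GL 4.2 (or GL_ARB_shading_language_420pack) on desktop, and ESSL 3.10 on ES.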
  1854. bool can_use_binding;
  1855. if (options.es)
  1856. can_use_binding = options.version >= 310;
  1857. else
  1858. can_use_binding = options.enable_420pack_extension || (options.version >= 420);
  1859. // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
  1860. if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
  1861. can_use_binding = false;
  1862. if (var.storage == StorageClassShaderRecordBufferKHR)
  1863. can_use_binding = false;
  1864. if (can_use_binding && flags.get(DecorationBinding))
  1865. attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));
  1866. if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
  1867. attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
  1868. // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
  1869. // If SPIR-V does not comply with either layout, we cannot really work around it.
  1870. if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
  1871. {
  1872. attr.push_back(buffer_to_packing_standard(type, false));
  1873. }
  1874. else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
  1875. {
  1876. attr.push_back(buffer_to_packing_standard(type, true));
  1877. }
// For images, the type itself adds a layout qualifier.
  1879. // Only emit the format for storage images.
  1880. if (type.basetype == SPIRType::Image && type.image.sampled == 2)
  1881. {
  1882. const char *fmt = format_to_glsl(type.image.format);
  1883. if (fmt)
  1884. attr.push_back(fmt);
  1885. }
  1886. if (attr.empty())
  1887. return "";
  1888. string res = "layout(";
  1889. res += merge(attr);
  1890. res += ") ";
  1891. return res;
  1892. }
  1893. string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
  1894. {
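// Probe the candidate layouts in order: plain std430/std140 first, then scalar block layout,
// then the enhanced-layout variants which require explicit offset qualifiers and extra extensions.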
  1895. if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
  1896. return "std430";
  1897. else if (buffer_is_packing_standard(type, BufferPackingStd140))
  1898. return "std140";
  1899. else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
  1900. {
  1901. require_extension_internal("GL_EXT_scalar_block_layout");
  1902. return "scalar";
  1903. }
  1904. else if (support_std430_without_scalar_layout &&
  1905. buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
  1906. {
  1907. if (options.es && !options.vulkan_semantics)
  1908. SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
  1909. "not support GL_ARB_enhanced_layouts.");
  1910. if (!options.es && !options.vulkan_semantics && options.version < 440)
  1911. require_extension_internal("GL_ARB_enhanced_layouts");
  1912. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1913. return "std430";
  1914. }
  1915. else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
  1916. {
  1917. // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference,
  1918. // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
  1919. // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
  1920. if (options.es && !options.vulkan_semantics)
  1921. SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
  1922. "not support GL_ARB_enhanced_layouts.");
  1923. if (!options.es && !options.vulkan_semantics && options.version < 440)
  1924. require_extension_internal("GL_ARB_enhanced_layouts");
  1925. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1926. return "std140";
  1927. }
  1928. else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
  1929. {
  1930. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1931. require_extension_internal("GL_EXT_scalar_block_layout");
  1932. return "scalar";
  1933. }
  1934. else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
  1935. buffer_is_packing_standard(type, BufferPackingStd430))
  1936. {
  1937. // UBOs can support std430 with GL_EXT_scalar_block_layout.
  1938. require_extension_internal("GL_EXT_scalar_block_layout");
  1939. return "std430";
  1940. }
  1941. else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
  1942. buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
  1943. {
  1944. // UBOs can support std430 with GL_EXT_scalar_block_layout.
  1945. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
  1946. require_extension_internal("GL_EXT_scalar_block_layout");
  1947. return "std430";
  1948. }
  1949. else
  1950. {
  1951. SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced "
  1952. "layouts. You can try flattening this block to support a more flexible layout.");
  1953. }
  1954. }
  1955. void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
  1956. {
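// Dispatch on how the push constant block can be realized for the current target:
// flattened, a real Vulkan push_constant block, an emulated UBO, or a plain uniform struct.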
  1957. if (flattened_buffer_blocks.count(var.self))
  1958. emit_buffer_block_flattened(var);
  1959. else if (options.vulkan_semantics)
  1960. emit_push_constant_block_vulkan(var);
  1961. else if (options.emit_push_constant_as_uniform_buffer)
  1962. emit_buffer_block_native(var);
  1963. else
  1964. emit_push_constant_block_glsl(var);
  1965. }
  1966. void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
  1967. {
  1968. emit_buffer_block(var);
  1969. }
  1970. void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
  1971. {
  1972. // OpenGL has no concept of push constant blocks, implement it as a uniform struct.
  1973. auto &type = get<SPIRType>(var.basetype);
  1974. unset_decoration(var.self, DecorationBinding);
  1975. unset_decoration(var.self, DecorationDescriptorSet);
  1976. #if 0
  1977. if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
  1978. SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
  1979. "Remap to location with reflection API first or disable these decorations.");
  1980. #endif
  1981. // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
  1982. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
  1983. bool block_flag = has_decoration(type.self, DecorationBlock);
  1984. unset_decoration(type.self, DecorationBlock);
  1985. emit_struct(type);
  1986. if (block_flag)
  1987. set_decoration(type.self, DecorationBlock);
  1988. emit_uniform(var);
  1989. statement("");
  1990. }
  1991. void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
  1992. {
  1993. auto &type = get<SPIRType>(var.basetype);
  1994. bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
  1995. if (flattened_buffer_blocks.count(var.self))
  1996. emit_buffer_block_flattened(var);
  1997. else if (is_legacy() || (!options.es && options.version == 130) ||
  1998. (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
  1999. emit_buffer_block_legacy(var);
  2000. else
  2001. emit_buffer_block_native(var);
  2002. }
  2003. void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
  2004. {
  2005. auto &type = get<SPIRType>(var.basetype);
  2006. bool ssbo = var.storage == StorageClassStorageBuffer ||
  2007. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  2008. if (ssbo)
  2009. SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
  2010. // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
  2011. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
  2012. auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
  2013. bool block_flag = block_flags.get(DecorationBlock);
  2014. block_flags.clear(DecorationBlock);
  2015. emit_struct(type);
  2016. if (block_flag)
  2017. block_flags.set(DecorationBlock);
  2018. emit_uniform(var);
  2019. statement("");
  2020. }
  2021. void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration)
  2022. {
  2023. auto &type = get<SPIRType>(type_id);
  2024. string buffer_name;
  2025. if (forward_declaration)
  2026. {
// Block names should never alias, but from HLSL input they kind of can, because block types are reused for UAVs ...
// Allow an aliased name since we might be declaring the block twice: once as a buffer reference (forward declared)
// and once as the proper declaration. The names must match up.
  2030. buffer_name = to_name(type.self, false);
  2031. // Shaders never use the block by interface name, so we don't
  2032. // have to track this other than updating name caches.
  2033. // If we have a collision for any reason, just fallback immediately.
  2034. if (ir.meta[type.self].decoration.alias.empty() ||
  2035. block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
  2036. resource_names.find(buffer_name) != end(resource_names))
  2037. {
  2038. buffer_name = join("_", type.self);
  2039. }
  2040. // Make sure we get something unique for both global name scope and block name scope.
  2041. // See GLSL 4.5 spec: section 4.3.9 for details.
  2042. add_variable(block_ssbo_names, resource_names, buffer_name);
  2043. // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
  2044. // This cannot conflict with anything else, so we're safe now.
// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
  2046. if (buffer_name.empty())
  2047. buffer_name = join("_", type.self);
  2048. block_names.insert(buffer_name);
  2049. block_ssbo_names.insert(buffer_name);
  2050. // Ensure we emit the correct name when emitting non-forward pointer type.
  2051. ir.meta[type.self].decoration.alias = buffer_name;
  2052. }
  2053. else if (type.basetype != SPIRType::Struct)
  2054. buffer_name = type_to_glsl(type);
  2055. else
  2056. buffer_name = to_name(type.self, false);
  2057. if (!forward_declaration)
  2058. {
  2059. auto itr = physical_storage_type_to_alignment.find(type_id);
  2060. uint32_t alignment = 0;
  2061. if (itr != physical_storage_type_to_alignment.end())
  2062. alignment = itr->second.alignment;
  2063. if (type.basetype == SPIRType::Struct)
  2064. {
  2065. SmallVector<std::string> attributes;
  2066. attributes.push_back("buffer_reference");
  2067. if (alignment)
  2068. attributes.push_back(join("buffer_reference_align = ", alignment));
  2069. attributes.push_back(buffer_to_packing_standard(type, true));
  2070. auto flags = ir.get_buffer_block_type_flags(type);
  2071. string decorations;
  2072. if (flags.get(DecorationRestrict))
  2073. decorations += " restrict";
  2074. if (flags.get(DecorationCoherent))
  2075. decorations += " coherent";
  2076. if (flags.get(DecorationNonReadable))
  2077. decorations += " writeonly";
  2078. if (flags.get(DecorationNonWritable))
  2079. decorations += " readonly";
  2080. statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name);
  2081. }
  2082. else if (alignment)
  2083. statement("layout(buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name);
  2084. else
  2085. statement("layout(buffer_reference) buffer ", buffer_name);
  2086. begin_scope();
  2087. if (type.basetype == SPIRType::Struct)
  2088. {
  2089. type.member_name_cache.clear();
  2090. uint32_t i = 0;
  2091. for (auto &member : type.member_types)
  2092. {
  2093. add_member_name(type, i);
  2094. emit_struct_member(type, member, i);
  2095. i++;
  2096. }
  2097. }
  2098. else
  2099. {
  2100. auto &pointee_type = get_pointee_type(type);
  2101. statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
  2102. }
  2103. end_scope_decl();
  2104. statement("");
  2105. }
  2106. else
  2107. {
  2108. statement("layout(buffer_reference) buffer ", buffer_name, ";");
  2109. }
  2110. }
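// Taken together, the forward-declaration pass and the definition pass above produce GLSL
// roughly like the following for a struct pointee (names, members and alignment are illustrative):
//
//   layout(buffer_reference) buffer Node;
//   layout(buffer_reference, buffer_reference_align = 16, std430) buffer Node
//   {
//       vec4 value;
//       Node next;
//   };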
  2111. void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
  2112. {
  2113. auto &type = get<SPIRType>(var.basetype);
  2114. Bitset flags = ir.get_buffer_block_flags(var);
  2115. bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
  2116. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  2117. bool is_restrict = ssbo && flags.get(DecorationRestrict);
  2118. bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
  2119. bool is_readonly = ssbo && flags.get(DecorationNonWritable);
  2120. bool is_coherent = ssbo && flags.get(DecorationCoherent);
  2121. // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
  2122. auto buffer_name = to_name(type.self, false);
  2123. auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
  2124. // Shaders never use the block by interface name, so we don't
  2125. // have to track this other than updating name caches.
// If we have a collision for any reason, just fall back immediately.
  2127. if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
  2128. resource_names.find(buffer_name) != end(resource_names))
  2129. {
  2130. buffer_name = get_block_fallback_name(var.self);
  2131. }
  2132. // Make sure we get something unique for both global name scope and block name scope.
  2133. // See GLSL 4.5 spec: section 4.3.9 for details.
  2134. add_variable(block_namespace, resource_names, buffer_name);
// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
// This cannot conflict with anything else, so we're safe now.
// We cannot reuse this fallback name in either the global name scope (blocked by block_names) or the block name scope.
  2138. if (buffer_name.empty())
  2139. buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
  2140. block_names.insert(buffer_name);
  2141. block_namespace.insert(buffer_name);
  2142. // Save for post-reflection later.
  2143. declared_block_names[var.self] = buffer_name;
  2144. statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
  2145. is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
  2146. buffer_name);
  2147. begin_scope();
  2148. type.member_name_cache.clear();
  2149. uint32_t i = 0;
  2150. for (auto &member : type.member_types)
  2151. {
  2152. add_member_name(type, i);
  2153. emit_struct_member(type, member, i);
  2154. i++;
  2155. }
  2156. // var.self can be used as a backup name for the block name,
  2157. // so we need to make sure we don't disturb the name here on a recompile.
  2158. // It will need to be reset if we have to recompile.
  2159. preserve_alias_on_reset(var.self);
  2160. add_resource_name(var.self);
  2161. end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
  2162. statement("");
  2163. }
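// The resulting declaration looks roughly like the following; the layout() contents, memory
// qualifiers and instance/array suffix all depend on the variable (illustrative example):
//
//   layout(std430, binding = 0) restrict readonly buffer SSBO
//   {
//       vec4 data[];
//   } ssbo;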
  2164. void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
  2165. {
  2166. auto &type = get<SPIRType>(var.basetype);
  2167. // Block names should never alias.
  2168. auto buffer_name = to_name(type.self, false);
  2169. size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
  2170. SPIRType::BaseType basic_type;
  2171. if (get_common_basic_type(type, basic_type))
  2172. {
  2173. SPIRType tmp;
  2174. tmp.basetype = basic_type;
  2175. tmp.vecsize = 4;
  2176. if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
  2177. SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
  2178. auto flags = ir.get_buffer_block_flags(var);
  2179. statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
  2180. buffer_size, "];");
  2181. }
  2182. else
  2183. SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
  2184. }
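// A flattened UBO becomes a single array of 16-byte vectors which the flattened accesses later
// index into. As an illustrative example, a 32-byte float-only block named UBO would come out roughly as:
//
//   uniform vec4 UBO[2];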
  2185. const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
  2186. {
  2187. auto &execution = get_entry_point();
  2188. if (subpass_input_is_framebuffer_fetch(var.self))
  2189. return "";
  2190. if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
  2191. {
  2192. if (is_legacy() && execution.model == ExecutionModelVertex)
  2193. return var.storage == StorageClassInput ? "attribute " : "varying ";
  2194. else if (is_legacy() && execution.model == ExecutionModelFragment)
  2195. return "varying "; // Fragment outputs are renamed so they never hit this case.
  2196. else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
  2197. {
  2198. uint32_t loc = get_decoration(var.self, DecorationLocation);
  2199. bool is_inout = location_is_framebuffer_fetch(loc);
  2200. if (is_inout)
  2201. return "inout ";
  2202. else
  2203. return "out ";
  2204. }
  2205. else
  2206. return var.storage == StorageClassInput ? "in " : "out ";
  2207. }
  2208. else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
  2209. var.storage == StorageClassPushConstant)
  2210. {
  2211. return "uniform ";
  2212. }
  2213. else if (var.storage == StorageClassRayPayloadKHR)
  2214. {
  2215. return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
  2216. }
  2217. else if (var.storage == StorageClassIncomingRayPayloadKHR)
  2218. {
  2219. return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
  2220. }
  2221. else if (var.storage == StorageClassHitAttributeKHR)
  2222. {
  2223. return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
  2224. }
  2225. else if (var.storage == StorageClassCallableDataKHR)
  2226. {
  2227. return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
  2228. }
  2229. else if (var.storage == StorageClassIncomingCallableDataKHR)
  2230. {
  2231. return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
  2232. }
  2233. return "";
  2234. }
  2235. void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
  2236. const SmallVector<uint32_t> &indices)
  2237. {
  2238. uint32_t member_type_id = type.self;
  2239. const SPIRType *member_type = &type;
  2240. const SPIRType *parent_type = nullptr;
  2241. auto flattened_name = basename;
  2242. for (auto &index : indices)
  2243. {
  2244. flattened_name += "_";
  2245. flattened_name += to_member_name(*member_type, index);
  2246. parent_type = member_type;
  2247. member_type_id = member_type->member_types[index];
  2248. member_type = &get<SPIRType>(member_type_id);
  2249. }
  2250. assert(member_type->basetype != SPIRType::Struct);
  2251. // We're overriding struct member names, so ensure we do so on the primary type.
  2252. if (parent_type->type_alias)
  2253. parent_type = &get<SPIRType>(parent_type->type_alias);
  2254. // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
  2255. // which is not allowed.
  2256. ParsedIR::sanitize_underscores(flattened_name);
  2257. uint32_t last_index = indices.back();
  2258. // Pass in the varying qualifier here so it will appear in the correct declaration order.
  2259. // Replace member name while emitting it so it encodes both struct name and member name.
  2260. auto backup_name = get_member_name(parent_type->self, last_index);
  2261. auto member_name = to_member_name(*parent_type, last_index);
  2262. set_member_name(parent_type->self, last_index, flattened_name);
  2263. emit_struct_member(*parent_type, member_type_id, last_index, qual);
  2264. // Restore member name.
  2265. set_member_name(parent_type->self, last_index, member_name);
  2266. }
  2267. void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
  2268. const SmallVector<uint32_t> &indices)
  2269. {
  2270. auto sub_indices = indices;
  2271. sub_indices.push_back(0);
  2272. const SPIRType *member_type = &type;
  2273. for (auto &index : indices)
  2274. member_type = &get<SPIRType>(member_type->member_types[index]);
  2275. assert(member_type->basetype == SPIRType::Struct);
  2276. if (!member_type->array.empty())
  2277. SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
  2278. for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
  2279. {
  2280. sub_indices.back() = i;
  2281. if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
  2282. emit_flattened_io_block_struct(basename, type, qual, sub_indices);
  2283. else
  2284. emit_flattened_io_block_member(basename, type, qual, sub_indices);
  2285. }
  2286. }
  2287. void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
  2288. {
  2289. auto &var_type = get<SPIRType>(var.basetype);
  2290. if (!var_type.array.empty())
  2291. SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
  2292. // Emit flattened types based on the type alias. Normally, we are never supposed to emit
  2293. // struct declarations for aliased types.
  2294. auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
  2295. auto old_flags = ir.meta[type.self].decoration.decoration_flags;
  2296. // Emit the members as if they are part of a block to get all qualifiers.
  2297. ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
  2298. type.member_name_cache.clear();
  2299. SmallVector<uint32_t> member_indices;
  2300. member_indices.push_back(0);
  2301. auto basename = to_name(var.self);
  2302. uint32_t i = 0;
  2303. for (auto &member : type.member_types)
  2304. {
  2305. add_member_name(type, i);
  2306. auto &membertype = get<SPIRType>(member);
  2307. member_indices.back() = i;
  2308. if (membertype.basetype == SPIRType::Struct)
  2309. emit_flattened_io_block_struct(basename, type, qual, member_indices);
  2310. else
  2311. emit_flattened_io_block_member(basename, type, qual, member_indices);
  2312. i++;
  2313. }
  2314. ir.meta[type.self].decoration.decoration_flags = old_flags;
  2315. // Treat this variable as fully flattened from now on.
  2316. flattened_structs[var.self] = true;
  2317. }
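// Flattening joins the instance name and the (nested) member names with underscores and emits one
// varying per leaf member. E.g. a block instance "vout" with members "color" and "uv" would come
// out roughly as:
//
//   out vec4 vout_color;
//   out vec2 vout_uv;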
  2318. void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
  2319. {
  2320. auto &type = get<SPIRType>(var.basetype);
  2321. if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
  2322. !options.es && options.version < 410)
  2323. {
  2324. require_extension_internal("GL_ARB_vertex_attrib_64bit");
  2325. }
  2326. // Either make it plain in/out or in/out blocks depending on what shader is doing ...
  2327. bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
  2328. const char *qual = to_storage_qualifiers_glsl(var);
  2329. if (block)
  2330. {
  2331. // ESSL earlier than 310 and GLSL earlier than 150 did not support
  2332. // I/O variables which are struct types.
  2333. // To support this, flatten the struct into separate varyings instead.
  2334. if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
  2335. (!options.es && options.version < 150))
  2336. {
  2337. // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
  2338. // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
  2339. emit_flattened_io_block(var, qual);
  2340. }
  2341. else
  2342. {
  2343. if (options.es && options.version < 320)
  2344. {
  2345. // Geometry and tessellation extensions imply this extension.
  2346. if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
  2347. require_extension_internal("GL_EXT_shader_io_blocks");
  2348. }
  2349. // Workaround to make sure we can emit "patch in/out" correctly.
  2350. fixup_io_block_patch_primitive_qualifiers(var);
  2351. // Block names should never alias.
  2352. auto block_name = to_name(type.self, false);
  2353. // The namespace for I/O blocks is separate from other variables in GLSL.
  2354. auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
  2355. // Shaders never use the block by interface name, so we don't
  2356. // have to track this other than updating name caches.
  2357. if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
  2358. block_name = get_fallback_name(type.self);
  2359. else
  2360. block_namespace.insert(block_name);
// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
// This cannot conflict with anything else, so we're safe now.
  2363. if (block_name.empty())
  2364. block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
  2365. // Instance names cannot alias block names.
  2366. resource_names.insert(block_name);
  2367. const char *block_qualifier;
  2368. if (has_decoration(var.self, DecorationPatch))
  2369. block_qualifier = "patch ";
  2370. else if (has_decoration(var.self, DecorationPerPrimitiveEXT))
  2371. block_qualifier = "perprimitiveEXT ";
  2372. else
  2373. block_qualifier = "";
  2374. statement(layout_for_variable(var), block_qualifier, qual, block_name);
  2375. begin_scope();
  2376. type.member_name_cache.clear();
  2377. uint32_t i = 0;
  2378. for (auto &member : type.member_types)
  2379. {
  2380. add_member_name(type, i);
  2381. emit_struct_member(type, member, i);
  2382. i++;
  2383. }
  2384. add_resource_name(var.self);
  2385. end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
  2386. statement("");
  2387. }
  2388. }
  2389. else
  2390. {
  2391. // ESSL earlier than 310 and GLSL earlier than 150 did not support
  2392. // I/O variables which are struct types.
  2393. // To support this, flatten the struct into separate varyings instead.
  2394. if (type.basetype == SPIRType::Struct &&
  2395. (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
  2396. (!options.es && options.version < 150)))
  2397. {
  2398. emit_flattened_io_block(var, qual);
  2399. }
  2400. else
  2401. {
  2402. add_resource_name(var.self);
// Legacy GLSL did not support int attributes; we automatically
// declare them as float and cast them on load/store.
  2405. SPIRType newtype = type;
  2406. if (is_legacy() && var.storage == StorageClassInput && type.basetype == SPIRType::Int)
  2407. newtype.basetype = SPIRType::Float;
  2408. // Tessellation control and evaluation shaders must have either
  2409. // gl_MaxPatchVertices or unsized arrays for input arrays.
  2410. // Opt for unsized as it's the more "correct" variant to use.
  2411. if (type.storage == StorageClassInput && !type.array.empty() &&
  2412. !has_decoration(var.self, DecorationPatch) &&
  2413. (get_entry_point().model == ExecutionModelTessellationControl ||
  2414. get_entry_point().model == ExecutionModelTessellationEvaluation))
  2415. {
  2416. newtype.array.back() = 0;
  2417. newtype.array_size_literal.back() = true;
  2418. }
  2419. statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
  2420. variable_decl(newtype, to_name(var.self), var.self), ";");
  2421. }
  2422. }
  2423. }
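// In the non-flattened block case above, the emitted GLSL looks roughly like this
// (hypothetical names; the layout() and patch/perprimitiveEXT qualifiers depend on the variable):
//
//   layout(location = 0) out VertexData
//   {
//       vec4 color;
//   } vout;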
  2424. void CompilerGLSL::emit_uniform(const SPIRVariable &var)
  2425. {
  2426. auto &type = get<SPIRType>(var.basetype);
  2427. if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
  2428. {
  2429. if (!options.es && options.version < 420)
  2430. require_extension_internal("GL_ARB_shader_image_load_store");
  2431. else if (options.es && options.version < 310)
  2432. SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
  2433. }
  2434. add_resource_name(var.self);
  2435. statement(layout_for_variable(var), variable_decl(var), ";");
  2436. }
  2437. string CompilerGLSL::constant_value_macro_name(uint32_t id)
  2438. {
  2439. return join("SPIRV_CROSS_CONSTANT_ID_", id);
  2440. }
  2441. void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
  2442. {
  2443. auto &type = get<SPIRType>(constant.basetype);
  2444. add_resource_name(constant.self);
  2445. auto name = to_name(constant.self);
  2446. statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
  2447. }
  2448. int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const
  2449. {
  2450. auto &entry_point = get_entry_point();
  2451. int index = -1;
  2452. // Need to redirect specialization constants which are used as WorkGroupSize to the builtin,
  2453. // since the spec constant declarations are never explicitly declared.
  2454. if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(ExecutionModeLocalSizeId))
  2455. {
  2456. if (c.self == entry_point.workgroup_size.id_x)
  2457. index = 0;
  2458. else if (c.self == entry_point.workgroup_size.id_y)
  2459. index = 1;
  2460. else if (c.self == entry_point.workgroup_size.id_z)
  2461. index = 2;
  2462. }
  2463. return index;
  2464. }
  2465. void CompilerGLSL::emit_constant(const SPIRConstant &constant)
  2466. {
  2467. auto &type = get<SPIRType>(constant.constant_type);
  2468. SpecializationConstant wg_x, wg_y, wg_z;
  2469. ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  2470. // This specialization constant is implicitly declared by emitting layout() in;
  2471. if (constant.self == workgroup_size_id)
  2472. return;
  2473. // These specialization constants are implicitly declared by emitting layout() in;
  2474. // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
  2475. // later can use macro overrides for work group size.
  2476. bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
  2477. ConstantID(constant.self) == wg_z.id;
  2478. if (options.vulkan_semantics && is_workgroup_size_constant)
  2479. {
  2480. // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
  2481. return;
  2482. }
  2483. else if (!options.vulkan_semantics && is_workgroup_size_constant &&
  2484. !has_decoration(constant.self, DecorationSpecId))
  2485. {
  2486. // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
  2487. return;
  2488. }
  2489. add_resource_name(constant.self);
  2490. auto name = to_name(constant.self);
  2491. // Only scalars have constant IDs.
  2492. if (has_decoration(constant.self, DecorationSpecId))
  2493. {
  2494. if (options.vulkan_semantics)
  2495. {
  2496. statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
  2497. variable_decl(type, name), " = ", constant_expression(constant), ";");
  2498. }
  2499. else
  2500. {
  2501. const string &macro_name = constant.specialization_constant_macro_name;
  2502. statement("#ifndef ", macro_name);
  2503. statement("#define ", macro_name, " ", constant_expression(constant));
  2504. statement("#endif");
  2505. // For workgroup size constants, only emit the macros.
  2506. if (!is_workgroup_size_constant)
  2507. statement("const ", variable_decl(type, name), " = ", macro_name, ";");
  2508. }
  2509. }
  2510. else
  2511. {
  2512. statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
  2513. }
  2514. }
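// Illustrative output for a scalar spec constant with SpecId 3 and default value 42
// ("FOO" is a hypothetical name; workgroup-size constants only get the macro part):
//
//   Vulkan GLSL:  layout(constant_id = 3) const int FOO = 42;
//
//   Plain GLSL:   #ifndef SPIRV_CROSS_CONSTANT_ID_3
//                 #define SPIRV_CROSS_CONSTANT_ID_3 42
//                 #endif
//                 const int FOO = SPIRV_CROSS_CONSTANT_ID_3;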
  2515. void CompilerGLSL::emit_entry_point_declarations()
  2516. {
  2517. }
  2518. void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
  2519. {
  2520. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
  2521. if (is_hidden_variable(var))
  2522. return;
  2523. auto *meta = ir.find_meta(var.self);
  2524. if (!meta)
  2525. return;
  2526. auto &m = meta->decoration;
  2527. if (keywords.find(m.alias) != end(keywords))
  2528. m.alias = join("_", m.alias);
  2529. });
  2530. ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
  2531. auto *meta = ir.find_meta(func.self);
  2532. if (!meta)
  2533. return;
  2534. auto &m = meta->decoration;
  2535. if (keywords.find(m.alias) != end(keywords))
  2536. m.alias = join("_", m.alias);
  2537. });
  2538. ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
  2539. auto *meta = ir.find_meta(type.self);
  2540. if (!meta)
  2541. return;
  2542. auto &m = meta->decoration;
  2543. if (keywords.find(m.alias) != end(keywords))
  2544. m.alias = join("_", m.alias);
  2545. for (auto &memb : meta->members)
  2546. if (keywords.find(memb.alias) != end(keywords))
  2547. memb.alias = join("_", memb.alias);
  2548. });
  2549. }
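// e.g. a SPIR-V name that happens to collide with a GLSL identifier such as "texture" or "sample"
// is renamed to "_texture" / "_sample" before emission.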
  2550. void CompilerGLSL::replace_illegal_names()
  2551. {
  2552. // clang-format off
  2553. static const unordered_set<string> keywords = {
  2554. "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
  2555. "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
  2556. "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
  2557. "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
  2558. "ceil", "cos", "cosh", "cross", "degrees",
  2559. "dFdx", "dFdxCoarse", "dFdxFine",
  2560. "dFdy", "dFdyCoarse", "dFdyFine",
  2561. "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
  2562. "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
  2563. "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
  2564. "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
  2565. "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
  2566. "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
  2567. "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
  2568. "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
  2569. "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
  2570. "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
  2571. "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
  2572. "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
  2573. "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
  2574. "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
  2575. "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
  2576. "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
  2577. "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
  2578. "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
  2579. "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
  2580. "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
  2581. "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
  2582. "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
  2583. "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
  2584. "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
  2585. "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
  2586. "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
  2587. "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
  2588. "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
  2589. "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
  2590. "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
  2591. "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
  2592. "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
  2593. "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
  2594. "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
  2595. "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
  2596. "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
  2597. "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
  2598. "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
  2599. "while", "writeonly",
  2600. };
  2601. // clang-format on
  2602. replace_illegal_names(keywords);
  2603. }
  2604. void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
  2605. {
  2606. auto &m = ir.meta[var.self].decoration;
  2607. uint32_t location = 0;
  2608. if (m.decoration_flags.get(DecorationLocation))
  2609. location = m.location;
  2610. // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
  2611. // do the access chain part of this for us.
  2612. auto &type = get<SPIRType>(var.basetype);
  2613. if (type.array.empty())
  2614. {
  2615. // Redirect the write to a specific render target in legacy GLSL.
  2616. m.alias = join("gl_FragData[", location, "]");
  2617. if (is_legacy_es() && location != 0)
  2618. require_extension_internal("GL_EXT_draw_buffers");
  2619. }
  2620. else if (type.array.size() == 1)
  2621. {
  2622. // If location is non-zero, we probably have to add an offset.
  2623. // This gets really tricky since we'd have to inject an offset in the access chain.
  2624. // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
  2625. m.alias = "gl_FragData";
  2626. if (location != 0)
  2627. SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
  2628. "This is unimplemented in SPIRV-Cross.");
  2629. if (is_legacy_es())
  2630. require_extension_internal("GL_EXT_draw_buffers");
  2631. }
  2632. else
  2633. SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
  2634. var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
  2635. }
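// e.g. a fragment output decorated with Location = 1 is redirected to gl_FragData[1];
// on legacy ES this additionally pulls in GL_EXT_draw_buffers for non-zero locations.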
  2636. void CompilerGLSL::replace_fragment_outputs()
  2637. {
  2638. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2639. auto &type = this->get<SPIRType>(var.basetype);
  2640. if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
  2641. replace_fragment_output(var);
  2642. });
  2643. }
  2644. string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
  2645. {
  2646. if (out_type.vecsize == input_components)
  2647. return expr;
  2648. else if (input_components == 1 && !backend.can_swizzle_scalar)
  2649. return join(type_to_glsl(out_type), "(", expr, ")");
  2650. else
  2651. {
  2652. // FIXME: This will not work with packed expressions.
  2653. auto e = enclose_expression(expr) + ".";
  2654. // Just clamp the swizzle index if we have more outputs than inputs.
  2655. for (uint32_t c = 0; c < out_type.vecsize; c++)
  2656. e += index_to_swizzle(min(c, input_components - 1));
  2657. if (backend.swizzle_is_function && out_type.vecsize > 1)
  2658. e += "()";
  2659. remove_duplicate_swizzle(e);
  2660. return e;
  2661. }
  2662. }
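// e.g. remapping a 2-component expression to a vec4 target yields "expr.xyyy":
// the swizzle index is clamped to the last available input component.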
  2663. void CompilerGLSL::emit_pls()
  2664. {
  2665. auto &execution = get_entry_point();
  2666. if (execution.model != ExecutionModelFragment)
  2667. SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
  2668. if (!options.es)
  2669. SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
  2670. if (options.version < 300)
  2671. SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
  2672. if (!pls_inputs.empty())
  2673. {
  2674. statement("__pixel_local_inEXT _PLSIn");
  2675. begin_scope();
  2676. for (auto &input : pls_inputs)
  2677. statement(pls_decl(input), ";");
  2678. end_scope_decl();
  2679. statement("");
  2680. }
  2681. if (!pls_outputs.empty())
  2682. {
  2683. statement("__pixel_local_outEXT _PLSOut");
  2684. begin_scope();
  2685. for (auto &output : pls_outputs)
  2686. statement(pls_decl(output), ";");
  2687. end_scope_decl();
  2688. statement("");
  2689. }
  2690. }
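// The resulting declarations look roughly like the block below; the exact per-member format and
// precision qualifiers come from pls_decl() and depend on the remapped PLS format:
//
//   __pixel_local_inEXT _PLSIn
//   {
//       layout(rgba8) mediump vec4 color;
//   };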
  2691. void CompilerGLSL::fixup_image_load_store_access()
  2692. {
  2693. if (!options.enable_storage_image_qualifier_deduction)
  2694. return;
  2695. ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
  2696. auto &vartype = expression_type(var);
  2697. if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
  2698. {
  2699. // Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
  2700. // Solve this by making the image access as restricted as possible and loosen up if we need to.
  2701. // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
  2702. if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable))
  2703. {
  2704. set_decoration(var, DecorationNonWritable);
  2705. set_decoration(var, DecorationNonReadable);
  2706. }
  2707. }
  2708. });
  2709. }
  2710. static bool is_block_builtin(BuiltIn builtin)
  2711. {
  2712. return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
  2713. builtin == BuiltInCullDistance;
  2714. }
  2715. bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
  2716. {
  2717. // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
  2718. if (storage != StorageClassOutput)
  2719. return false;
  2720. bool should_force = false;
  2721. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2722. if (should_force)
  2723. return;
  2724. auto &type = this->get<SPIRType>(var.basetype);
  2725. bool block = has_decoration(type.self, DecorationBlock);
  2726. if (var.storage == storage && block && is_builtin_variable(var))
  2727. {
  2728. uint32_t member_count = uint32_t(type.member_types.size());
  2729. for (uint32_t i = 0; i < member_count; i++)
  2730. {
  2731. if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
  2732. is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
  2733. has_member_decoration(type.self, i, DecorationOffset))
  2734. {
  2735. should_force = true;
  2736. }
  2737. }
  2738. }
  2739. else if (var.storage == storage && !block && is_builtin_variable(var))
  2740. {
  2741. if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
  2742. has_decoration(var.self, DecorationOffset))
  2743. {
  2744. should_force = true;
  2745. }
  2746. }
  2747. });
  2748. // If we're declaring clip/cull planes with control points we need to force block declaration.
  2749. if ((get_execution_model() == ExecutionModelTessellationControl ||
  2750. get_execution_model() == ExecutionModelMeshEXT) &&
  2751. (clip_distance_count || cull_distance_count))
  2752. {
  2753. should_force = true;
  2754. }
  2755. // Either glslang bug or oversight, but global invariant position does not work in mesh shaders.
  2756. if (get_execution_model() == ExecutionModelMeshEXT && position_invariant)
  2757. should_force = true;
  2758. return should_force;
  2759. }
  2760. void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model)
  2761. {
  2762. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2763. auto &type = this->get<SPIRType>(var.basetype);
  2764. bool block = has_decoration(type.self, DecorationBlock);
  2765. if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
  2766. is_builtin_variable(var))
  2767. {
  2768. if (model != ExecutionModelMeshEXT)
  2769. {
  2770. // Make sure the array has a supported name in the code.
  2771. if (var.storage == StorageClassOutput)
  2772. set_name(var.self, "gl_out");
  2773. else if (var.storage == StorageClassInput)
  2774. set_name(var.self, "gl_in");
  2775. }
  2776. else
  2777. {
  2778. auto flags = get_buffer_block_flags(var.self);
  2779. if (flags.get(DecorationPerPrimitiveEXT))
  2780. {
  2781. set_name(var.self, "gl_MeshPrimitivesEXT");
  2782. set_name(type.self, "gl_MeshPerPrimitiveEXT");
  2783. }
  2784. else
  2785. {
  2786. set_name(var.self, "gl_MeshVerticesEXT");
  2787. set_name(type.self, "gl_MeshPerVertexEXT");
  2788. }
  2789. }
  2790. }
  2791. if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block)
  2792. {
  2793. auto *m = ir.find_meta(var.self);
  2794. if (m && m->decoration.builtin)
  2795. {
  2796. auto builtin_type = m->decoration.builtin_type;
  2797. if (builtin_type == BuiltInPrimitivePointIndicesEXT)
  2798. set_name(var.self, "gl_PrimitivePointIndicesEXT");
  2799. else if (builtin_type == BuiltInPrimitiveLineIndicesEXT)
  2800. set_name(var.self, "gl_PrimitiveLineIndicesEXT");
  2801. else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT)
  2802. set_name(var.self, "gl_PrimitiveTriangleIndicesEXT");
  2803. }
  2804. }
  2805. });
  2806. }
  2807. void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
  2808. {
  2809. Bitset emitted_builtins;
  2810. Bitset global_builtins;
  2811. const SPIRVariable *block_var = nullptr;
  2812. bool emitted_block = false;
  2813. // Need to use declared size in the type.
  2814. // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
  2815. uint32_t cull_distance_size = 0;
  2816. uint32_t clip_distance_size = 0;
  2817. bool have_xfb_buffer_stride = false;
  2818. bool have_geom_stream = false;
  2819. bool have_any_xfb_offset = false;
  2820. uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
  2821. std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
  2822. const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool {
  2823. return builtin == BuiltInPosition || builtin == BuiltInPointSize ||
  2824. builtin == BuiltInClipDistance || builtin == BuiltInCullDistance;
  2825. };
  2826. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  2827. auto &type = this->get<SPIRType>(var.basetype);
  2828. bool block = has_decoration(type.self, DecorationBlock);
  2829. Bitset builtins;
  2830. if (var.storage == storage && block && is_builtin_variable(var))
  2831. {
  2832. uint32_t index = 0;
  2833. for (auto &m : ir.meta[type.self].members)
  2834. {
  2835. if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
  2836. {
  2837. builtins.set(m.builtin_type);
  2838. if (m.builtin_type == BuiltInCullDistance)
  2839. cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
  2840. else if (m.builtin_type == BuiltInClipDistance)
  2841. clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
  2842. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
  2843. {
  2844. have_any_xfb_offset = true;
  2845. builtin_xfb_offsets[m.builtin_type] = m.offset;
  2846. }
  2847. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
  2848. {
  2849. uint32_t stream = m.stream;
  2850. if (have_geom_stream && geom_stream != stream)
  2851. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2852. have_geom_stream = true;
  2853. geom_stream = stream;
  2854. }
  2855. }
  2856. index++;
  2857. }
  2858. if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
  2859. has_decoration(var.self, DecorationXfbStride))
  2860. {
  2861. uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
  2862. uint32_t stride = get_decoration(var.self, DecorationXfbStride);
  2863. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  2864. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
if (have_xfb_buffer_stride && stride != xfb_stride)
    SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  2867. have_xfb_buffer_stride = true;
  2868. xfb_buffer = buffer_index;
  2869. xfb_stride = stride;
  2870. }
  2871. if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
  2872. {
  2873. uint32_t stream = get_decoration(var.self, DecorationStream);
  2874. if (have_geom_stream && geom_stream != stream)
  2875. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2876. have_geom_stream = true;
  2877. geom_stream = stream;
  2878. }
  2879. }
  2880. else if (var.storage == storage && !block && is_builtin_variable(var))
  2881. {
  2882. // While we're at it, collect all declared global builtins (HLSL mostly ...).
  2883. auto &m = ir.meta[var.self].decoration;
  2884. if (m.builtin && builtin_is_per_vertex_set(m.builtin_type))
  2885. {
  2886. // For mesh/tesc output, Clip/Cull is an array-of-array. Look at innermost array type
  2887. // for correct result.
  2888. global_builtins.set(m.builtin_type);
  2889. if (m.builtin_type == BuiltInCullDistance)
  2890. cull_distance_size = to_array_size_literal(type, 0);
  2891. else if (m.builtin_type == BuiltInClipDistance)
  2892. clip_distance_size = to_array_size_literal(type, 0);
  2893. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
  2894. m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
  2895. {
  2896. have_any_xfb_offset = true;
  2897. builtin_xfb_offsets[m.builtin_type] = m.offset;
  2898. uint32_t buffer_index = m.xfb_buffer;
  2899. uint32_t stride = m.xfb_stride;
  2900. if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
  2901. SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
if (have_xfb_buffer_stride && stride != xfb_stride)
    SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
  2904. have_xfb_buffer_stride = true;
  2905. xfb_buffer = buffer_index;
  2906. xfb_stride = stride;
  2907. }
  2908. if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
  2909. {
  2910. uint32_t stream = get_decoration(var.self, DecorationStream);
  2911. if (have_geom_stream && geom_stream != stream)
  2912. SPIRV_CROSS_THROW("IO block member Stream mismatch.");
  2913. have_geom_stream = true;
  2914. geom_stream = stream;
  2915. }
  2916. }
  2917. }
  2918. if (builtins.empty())
  2919. return;
  2920. if (emitted_block)
  2921. SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
  2922. emitted_builtins = builtins;
  2923. emitted_block = true;
  2924. block_var = &var;
  2925. });
  2926. global_builtins =
  2927. Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
  2928. (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
  2929. // Try to collect all other declared builtins.
  2930. if (!emitted_block)
  2931. emitted_builtins = global_builtins;
  2932. // Can't declare an empty interface block.
  2933. if (emitted_builtins.empty())
  2934. return;
  2935. if (storage == StorageClassOutput)
  2936. {
  2937. SmallVector<string> attr;
  2938. if (have_xfb_buffer_stride && have_any_xfb_offset)
  2939. {
if (!options.es)
{
    if (options.version < 140)
        SPIRV_CROSS_THROW("xfb_stride or xfb_buffer is not supported in targets below GLSL 1.40.");
    else if (options.version < 440)
        require_extension_internal("GL_ARB_enhanced_layouts");
}
else
    SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
  2951. attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
  2952. }
  2953. if (have_geom_stream)
  2954. {
  2955. if (get_execution_model() != ExecutionModelGeometry)
  2956. SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
  2957. if (options.es)
  2958. SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
  2959. if (options.version < 400)
  2960. require_extension_internal("GL_ARB_transform_feedback3");
  2961. attr.push_back(join("stream = ", geom_stream));
  2962. }
  2963. if (model == ExecutionModelMeshEXT)
  2964. statement("out gl_MeshPerVertexEXT");
  2965. else if (!attr.empty())
  2966. statement("layout(", merge(attr), ") out gl_PerVertex");
  2967. else
  2968. statement("out gl_PerVertex");
  2969. }
  2970. else
  2971. {
  2972. // If we have passthrough, there is no way PerVertex cannot be passthrough.
  2973. if (get_entry_point().geometry_passthrough)
  2974. statement("layout(passthrough) in gl_PerVertex");
  2975. else
  2976. statement("in gl_PerVertex");
  2977. }
  2978. begin_scope();
  2979. if (emitted_builtins.get(BuiltInPosition))
  2980. {
  2981. auto itr = builtin_xfb_offsets.find(BuiltInPosition);
  2982. if (itr != end(builtin_xfb_offsets))
  2983. statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
  2984. else if (position_invariant)
  2985. statement("invariant vec4 gl_Position;");
  2986. else
  2987. statement("vec4 gl_Position;");
  2988. }
  2989. if (emitted_builtins.get(BuiltInPointSize))
  2990. {
  2991. auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
  2992. if (itr != end(builtin_xfb_offsets))
  2993. statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
  2994. else
  2995. statement("float gl_PointSize;");
  2996. }
  2997. if (emitted_builtins.get(BuiltInClipDistance))
  2998. {
  2999. auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
  3000. if (itr != end(builtin_xfb_offsets))
  3001. statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
  3002. else
  3003. statement("float gl_ClipDistance[", clip_distance_size, "];");
  3004. }
  3005. if (emitted_builtins.get(BuiltInCullDistance))
  3006. {
  3007. auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
  3008. if (itr != end(builtin_xfb_offsets))
  3009. statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
  3010. else
  3011. statement("float gl_CullDistance[", cull_distance_size, "];");
  3012. }
  3013. bool builtin_array = model == ExecutionModelTessellationControl ||
  3014. (model == ExecutionModelMeshEXT && storage == StorageClassOutput) ||
  3015. (model == ExecutionModelGeometry && storage == StorageClassInput) ||
  3016. (model == ExecutionModelTessellationEvaluation && storage == StorageClassInput);
  3017. if (builtin_array)
  3018. {
  3019. const char *instance_name;
  3020. if (model == ExecutionModelMeshEXT)
  3021. instance_name = "gl_MeshVerticesEXT"; // Per primitive is never synthesized.
  3022. else
  3023. instance_name = storage == StorageClassInput ? "gl_in" : "gl_out";
  3024. if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
  3025. end_scope_decl(join(instance_name, "[", get_entry_point().output_vertices, "]"));
  3026. else
  3027. end_scope_decl(join(instance_name, "[]"));
  3028. }
  3029. else
  3030. end_scope_decl();
  3031. statement("");
  3032. }
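// A typical redeclared output block from this function looks roughly like:
//
//   out gl_PerVertex
//   {
//       vec4 gl_Position;
//       float gl_PointSize;
//   };
//
// With XFB the members gain layout(xfb_offset = ...) qualifiers, and for tessellation control,
// geometry input, etc. the block gets an instance name and array size, e.g. "} gl_out[4];".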
  3033. bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
  3034. {
  3035. bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
  3036. if (statically_assigned)
  3037. {
  3038. auto *constant = maybe_get<SPIRConstant>(var.static_expression);
  3039. if (constant && constant->is_used_as_lut)
  3040. return true;
  3041. }
  3042. return false;
  3043. }
  3044. void CompilerGLSL::emit_resources()
  3045. {
  3046. auto &execution = get_entry_point();
  3047. replace_illegal_names();
  3048. // Legacy GL uses gl_FragData[], redeclare all fragment outputs
  3049. // with builtins.
  3050. if (execution.model == ExecutionModelFragment && is_legacy())
  3051. replace_fragment_outputs();
  3052. // Emit PLS blocks if we have such variables.
  3053. if (!pls_inputs.empty() || !pls_outputs.empty())
  3054. emit_pls();
  3055. switch (execution.model)
  3056. {
  3057. case ExecutionModelGeometry:
  3058. case ExecutionModelTessellationControl:
  3059. case ExecutionModelTessellationEvaluation:
  3060. case ExecutionModelMeshEXT:
  3061. fixup_implicit_builtin_block_names(execution.model);
  3062. break;
  3063. default:
  3064. break;
  3065. }
  3066. bool global_invariant_position = position_invariant && (options.es || options.version >= 120);
  3067. // Emit custom gl_PerVertex for SSO compatibility.
  3068. if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
  3069. {
  3070. switch (execution.model)
  3071. {
  3072. case ExecutionModelGeometry:
  3073. case ExecutionModelTessellationControl:
  3074. case ExecutionModelTessellationEvaluation:
  3075. emit_declared_builtin_block(StorageClassInput, execution.model);
  3076. emit_declared_builtin_block(StorageClassOutput, execution.model);
  3077. global_invariant_position = false;
  3078. break;
  3079. case ExecutionModelVertex:
  3080. case ExecutionModelMeshEXT:
  3081. emit_declared_builtin_block(StorageClassOutput, execution.model);
  3082. global_invariant_position = false;
  3083. break;
  3084. default:
  3085. break;
  3086. }
  3087. }
  3088. else if (should_force_emit_builtin_block(StorageClassOutput))
  3089. {
  3090. emit_declared_builtin_block(StorageClassOutput, execution.model);
  3091. global_invariant_position = false;
  3092. }
  3093. else if (execution.geometry_passthrough)
  3094. {
  3095. // Need to declare gl_in with Passthrough.
  3096. // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
  3097. emit_declared_builtin_block(StorageClassInput, execution.model);
  3098. }
  3099. else
  3100. {
  3101. // Need to redeclare clip/cull distance with explicit size to use them.
  3102. // SPIR-V mandates these builtins have a size declared.
  3103. const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
  3104. if (clip_distance_count != 0)
  3105. statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
  3106. if (cull_distance_count != 0)
  3107. statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
  3108. if (clip_distance_count != 0 || cull_distance_count != 0)
  3109. statement("");
  3110. }
  3111. if (global_invariant_position)
  3112. {
  3113. statement("invariant gl_Position;");
  3114. statement("");
  3115. }
  3116. bool emitted = false;
// When emitting Vulkan GLSL, emit specialization constants as actual constants;
// spec op expressions will redirect to the constant name.
//
  3121. {
  3122. auto loop_lock = ir.create_loop_hard_lock();
  3123. for (auto &id_ : ir.ids_for_constant_undef_or_type)
  3124. {
  3125. auto &id = ir.ids[id_];
  3126. if (id.get_type() == TypeConstant)
  3127. {
  3128. auto &c = id.get<SPIRConstant>();
  3129. bool needs_declaration = c.specialization || c.is_used_as_lut;
  3130. if (needs_declaration)
  3131. {
  3132. if (!options.vulkan_semantics && c.specialization)
  3133. {
  3134. c.specialization_constant_macro_name =
  3135. constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
  3136. }
  3137. emit_constant(c);
  3138. emitted = true;
  3139. }
  3140. }
  3141. else if (id.get_type() == TypeConstantOp)
  3142. {
  3143. emit_specialization_constant_op(id.get<SPIRConstantOp>());
  3144. emitted = true;
  3145. }
  3146. else if (id.get_type() == TypeType)
  3147. {
  3148. auto *type = &id.get<SPIRType>();
  3149. bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
  3150. (!has_decoration(type->self, DecorationBlock) &&
  3151. !has_decoration(type->self, DecorationBufferBlock));
  3152. // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
  3153. if (type->basetype == SPIRType::Struct && type->pointer &&
  3154. has_decoration(type->self, DecorationBlock) &&
  3155. (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
  3156. type->storage == StorageClassHitAttributeKHR))
  3157. {
  3158. type = &get<SPIRType>(type->parent_type);
  3159. is_natural_struct = true;
  3160. }
  3161. if (is_natural_struct)
  3162. {
  3163. if (emitted)
  3164. statement("");
  3165. emitted = false;
  3166. emit_struct(*type);
  3167. }
  3168. }
  3169. else if (id.get_type() == TypeUndef)
  3170. {
  3171. auto &undef = id.get<SPIRUndef>();
  3172. auto &type = this->get<SPIRType>(undef.basetype);
  3173. // OpUndef can be void for some reason ...
  3174. if (type.basetype == SPIRType::Void)
  3175. return;
  3176. string initializer;
  3177. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  3178. initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
  3179. // FIXME: If used in a constant, we must declare it as one.
  3180. statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
  3181. emitted = true;
  3182. }
  3183. }
  3184. }
  3185. if (emitted)
  3186. statement("");
  3187. // If we needed to declare work group size late, check here.
  3188. // If the work group size depends on a specialization constant, we need to declare the layout() block
  3189. // after constants (and their macros) have been declared.
  3190. if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
  3191. (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)))
  3192. {
  3193. SpecializationConstant wg_x, wg_y, wg_z;
  3194. get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
  3195. if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
  3196. {
  3197. SmallVector<string> inputs;
  3198. build_workgroup_size(inputs, wg_x, wg_y, wg_z);
  3199. statement("layout(", merge(inputs), ") in;");
  3200. statement("");
  3201. }
  3202. }
  3203. emitted = false;
  3204. if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
  3205. {
  3206. for (auto type : physical_storage_non_block_pointer_types)
  3207. {
  3208. emit_buffer_reference_block(type, false);
  3209. }
  3210. // Output buffer reference blocks.
  3211. // Do this in two stages, one with forward declaration,
  3212. // and one without. Buffer reference blocks can reference themselves
  3213. // to support things like linked lists.
  3214. ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
  3215. if (type.basetype == SPIRType::Struct && type.pointer &&
  3216. type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
  3217. type.storage == StorageClassPhysicalStorageBufferEXT)
  3218. {
  3219. emit_buffer_reference_block(self, true);
  3220. }
  3221. });
  3222. ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
  3223. if (type.basetype == SPIRType::Struct &&
  3224. type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
  3225. type.storage == StorageClassPhysicalStorageBufferEXT)
  3226. {
  3227. emit_buffer_reference_block(self, false);
  3228. }
  3229. });
  3230. }
  3231. // Output UBOs and SSBOs
  3232. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3233. auto &type = this->get<SPIRType>(var.basetype);
  3234. bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
  3235. type.storage == StorageClassShaderRecordBufferKHR;
  3236. bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
  3237. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  3238. if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
  3239. has_block_flags)
  3240. {
  3241. emit_buffer_block(var);
  3242. }
  3243. });
  3244. // Output push constant blocks
  3245. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3246. auto &type = this->get<SPIRType>(var.basetype);
  3247. if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
  3248. !is_hidden_variable(var))
  3249. {
  3250. emit_push_constant_block(var);
  3251. }
  3252. });
  3253. bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
  3254. // Output Uniform Constants (values, samplers, images, etc).
  3255. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3256. auto &type = this->get<SPIRType>(var.basetype);
  3257. // If we're remapping separate samplers and images, only emit the combined samplers.
  3258. if (skip_separate_image_sampler)
  3259. {
  3260. // Sampler buffers are always used without a sampler, and they will also work in regular GL.
  3261. bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
  3262. bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
  3263. bool separate_sampler = type.basetype == SPIRType::Sampler;
  3264. if (!sampler_buffer && (separate_image || separate_sampler))
  3265. return;
  3266. }
  3267. if (var.storage != StorageClassFunction && type.pointer &&
  3268. (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
  3269. type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
  3270. type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
  3271. type.storage == StorageClassHitAttributeKHR) &&
  3272. !is_hidden_variable(var))
  3273. {
  3274. emit_uniform(var);
  3275. emitted = true;
  3276. }
  3277. });
  3278. if (emitted)
  3279. statement("");
  3280. emitted = false;
  3281. bool emitted_base_instance = false;
  3282. // Output in/out interfaces.
  3283. ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
  3284. auto &type = this->get<SPIRType>(var.basetype);
  3285. bool is_hidden = is_hidden_variable(var);
  3286. // Unused output I/O variables might still be required to implement framebuffer fetch.
  3287. if (var.storage == StorageClassOutput && !is_legacy() &&
  3288. location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0)
  3289. {
  3290. is_hidden = false;
  3291. }
  3292. if (var.storage != StorageClassFunction && type.pointer &&
  3293. (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
  3294. interface_variable_exists_in_entry_point(var.self) && !is_hidden)
  3295. {
  3296. if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput &&
  3297. type.array.size() == 1)
  3298. {
  3299. SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader.");
  3300. }
  3301. emit_interface_block(var);
  3302. emitted = true;
  3303. }
  3304. else if (is_builtin_variable(var))
  3305. {
  3306. auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
  3307. // For gl_InstanceIndex emulation on GLES, the API user needs to
  3308. // supply this uniform.
  3309. // The draw parameter extension is soft-enabled on GL with some fallbacks.
  3310. if (!options.vulkan_semantics)
  3311. {
  3312. if (!emitted_base_instance &&
  3313. ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
  3314. (builtin == BuiltInBaseInstance)))
  3315. {
  3316. statement("#ifdef GL_ARB_shader_draw_parameters");
  3317. statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
  3318. statement("#else");
  3319. // A crude, but simple workaround which should be good enough for non-indirect draws.
  3320. statement("uniform int SPIRV_Cross_BaseInstance;");
  3321. statement("#endif");
  3322. emitted = true;
  3323. emitted_base_instance = true;
  3324. }
  3325. else if (builtin == BuiltInBaseVertex)
  3326. {
  3327. statement("#ifdef GL_ARB_shader_draw_parameters");
  3328. statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
  3329. statement("#else");
  3330. // A crude, but simple workaround which should be good enough for non-indirect draws.
  3331. statement("uniform int SPIRV_Cross_BaseVertex;");
  3332. statement("#endif");
  3333. }
  3334. else if (builtin == BuiltInDrawIndex)
  3335. {
  3336. statement("#ifndef GL_ARB_shader_draw_parameters");
  3337. // Cannot really be worked around.
  3338. statement("#error GL_ARB_shader_draw_parameters is not supported.");
  3339. statement("#endif");
  3340. }
  3341. }
  3342. }
  3343. });
  3344. // Global variables.
  3345. for (auto global : global_variables)
  3346. {
  3347. auto &var = get<SPIRVariable>(global);
  3348. if (is_hidden_variable(var, true))
  3349. continue;
  3350. if (var.storage != StorageClassOutput)
  3351. {
  3352. if (!variable_is_lut(var))
  3353. {
  3354. add_resource_name(var.self);
  3355. string initializer;
  3356. if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
  3357. !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
  3358. {
  3359. initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
  3360. }
  3361. statement(variable_decl(var), initializer, ";");
  3362. emitted = true;
  3363. }
  3364. }
  3365. else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
  3366. {
  3367. emit_output_variable_initializer(var);
  3368. }
  3369. }
  3370. if (emitted)
  3371. statement("");
  3372. }
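// An output (StorageClassOutput) variable cannot always carry its initializer at global scope
// (builtin blocks, tessellation control outputs, unsized gl_SampleMask), so the initializer is
// instead turned into assignments that run at the top of main() via entry-point fixup hooks.
// The simple scalar case comes out roughly like (identifiers here are illustrative only):
//   const vec4 _5_init = vec4(0.0);
//   void main() { FragColor = _5_init; ... }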
  3373. void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
  3374. {
  3375. // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
  3376. auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
  3377. auto &type = get<SPIRType>(var.basetype);
  3378. bool is_patch = has_decoration(var.self, DecorationPatch);
  3379. bool is_block = has_decoration(type.self, DecorationBlock);
  3380. bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
  3381. if (is_block)
  3382. {
  3383. uint32_t member_count = uint32_t(type.member_types.size());
  3384. bool type_is_array = type.array.size() == 1;
  3385. uint32_t array_size = 1;
  3386. if (type_is_array)
  3387. array_size = to_array_size_literal(type);
  3388. uint32_t iteration_count = is_control_point ? 1 : array_size;
  3389. // If the initializer is a block, we must initialize each block member one at a time.
  3390. for (uint32_t i = 0; i < member_count; i++)
  3391. {
  3392. // These outputs might not have been properly declared, so don't initialize them in that case.
  3393. if (has_member_decoration(type.self, i, DecorationBuiltIn))
  3394. {
  3395. if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
  3396. !cull_distance_count)
  3397. continue;
  3398. if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
  3399. !clip_distance_count)
  3400. continue;
  3401. }
  3402. // We need to build a per-member array first, essentially transposing from AoS to SoA.
  3403. // This code path hits when we have an array of blocks.
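// Illustratively, for an array of two blocks this emits one LUT per member, roughly:
//   const vec4 _22_0_init[2] = vec4[](vec4(...), vec4(...));
// (names and values are placeholders) which the fixup hook below then copies element by
// element into the real output.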
  3404. string lut_name;
  3405. if (type_is_array)
  3406. {
  3407. lut_name = join("_", var.self, "_", i, "_init");
  3408. uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
  3409. auto &member_type = get<SPIRType>(member_type_id);
  3410. auto array_type = member_type;
  3411. array_type.parent_type = member_type_id;
  3412. array_type.array.push_back(array_size);
  3413. array_type.array_size_literal.push_back(true);
  3414. SmallVector<string> exprs;
  3415. exprs.reserve(array_size);
  3416. auto &c = get<SPIRConstant>(var.initializer);
  3417. for (uint32_t j = 0; j < array_size; j++)
  3418. exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
  3419. statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ",
  3420. type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
  3421. }
  3422. for (uint32_t j = 0; j < iteration_count; j++)
  3423. {
  3424. entry_func.fixup_hooks_in.push_back([=, &var]() {
  3425. AccessChainMeta meta;
  3426. auto &c = this->get<SPIRConstant>(var.initializer);
  3427. uint32_t invocation_id = 0;
  3428. uint32_t member_index_id = 0;
  3429. if (is_control_point)
  3430. {
  3431. uint32_t ids = ir.increase_bound_by(3);
  3432. SPIRType uint_type;
  3433. uint_type.basetype = SPIRType::UInt;
  3434. uint_type.width = 32;
  3435. set<SPIRType>(ids, uint_type);
  3436. set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
  3437. set<SPIRConstant>(ids + 2, ids, i, false);
  3438. invocation_id = ids + 1;
  3439. member_index_id = ids + 2;
  3440. }
  3441. if (is_patch)
  3442. {
  3443. statement("if (gl_InvocationID == 0)");
  3444. begin_scope();
  3445. }
  3446. if (type_is_array && !is_control_point)
  3447. {
  3448. uint32_t indices[2] = { j, i };
  3449. auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  3450. statement(chain, " = ", lut_name, "[", j, "];");
  3451. }
  3452. else if (is_control_point)
  3453. {
  3454. uint32_t indices[2] = { invocation_id, member_index_id };
  3455. auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
  3456. statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
  3457. }
  3458. else
  3459. {
  3460. auto chain =
  3461. access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
  3462. statement(chain, " = ", to_expression(c.subconstants[i]), ";");
  3463. }
  3464. if (is_patch)
  3465. end_scope();
  3466. });
  3467. }
  3468. }
  3469. }
  3470. else if (is_control_point)
  3471. {
  3472. auto lut_name = join("_", var.self, "_init");
  3473. statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type),
  3474. " = ", to_expression(var.initializer), ";");
  3475. entry_func.fixup_hooks_in.push_back([&, lut_name]() {
  3476. statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
  3477. });
  3478. }
  3479. else if (has_decoration(var.self, DecorationBuiltIn) &&
  3480. BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask)
  3481. {
  3482. // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_<
  3483. entry_func.fixup_hooks_in.push_back([&] {
  3484. auto &c = this->get<SPIRConstant>(var.initializer);
  3485. uint32_t num_constants = uint32_t(c.subconstants.size());
  3486. for (uint32_t i = 0; i < num_constants; i++)
  3487. {
  3488. // Don't use to_expression on constant since it might be uint, just fish out the raw int.
  3489. statement(to_expression(var.self), "[", i, "] = ",
  3490. convert_to_string(this->get<SPIRConstant>(c.subconstants[i]).scalar_i32()), ";");
  3491. }
  3492. });
  3493. }
  3494. else
  3495. {
  3496. auto lut_name = join("_", var.self, "_init");
  3497. statement("const ", type_to_glsl(type), " ", lut_name,
  3498. type_to_array_glsl(type), " = ", to_expression(var.initializer), ";");
  3499. entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
  3500. if (is_patch)
  3501. {
  3502. statement("if (gl_InvocationID == 0)");
  3503. begin_scope();
  3504. }
  3505. statement(to_expression(var.self), " = ", lut_name, ";");
  3506. if (is_patch)
  3507. end_scope();
  3508. });
  3509. }
  3510. }
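// Emits polyfill overloads of the given subgroup arithmetic function (e.g. subgroupAdd) on top
// of NV_shader_thread_shuffle. Each overload has two paths: a lock-step path used when every
// invocation is active (log2(gl_SubgroupSize) shuffleXorNV / shuffleUpNV steps), and a generic
// fallback that loops over all invocations and masks out inactive or out-of-range lanes.
// The Reduce variant for uint comes out roughly like:
//   uint subgroupAdd(uint v)
//   {
//       uint reduction = 0u;
//       uvec4 active_threads = subgroupBallot(true);
//       if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize) { /* shuffle tree */ }
//       else { /* per-invocation loop */ }
//       return reduction;
//   }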
  3511. void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op)
  3512. {
  3513. std::string result;
  3514. switch (group_op)
  3515. {
  3516. case GroupOperationReduce:
  3517. result = "reduction";
  3518. break;
  3519. case GroupOperationExclusiveScan:
  3520. result = "excl_scan";
  3521. break;
  3522. case GroupOperationInclusiveScan:
  3523. result = "incl_scan";
  3524. break;
  3525. default:
  3526. SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
  3527. }
  3528. struct TypeInfo
  3529. {
  3530. std::string type;
  3531. std::string identity;
  3532. };
  3533. std::vector<TypeInfo> type_infos;
  3534. switch (op)
  3535. {
  3536. case OpGroupNonUniformIAdd:
  3537. {
  3538. type_infos.emplace_back(TypeInfo{ "uint", "0u" });
  3539. type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(0u)" });
  3540. type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(0u)" });
  3541. type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(0u)" });
  3542. type_infos.emplace_back(TypeInfo{ "int", "0" });
  3543. type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(0)" });
  3544. type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(0)" });
  3545. type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(0)" });
  3546. break;
  3547. }
  3548. case OpGroupNonUniformFAdd:
  3549. {
  3550. type_infos.emplace_back(TypeInfo{ "float", "0.0f" });
  3551. type_infos.emplace_back(TypeInfo{ "vec2", "vec2(0.0f)" });
  3552. type_infos.emplace_back(TypeInfo{ "vec3", "vec3(0.0f)" });
  3553. type_infos.emplace_back(TypeInfo{ "vec4", "vec4(0.0f)" });
3554. // ARB_gpu_shader_fp64 is required in GL 4.0, which in turn is required by NV_shader_thread_shuffle.
  3555. type_infos.emplace_back(TypeInfo{ "double", "0.0LF" });
  3556. type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(0.0LF)" });
  3557. type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(0.0LF)" });
  3558. type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(0.0LF)" });
  3559. break;
  3560. }
  3561. case OpGroupNonUniformIMul:
  3562. {
  3563. type_infos.emplace_back(TypeInfo{ "uint", "1u" });
  3564. type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(1u)" });
  3565. type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(1u)" });
  3566. type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(1u)" });
  3567. type_infos.emplace_back(TypeInfo{ "int", "1" });
  3568. type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(1)" });
  3569. type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(1)" });
  3570. type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(1)" });
  3571. break;
  3572. }
  3573. case OpGroupNonUniformFMul:
  3574. {
  3575. type_infos.emplace_back(TypeInfo{ "float", "1.0f" });
  3576. type_infos.emplace_back(TypeInfo{ "vec2", "vec2(1.0f)" });
  3577. type_infos.emplace_back(TypeInfo{ "vec3", "vec3(1.0f)" });
  3578. type_infos.emplace_back(TypeInfo{ "vec4", "vec4(1.0f)" });
  3579. type_infos.emplace_back(TypeInfo{ "double", "0.0LF" });
  3580. type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(1.0LF)" });
  3581. type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(1.0LF)" });
  3582. type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(1.0LF)" });
  3583. break;
  3584. }
  3585. default:
  3586. SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation");
  3587. }
  3588. const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd;
  3589. const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul;
  3590. std::string op_symbol;
  3591. if (op_is_addition)
  3592. {
  3593. op_symbol = "+=";
  3594. }
  3595. else if (op_is_multiplication)
  3596. {
  3597. op_symbol = "*=";
  3598. }
  3599. for (const TypeInfo &t : type_infos)
  3600. {
  3601. statement(t.type, " ", func, "(", t.type, " v)");
  3602. begin_scope();
  3603. statement(t.type, " ", result, " = ", t.identity, ";");
  3604. statement("uvec4 active_threads = subgroupBallot(true);");
  3605. statement("if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)");
  3606. begin_scope();
  3607. statement("uint total = gl_SubgroupSize / 2u;");
  3608. statement(result, " = v;");
  3609. statement("for (uint i = 1u; i <= total; i <<= 1u)");
  3610. begin_scope();
  3611. statement("bool valid;");
  3612. if (group_op == GroupOperationReduce)
  3613. {
  3614. statement(t.type, " s = shuffleXorNV(", result, ", i, gl_SubgroupSize, valid);");
  3615. }
  3616. else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
  3617. {
  3618. statement(t.type, " s = shuffleUpNV(", result, ", i, gl_SubgroupSize, valid);");
  3619. }
  3620. if (op_is_addition || op_is_multiplication)
  3621. {
  3622. statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
  3623. }
  3624. end_scope();
  3625. if (group_op == GroupOperationExclusiveScan)
  3626. {
  3627. statement(result, " = shuffleUpNV(", result, ", 1u, gl_SubgroupSize);");
  3628. statement("if (subgroupElect())");
  3629. begin_scope();
  3630. statement(result, " = ", t.identity, ";");
  3631. end_scope();
  3632. }
  3633. end_scope();
  3634. statement("else");
  3635. begin_scope();
  3636. if (group_op == GroupOperationExclusiveScan)
  3637. {
  3638. statement("uint total = subgroupBallotBitCount(gl_SubgroupLtMask);");
  3639. }
  3640. else if (group_op == GroupOperationInclusiveScan)
  3641. {
  3642. statement("uint total = subgroupBallotBitCount(gl_SubgroupLeMask);");
  3643. }
  3644. statement("for (uint i = 0u; i < gl_SubgroupSize; ++i)");
  3645. begin_scope();
  3646. statement("bool valid = subgroupBallotBitExtract(active_threads, i);");
  3647. statement(t.type, " s = shuffleNV(v, i, gl_SubgroupSize);");
  3648. if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan)
  3649. {
  3650. statement("valid = valid && (i < total);");
  3651. }
  3652. if (op_is_addition || op_is_multiplication)
  3653. {
  3654. statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";");
  3655. }
  3656. end_scope();
  3657. end_scope();
  3658. statement("return ", result, ";");
  3659. end_scope();
  3660. }
  3661. }
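// When targeting plain GL, map the GL_KHR_shader_subgroup_* builtins and functions the shader
// actually uses onto whichever vendor extensions are available, via #if defined() chains.
// For example, the SubgroupBallot feature below expands to roughly:
//   #if defined(GL_NV_shader_thread_group)
//   uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }
//   #elif defined(GL_ARB_shader_ballot)
//   uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }
//   #endif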
  3662. void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
  3663. {
  3664. static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
  3665. "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
  3666. if (!options.vulkan_semantics)
  3667. {
  3668. using Supp = ShaderSubgroupSupportHelper;
  3669. auto result = shader_subgroup_supporter.resolve();
  3670. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
  3671. {
  3672. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
  3673. for (auto &e : exts)
  3674. {
  3675. const char *name = Supp::get_extension_name(e);
  3676. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3677. switch (e)
  3678. {
  3679. case Supp::NV_shader_thread_group:
  3680. statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
  3681. statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
  3682. statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
  3683. statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
  3684. statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
  3685. break;
  3686. case Supp::ARB_shader_ballot:
  3687. statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
  3688. statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
  3689. statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
  3690. statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
  3691. statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
  3692. break;
  3693. default:
  3694. break;
  3695. }
  3696. }
  3697. statement("#endif");
  3698. statement("");
  3699. }
  3700. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
  3701. {
  3702. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
  3703. for (auto &e : exts)
  3704. {
  3705. const char *name = Supp::get_extension_name(e);
  3706. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3707. switch (e)
  3708. {
  3709. case Supp::NV_shader_thread_group:
  3710. statement("#define gl_SubgroupSize gl_WarpSizeNV");
  3711. break;
  3712. case Supp::ARB_shader_ballot:
  3713. statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
  3714. break;
  3715. case Supp::AMD_gcn_shader:
  3716. statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
  3717. break;
  3718. default:
  3719. break;
  3720. }
  3721. }
  3722. statement("#endif");
  3723. statement("");
  3724. }
  3725. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
  3726. {
  3727. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
  3728. for (auto &e : exts)
  3729. {
  3730. const char *name = Supp::get_extension_name(e);
  3731. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3732. switch (e)
  3733. {
  3734. case Supp::NV_shader_thread_group:
  3735. statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
  3736. break;
  3737. case Supp::ARB_shader_ballot:
  3738. statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
  3739. break;
  3740. default:
  3741. break;
  3742. }
  3743. }
  3744. statement("#endif");
  3745. statement("");
  3746. }
  3747. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
  3748. {
  3749. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
  3750. for (auto &e : exts)
  3751. {
  3752. const char *name = Supp::get_extension_name(e);
  3753. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3754. switch (e)
  3755. {
  3756. case Supp::NV_shader_thread_group:
  3757. statement("#define gl_SubgroupID gl_WarpIDNV");
  3758. break;
  3759. default:
  3760. break;
  3761. }
  3762. }
  3763. statement("#endif");
  3764. statement("");
  3765. }
  3766. if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
  3767. {
  3768. auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
  3769. for (auto &e : exts)
  3770. {
  3771. const char *name = Supp::get_extension_name(e);
  3772. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3773. switch (e)
  3774. {
  3775. case Supp::NV_shader_thread_group:
  3776. statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
  3777. break;
  3778. default:
  3779. break;
  3780. }
  3781. }
  3782. statement("#endif");
  3783. statement("");
  3784. }
  3785. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
  3786. {
  3787. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
  3788. for (auto &e : exts)
  3789. {
  3790. const char *name = Supp::get_extension_name(e);
  3791. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3792. switch (e)
  3793. {
  3794. case Supp::NV_shader_thread_shuffle:
  3795. for (const char *t : workaround_types)
  3796. {
  3797. statement(t, " subgroupBroadcastFirst(", t,
  3798. " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
  3799. }
  3800. for (const char *t : workaround_types)
  3801. {
  3802. statement(t, " subgroupBroadcast(", t,
  3803. " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
  3804. }
  3805. break;
  3806. case Supp::ARB_shader_ballot:
  3807. for (const char *t : workaround_types)
  3808. {
  3809. statement(t, " subgroupBroadcastFirst(", t,
  3810. " value) { return readFirstInvocationARB(value); }");
  3811. }
  3812. for (const char *t : workaround_types)
  3813. {
  3814. statement(t, " subgroupBroadcast(", t,
  3815. " value, uint id) { return readInvocationARB(value, id); }");
  3816. }
  3817. break;
  3818. default:
  3819. break;
  3820. }
  3821. }
  3822. statement("#endif");
  3823. statement("");
  3824. }
  3825. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
  3826. {
  3827. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
  3828. for (auto &e : exts)
  3829. {
  3830. const char *name = Supp::get_extension_name(e);
  3831. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3832. switch (e)
  3833. {
  3834. case Supp::NV_shader_thread_group:
  3835. statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
  3836. statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
  3837. break;
  3838. default:
  3839. break;
  3840. }
  3841. }
  3842. statement("#else");
  3843. statement("uint subgroupBallotFindLSB(uvec4 value)");
  3844. begin_scope();
  3845. statement("int firstLive = findLSB(value.x);");
  3846. statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
  3847. end_scope();
  3848. statement("uint subgroupBallotFindMSB(uvec4 value)");
  3849. begin_scope();
  3850. statement("int firstLive = findMSB(value.y);");
  3851. statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
  3852. end_scope();
  3853. statement("#endif");
  3854. statement("");
  3855. }
  3856. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
  3857. {
  3858. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
  3859. for (auto &e : exts)
  3860. {
  3861. const char *name = Supp::get_extension_name(e);
  3862. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3863. switch (e)
  3864. {
  3865. case Supp::NV_gpu_shader_5:
  3866. statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
  3867. statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
  3868. statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
  3869. break;
  3870. case Supp::ARB_shader_group_vote:
  3871. statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
  3872. statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
  3873. statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
  3874. break;
  3875. case Supp::AMD_gcn_shader:
  3876. statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
  3877. statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
  3878. statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
  3879. "b == ballotAMD(true); }");
  3880. break;
  3881. default:
  3882. break;
  3883. }
  3884. }
  3885. statement("#endif");
  3886. statement("");
  3887. }
  3888. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
  3889. {
  3890. statement("#ifndef GL_KHR_shader_subgroup_vote");
  3891. statement(
  3892. "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
  3893. "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
  3894. for (const char *t : workaround_types)
  3895. statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
  3896. statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
  3897. statement("#endif");
  3898. statement("");
  3899. }
  3900. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
  3901. {
  3902. auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
  3903. for (auto &e : exts)
  3904. {
  3905. const char *name = Supp::get_extension_name(e);
  3906. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  3907. switch (e)
  3908. {
  3909. case Supp::NV_shader_thread_group:
  3910. statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
  3911. break;
  3912. case Supp::ARB_shader_ballot:
  3913. statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
  3914. break;
  3915. default:
  3916. break;
  3917. }
  3918. }
  3919. statement("#endif");
  3920. statement("");
  3921. }
  3922. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
  3923. {
  3924. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3925. statement("bool subgroupElect()");
  3926. begin_scope();
  3927. statement("uvec4 activeMask = subgroupBallot(true);");
  3928. statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
  3929. statement("return gl_SubgroupInvocationID == firstLive;");
  3930. end_scope();
  3931. statement("#endif");
  3932. statement("");
  3933. }
  3934. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
  3935. {
3936. // Extensions we're using in place of GL_KHR_shader_subgroup_basic state
3937. // that subgroups execute in lockstep, so this barrier is implicit.
3938. // However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
3939. // and a specific test of optimizing scans by leveraging lock-step invocation execution
3940. // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
  3941. // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
  3942. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3943. statement("void subgroupBarrier() { memoryBarrierShared(); }");
  3944. statement("#endif");
  3945. statement("");
  3946. }
  3947. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
  3948. {
  3949. if (model == spv::ExecutionModelGLCompute)
  3950. {
  3951. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3952. statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
  3953. statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
  3954. statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
  3955. statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
  3956. statement("#endif");
  3957. }
  3958. else
  3959. {
  3960. statement("#ifndef GL_KHR_shader_subgroup_basic");
  3961. statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
  3962. statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
  3963. statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
  3964. statement("#endif");
  3965. }
  3966. statement("");
  3967. }
  3968. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
  3969. {
  3970. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  3971. statement("bool subgroupInverseBallot(uvec4 value)");
  3972. begin_scope();
  3973. statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
  3974. end_scope();
  3975. statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
  3976. begin_scope();
  3977. statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
  3978. statement("ivec2 c = bitCount(v);");
  3979. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  3980. statement("return uint(c.x);");
  3981. statement_no_indent("#else");
  3982. statement("return uint(c.x + c.y);");
  3983. statement_no_indent("#endif");
  3984. end_scope();
  3985. statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
  3986. begin_scope();
  3987. statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
  3988. statement("ivec2 c = bitCount(v);");
  3989. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  3990. statement("return uint(c.x);");
  3991. statement_no_indent("#else");
  3992. statement("return uint(c.x + c.y);");
  3993. statement_no_indent("#endif");
  3994. end_scope();
  3995. statement("#endif");
  3996. statement("");
  3997. }
  3998. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
  3999. {
  4000. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  4001. statement("uint subgroupBallotBitCount(uvec4 value)");
  4002. begin_scope();
  4003. statement("ivec2 c = bitCount(value.xy);");
  4004. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  4005. statement("return uint(c.x);");
  4006. statement_no_indent("#else");
  4007. statement("return uint(c.x + c.y);");
  4008. statement_no_indent("#endif");
  4009. end_scope();
  4010. statement("#endif");
  4011. statement("");
  4012. }
  4013. if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
  4014. {
  4015. statement("#ifndef GL_KHR_shader_subgroup_ballot");
  4016. statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
  4017. begin_scope();
  4018. statement_no_indent("#ifdef GL_NV_shader_thread_group");
  4019. statement("uint shifted = value.x >> index;");
  4020. statement_no_indent("#else");
  4021. statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
  4022. statement_no_indent("#endif");
  4023. statement("return (shifted & 1u) != 0u;");
  4024. end_scope();
  4025. statement("#endif");
  4026. statement("");
  4027. }
  4028. auto arithmetic_feature_helper =
  4029. [&](Supp::Feature feat, std::string func_name, spv::Op op, spv::GroupOperation group_op)
  4030. {
  4031. if (shader_subgroup_supporter.is_feature_requested(feat))
  4032. {
  4033. auto exts = Supp::get_candidates_for_feature(feat, result);
  4034. for (auto &e : exts)
  4035. {
  4036. const char *name = Supp::get_extension_name(e);
  4037. statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
  4038. switch (e)
  4039. {
  4040. case Supp::NV_shader_thread_shuffle:
  4041. emit_subgroup_arithmetic_workaround(func_name, op, group_op);
  4042. break;
  4043. default:
  4044. break;
  4045. }
  4046. }
  4047. statement("#endif");
  4048. statement("");
  4049. }
  4050. };
  4051. arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd,
  4052. GroupOperationReduce);
  4053. arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd",
  4054. OpGroupNonUniformIAdd, GroupOperationExclusiveScan);
  4055. arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd",
  4056. OpGroupNonUniformIAdd, GroupOperationInclusiveScan);
  4057. arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd,
  4058. GroupOperationReduce);
  4059. arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd",
  4060. OpGroupNonUniformFAdd, GroupOperationExclusiveScan);
  4061. arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd",
  4062. OpGroupNonUniformFAdd, GroupOperationInclusiveScan);
  4063. arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul,
  4064. GroupOperationReduce);
  4065. arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul",
  4066. OpGroupNonUniformIMul, GroupOperationExclusiveScan);
  4067. arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul",
  4068. OpGroupNonUniformIMul, GroupOperationInclusiveScan);
  4069. arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul,
  4070. GroupOperationReduce);
  4071. arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul",
  4072. OpGroupNonUniformFMul, GroupOperationExclusiveScan);
  4073. arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul",
  4074. OpGroupNonUniformFMul, GroupOperationInclusiveScan);
  4075. }
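// spvWorkaroundRowMajor is an identity wrapper: routing certain (row-major) UBO loads through a
// function call is meant to side-step misbehaving compilers. On ES both a highp and a mediump
// ("MP") variant are emitted because GLSL cannot overload on precision alone.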
  4076. if (!workaround_ubo_load_overload_types.empty())
  4077. {
  4078. for (auto &type_id : workaround_ubo_load_overload_types)
  4079. {
  4080. auto &type = get<SPIRType>(type_id);
  4081. if (options.es && is_matrix(type))
  4082. {
  4083. // Need both variants.
  4084. // GLSL cannot overload on precision, so need to dispatch appropriately.
  4085. statement("highp ", type_to_glsl(type), " spvWorkaroundRowMajor(highp ", type_to_glsl(type), " wrap) { return wrap; }");
  4086. statement("mediump ", type_to_glsl(type), " spvWorkaroundRowMajorMP(mediump ", type_to_glsl(type), " wrap) { return wrap; }");
  4087. }
  4088. else
  4089. {
  4090. statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type), " wrap) { return wrap; }");
  4091. }
  4092. }
  4093. statement("");
  4094. }
  4095. }
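// Emits the spvTranspose / spvDeterminant / spvInverse helpers requested via the polyfill bits.
// On ES, relaxed-precision callers get a separate "MP" (mediump) overload, again because GLSL
// cannot overload on precision; e.g.:
//   mediump mat2 spvTransposeMP(mediump mat2 m) { return mat2(m[0][0], m[1][0], m[0][1], m[1][1]); }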
  4096. void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed)
  4097. {
  4098. const char *qual = "";
  4099. const char *suffix = (options.es && relaxed) ? "MP" : "";
  4100. if (options.es)
  4101. qual = relaxed ? "mediump " : "highp ";
  4102. if (polyfills & PolyfillTranspose2x2)
  4103. {
  4104. statement(qual, "mat2 spvTranspose", suffix, "(", qual, "mat2 m)");
  4105. begin_scope();
  4106. statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
  4107. end_scope();
  4108. statement("");
  4109. }
  4110. if (polyfills & PolyfillTranspose3x3)
  4111. {
  4112. statement(qual, "mat3 spvTranspose", suffix, "(", qual, "mat3 m)");
  4113. begin_scope();
  4114. statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
  4115. end_scope();
  4116. statement("");
  4117. }
  4118. if (polyfills & PolyfillTranspose4x4)
  4119. {
  4120. statement(qual, "mat4 spvTranspose", suffix, "(", qual, "mat4 m)");
  4121. begin_scope();
  4122. statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
  4123. "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
  4124. end_scope();
  4125. statement("");
  4126. }
  4127. if (polyfills & PolyfillDeterminant2x2)
  4128. {
  4129. statement(qual, "float spvDeterminant", suffix, "(", qual, "mat2 m)");
  4130. begin_scope();
  4131. statement("return m[0][0] * m[1][1] - m[0][1] * m[1][0];");
  4132. end_scope();
  4133. statement("");
  4134. }
  4135. if (polyfills & PolyfillDeterminant3x3)
  4136. {
  4137. statement(qual, "float spvDeterminant", suffix, "(", qual, "mat3 m)");
  4138. begin_scope();
  4139. statement("return dot(m[0], vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], "
  4140. "m[1][2] * m[2][0] - m[1][0] * m[2][2], "
  4141. "m[1][0] * m[2][1] - m[1][1] * m[2][0]));");
  4142. end_scope();
  4143. statement("");
  4144. }
  4145. if (polyfills & PolyfillDeterminant4x4)
  4146. {
  4147. statement(qual, "float spvDeterminant", suffix, "(", qual, "mat4 m)");
  4148. begin_scope();
  4149. statement("return dot(m[0], vec4("
  4150. "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
  4151. "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
  4152. "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
  4153. "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]));");
  4154. end_scope();
  4155. statement("");
  4156. }
  4157. if (polyfills & PolyfillMatrixInverse2x2)
  4158. {
  4159. statement(qual, "mat2 spvInverse", suffix, "(", qual, "mat2 m)");
  4160. begin_scope();
  4161. statement("return mat2(m[1][1], -m[0][1], -m[1][0], m[0][0]) "
  4162. "* (1.0 / (m[0][0] * m[1][1] - m[1][0] * m[0][1]));");
  4163. end_scope();
  4164. statement("");
  4165. }
  4166. if (polyfills & PolyfillMatrixInverse3x3)
  4167. {
  4168. statement(qual, "mat3 spvInverse", suffix, "(", qual, "mat3 m)");
  4169. begin_scope();
  4170. statement(qual, "vec3 t = vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], m[1][2] * m[2][0] - m[1][0] * m[2][2], m[1][0] * m[2][1] - m[1][1] * m[2][0]);");
  4171. statement("return mat3(t[0], "
  4172. "m[0][2] * m[2][1] - m[0][1] * m[2][2], "
  4173. "m[0][1] * m[1][2] - m[0][2] * m[1][1], "
  4174. "t[1], "
  4175. "m[0][0] * m[2][2] - m[0][2] * m[2][0], "
  4176. "m[0][2] * m[1][0] - m[0][0] * m[1][2], "
  4177. "t[2], "
  4178. "m[0][1] * m[2][0] - m[0][0] * m[2][1], "
  4179. "m[0][0] * m[1][1] - m[0][1] * m[1][0]) "
  4180. "* (1.0 / dot(m[0], t));");
  4181. end_scope();
  4182. statement("");
  4183. }
  4184. if (polyfills & PolyfillMatrixInverse4x4)
  4185. {
  4186. statement(qual, "mat4 spvInverse", suffix, "(", qual, "mat4 m)");
  4187. begin_scope();
  4188. statement(qual, "vec4 t = vec4("
  4189. "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], "
  4190. "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], "
  4191. "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], "
  4192. "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]);");
  4193. statement("return mat4("
  4194. "t[0], "
  4195. "m[3][1] * m[2][2] * m[0][3] - m[2][1] * m[3][2] * m[0][3] - m[3][1] * m[0][2] * m[2][3] + m[0][1] * m[3][2] * m[2][3] + m[2][1] * m[0][2] * m[3][3] - m[0][1] * m[2][2] * m[3][3], "
  4196. "m[1][1] * m[3][2] * m[0][3] - m[3][1] * m[1][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3] - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] + m[0][1] * m[1][2] * m[3][3], "
  4197. "m[2][1] * m[1][2] * m[0][3] - m[1][1] * m[2][2] * m[0][3] - m[2][1] * m[0][2] * m[1][3] + m[0][1] * m[2][2] * m[1][3] + m[1][1] * m[0][2] * m[2][3] - m[0][1] * m[1][2] * m[2][3], "
  4198. "t[1], "
  4199. "m[2][0] * m[3][2] * m[0][3] - m[3][0] * m[2][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3] - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] + m[0][0] * m[2][2] * m[3][3], "
  4200. "m[3][0] * m[1][2] * m[0][3] - m[1][0] * m[3][2] * m[0][3] - m[3][0] * m[0][2] * m[1][3] + m[0][0] * m[3][2] * m[1][3] + m[1][0] * m[0][2] * m[3][3] - m[0][0] * m[1][2] * m[3][3], "
  4201. "m[1][0] * m[2][2] * m[0][3] - m[2][0] * m[1][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3] - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] + m[0][0] * m[1][2] * m[2][3], "
  4202. "t[2], "
  4203. "m[3][0] * m[2][1] * m[0][3] - m[2][0] * m[3][1] * m[0][3] - m[3][0] * m[0][1] * m[2][3] + m[0][0] * m[3][1] * m[2][3] + m[2][0] * m[0][1] * m[3][3] - m[0][0] * m[2][1] * m[3][3], "
  4204. "m[1][0] * m[3][1] * m[0][3] - m[3][0] * m[1][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3] - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] + m[0][0] * m[1][1] * m[3][3], "
  4205. "m[2][0] * m[1][1] * m[0][3] - m[1][0] * m[2][1] * m[0][3] - m[2][0] * m[0][1] * m[1][3] + m[0][0] * m[2][1] * m[1][3] + m[1][0] * m[0][1] * m[2][3] - m[0][0] * m[1][1] * m[2][3], "
  4206. "t[3], "
  4207. "m[2][0] * m[3][1] * m[0][2] - m[3][0] * m[2][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2] - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] + m[0][0] * m[2][1] * m[3][2], "
  4208. "m[3][0] * m[1][1] * m[0][2] - m[1][0] * m[3][1] * m[0][2] - m[3][0] * m[0][1] * m[1][2] + m[0][0] * m[3][1] * m[1][2] + m[1][0] * m[0][1] * m[3][2] - m[0][0] * m[1][1] * m[3][2], "
  4209. "m[1][0] * m[2][1] * m[0][2] - m[2][0] * m[1][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] + m[0][0] * m[1][1] * m[2][2]) "
  4210. "* (1.0 / dot(m[0], t));");
  4211. end_scope();
  4212. statement("");
  4213. }
  4214. }
  4215. // Returns a string representation of the ID, usable as a function arg.
4216. // Default is to simply return the expression representation of the arg ID.
  4217. // Subclasses may override to modify the return value.
  4218. string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
  4219. {
  4220. // Make sure that we use the name of the original variable, and not the parameter alias.
  4221. uint32_t name_id = id;
  4222. auto *var = maybe_get<SPIRVariable>(id);
  4223. if (var && var->basevariable)
  4224. name_id = var->basevariable;
  4225. return to_expression(name_id);
  4226. }
  4227. void CompilerGLSL::force_temporary_and_recompile(uint32_t id)
  4228. {
  4229. auto res = forced_temporaries.insert(id);
  4230. // Forcing new temporaries guarantees forward progress.
  4231. if (res.second)
  4232. force_recompile_guarantee_forward_progress();
  4233. else
  4234. force_recompile();
  4235. }
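// When a 32-bit value is consumed in a precision context that differs from the precision it was
// declared with, emit (and cache) a mirror-precision copy temporary ("mp_copy_" / "hp_copy_"
// prefix) and return its ID, forcing a recompile so the copy is materialized on the next pass.
// Constants, pointers and non-32-bit types pass through untouched.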
  4236. uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision)
  4237. {
  4238. // Constants do not have innate precision.
  4239. auto handle_type = ir.ids[id].get_type();
  4240. if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
  4241. return id;
  4242. // Ignore anything that isn't 32-bit values.
  4243. auto &type = get<SPIRType>(type_id);
  4244. if (type.pointer)
  4245. return id;
  4246. if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int)
  4247. return id;
  4248. if (precision == Options::DontCare)
  4249. {
  4250. // If precision is consumed as don't care (operations only consisting of constants),
  4251. // we need to bind the expression to a temporary,
  4252. // otherwise we have no way of controlling the precision later.
  4253. auto itr = forced_temporaries.insert(id);
  4254. if (itr.second)
  4255. force_recompile_guarantee_forward_progress();
  4256. return id;
  4257. }
  4258. auto current_precision = has_decoration(id, DecorationRelaxedPrecision) ? Options::Mediump : Options::Highp;
  4259. if (current_precision == precision)
  4260. return id;
  4261. auto itr = temporary_to_mirror_precision_alias.find(id);
  4262. if (itr == temporary_to_mirror_precision_alias.end())
  4263. {
  4264. uint32_t alias_id = ir.increase_bound_by(1);
  4265. auto &m = ir.meta[alias_id];
  4266. if (auto *input_m = ir.find_meta(id))
  4267. m = *input_m;
  4268. const char *prefix;
  4269. if (precision == Options::Mediump)
  4270. {
  4271. set_decoration(alias_id, DecorationRelaxedPrecision);
  4272. prefix = "mp_copy_";
  4273. }
  4274. else
  4275. {
  4276. unset_decoration(alias_id, DecorationRelaxedPrecision);
  4277. prefix = "hp_copy_";
  4278. }
  4279. auto alias_name = join(prefix, to_name(id));
  4280. ParsedIR::sanitize_underscores(alias_name);
  4281. set_name(alias_id, alias_name);
  4282. emit_op(type_id, alias_id, to_expression(id), true);
  4283. temporary_to_mirror_precision_alias[id] = alias_id;
  4284. forced_temporaries.insert(id);
  4285. forced_temporaries.insert(alias_id);
  4286. force_recompile_guarantee_forward_progress();
  4287. id = alias_id;
  4288. }
  4289. else
  4290. {
  4291. id = itr->second;
  4292. }
  4293. return id;
  4294. }
  4295. void CompilerGLSL::handle_invalid_expression(uint32_t id)
  4296. {
  4297. // We tried to read an invalidated expression.
  4298. // This means we need another pass at compilation, but next time,
  4299. // force temporary variables so that they cannot be invalidated.
  4300. force_temporary_and_recompile(id);
  4301. // If the invalid expression happened as a result of a CompositeInsert
  4302. // overwrite, we must block this from happening next iteration.
  4303. if (composite_insert_overwritten.count(id))
  4304. block_composite_insert_overwrite.insert(id);
  4305. }
  4306. // Converts the format of the current expression from packed to unpacked,
  4307. // by wrapping the expression in a constructor of the appropriate type.
  4308. // GLSL does not support packed formats, so simply return the expression.
  4309. // Subclasses that do will override.
  4310. string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
  4311. {
  4312. return expr_str;
  4313. }
4314. // Sometimes we proactively enclosed an expression where it turns out we might not have needed it after all.
  4315. void CompilerGLSL::strip_enclosed_expression(string &expr)
  4316. {
  4317. if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
  4318. return;
  4319. // Have to make sure that our first and last parens actually enclose everything inside it.
  4320. uint32_t paren_count = 0;
  4321. for (auto &c : expr)
  4322. {
  4323. if (c == '(')
  4324. paren_count++;
  4325. else if (c == ')')
  4326. {
  4327. paren_count--;
  4328. // If we hit 0 and this is not the final char, our first and final parens actually don't
  4329. // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
  4330. if (paren_count == 0 && &c != &expr.back())
  4331. return;
  4332. }
  4333. }
  4334. expr.erase(expr.size() - 1, 1);
  4335. expr.erase(begin(expr));
  4336. }
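// Heuristic: an expression needs enclosing if it starts with a unary operator or contains a
// top-level space (one not inside () or []). E.g. "a + b" and "-x" need parens,
// "texture(s, uv)" does not.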
  4337. bool CompilerGLSL::needs_enclose_expression(const std::string &expr)
  4338. {
  4339. bool need_parens = false;
  4340. // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
  4341. // unary expressions.
  4342. if (!expr.empty())
  4343. {
  4344. auto c = expr.front();
  4345. if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
  4346. need_parens = true;
  4347. }
  4348. if (!need_parens)
  4349. {
  4350. uint32_t paren_count = 0;
  4351. for (auto c : expr)
  4352. {
  4353. if (c == '(' || c == '[')
  4354. paren_count++;
  4355. else if (c == ')' || c == ']')
  4356. {
  4357. assert(paren_count);
  4358. paren_count--;
  4359. }
  4360. else if (c == ' ' && paren_count == 0)
  4361. {
  4362. need_parens = true;
  4363. break;
  4364. }
  4365. }
  4366. assert(paren_count == 0);
  4367. }
  4368. return need_parens;
  4369. }
  4370. string CompilerGLSL::enclose_expression(const string &expr)
  4371. {
  4372. // If this expression contains any spaces which are not enclosed by parentheses,
  4373. // we need to enclose it so we can treat the whole string as an expression.
  4374. // This happens when two expressions have been part of a binary op earlier.
  4375. if (needs_enclose_expression(expr))
  4376. return join('(', expr, ')');
  4377. else
  4378. return expr;
  4379. }
  4380. string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
  4381. {
  4382. // If this expression starts with an address-of operator ('&'), then
  4383. // just return the part after the operator.
  4384. // TODO: Strip parens if unnecessary?
  4385. if (expr.front() == '&')
  4386. return expr.substr(1);
  4387. else if (backend.native_pointers)
  4388. return join('*', expr);
  4389. else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
  4390. expr_type.pointer_depth == 1)
  4391. {
  4392. return join(enclose_expression(expr), ".value");
  4393. }
  4394. else
  4395. return expr;
  4396. }
  4397. string CompilerGLSL::address_of_expression(const std::string &expr)
  4398. {
  4399. if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
  4400. {
  4401. // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
  4402. // the first two and last characters. We might have to enclose the expression.
  4403. // This doesn't work for cases like (*foo + 10),
  4404. // but this is an r-value expression which we cannot take the address of anyways.
  4405. return enclose_expression(expr.substr(2, expr.size() - 3));
  4406. }
  4407. else if (expr.front() == '*')
  4408. {
  4409. // If this expression starts with a dereference operator ('*'), then
  4410. // just return the part after the operator.
  4411. return expr.substr(1);
  4412. }
  4413. else
  4414. return join('&', enclose_expression(expr));
  4415. }
  4416. // Just like to_expression except that we enclose the expression inside parentheses if needed.
  4417. string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
  4418. {
  4419. return enclose_expression(to_expression(id, register_expression_read));
  4420. }
  4421. // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
  4422. // need_transpose must be forced to false.
  4423. string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
  4424. {
  4425. return unpack_expression_type(to_expression(id), expression_type(id),
  4426. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  4427. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
  4428. }
  4429. string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
  4430. {
  4431. // If we need to transpose, it will also take care of unpacking rules.
  4432. auto *e = maybe_get<SPIRExpression>(id);
  4433. bool need_transpose = e && e->need_transpose;
  4434. bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  4435. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  4436. if (!need_transpose && (is_remapped || is_packed))
  4437. {
  4438. return unpack_expression_type(to_expression(id, register_expression_read),
  4439. get_pointee_type(expression_type_id(id)),
  4440. get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
  4441. has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
  4442. }
  4443. else
  4444. return to_expression(id, register_expression_read);
  4445. }
  4446. string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
  4447. {
  4448. return enclose_expression(to_unpacked_expression(id, register_expression_read));
  4449. }
  4450. string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
  4451. {
  4452. auto &type = expression_type(id);
  4453. if (type.pointer && should_dereference(id))
  4454. return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
  4455. else
  4456. return to_expression(id, register_expression_read);
  4457. }
  4458. string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
  4459. {
  4460. auto &type = expression_type(id);
  4461. if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
  4462. return address_of_expression(to_enclosed_expression(id, register_expression_read));
  4463. else
  4464. return to_unpacked_expression(id, register_expression_read);
  4465. }
  4466. string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
  4467. {
  4468. auto &type = expression_type(id);
  4469. if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
  4470. return address_of_expression(to_enclosed_expression(id, register_expression_read));
  4471. else
  4472. return to_enclosed_unpacked_expression(id, register_expression_read);
  4473. }
  4474. string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
  4475. {
  4476. auto expr = to_enclosed_expression(id);
  4477. if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
  4478. return join(expr, "[", index, "]");
  4479. else
  4480. return join(expr, ".", index_to_swizzle(index));
  4481. }
  4482. string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
  4483. const uint32_t *chain, uint32_t length)
  4484. {
4485. // It is kinda silly if an application actually enters this path since it knows the constant up front.
  4486. // It is useful here to extract the plain constant directly.
  4487. SPIRConstant tmp;
  4488. tmp.constant_type = result_type;
  4489. auto &composite_type = get<SPIRType>(c.constant_type);
  4490. assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
  4491. assert(!c.specialization);
  4492. if (is_matrix(composite_type))
  4493. {
  4494. if (length == 2)
  4495. {
  4496. tmp.m.c[0].vecsize = 1;
  4497. tmp.m.columns = 1;
  4498. tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
  4499. }
  4500. else
  4501. {
  4502. assert(length == 1);
  4503. tmp.m.c[0].vecsize = composite_type.vecsize;
  4504. tmp.m.columns = 1;
  4505. tmp.m.c[0] = c.m.c[chain[0]];
  4506. }
  4507. }
  4508. else
  4509. {
  4510. assert(length == 1);
  4511. tmp.m.c[0].vecsize = 1;
  4512. tmp.m.columns = 1;
  4513. tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
  4514. }
  4515. return constant_expression(tmp);
  4516. }
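// Builds a brace-initializer which lists every element of an array expression explicitly,
// recursing for arrays of arrays, e.g. "{ arr[0], arr[1] }" for a two-element array.
// Booleans inside structs are value-cast to the remapped boolean type as needed.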
  4517. string CompilerGLSL::to_rerolled_array_expression(const SPIRType &parent_type,
  4518. const string &base_expr, const SPIRType &type)
  4519. {
  4520. bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
  4521. type.basetype == SPIRType::Boolean &&
  4522. backend.boolean_in_struct_remapped_type != SPIRType::Boolean;
  4523. SPIRType tmp_type;
  4524. if (remapped_boolean)
  4525. {
  4526. tmp_type = get<SPIRType>(type.parent_type);
  4527. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  4528. }
  4529. else if (type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean)
  4530. {
4531. // It's possible that we have an r-value expression that was OpLoaded from a struct.
4532. // We have to reroll this and explicitly cast the input to bool, because the r-value was loaded as the remapped (e.g. short) type.
  4533. tmp_type = get<SPIRType>(type.parent_type);
  4534. remapped_boolean = true;
  4535. }
  4536. uint32_t size = to_array_size_literal(type);
  4537. auto &parent = get<SPIRType>(type.parent_type);
  4538. string expr = "{ ";
  4539. for (uint32_t i = 0; i < size; i++)
  4540. {
  4541. auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
  4542. if (!type_is_top_level_array(parent))
  4543. {
  4544. if (remapped_boolean)
  4545. subexpr = join(type_to_glsl(tmp_type), "(", subexpr, ")");
  4546. expr += subexpr;
  4547. }
  4548. else
  4549. expr += to_rerolled_array_expression(parent_type, subexpr, parent);
  4550. if (i + 1 < size)
  4551. expr += ", ";
  4552. }
  4553. expr += " }";
  4554. return expr;
  4555. }
  4556. string CompilerGLSL::to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type)
  4557. {
  4558. auto &type = expression_type(id);
  4559. bool reroll_array = false;
  4560. bool remapped_boolean = parent_type.basetype == SPIRType::Struct &&
  4561. type.basetype == SPIRType::Boolean &&
  4562. backend.boolean_in_struct_remapped_type != SPIRType::Boolean;
  4563. if (type_is_top_level_array(type))
  4564. {
  4565. reroll_array = !backend.array_is_value_type ||
  4566. (block_like_type && !backend.array_is_value_type_in_buffer_blocks);
  4567. if (remapped_boolean)
  4568. {
  4569. // Forced to reroll if we have to change bool[] to short[].
  4570. reroll_array = true;
  4571. }
  4572. }
  4573. if (reroll_array)
  4574. {
  4575. // For this case, we need to "re-roll" an array initializer from a temporary.
  4576. // We cannot simply pass the array directly, since it decays to a pointer and it cannot
  4577. // participate in a struct initializer. E.g.
  4578. // float arr[2] = { 1.0, 2.0 };
  4579. // Foo foo = { arr }; must be transformed to
  4580. // Foo foo = { { arr[0], arr[1] } };
  4581. // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
  4582. // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
  4583. // as temporaries anyways.
  4584. return to_rerolled_array_expression(parent_type, to_enclosed_expression(id), type);
  4585. }
  4586. else
  4587. {
  4588. auto expr = to_unpacked_expression(id);
  4589. if (remapped_boolean)
  4590. {
  4591. auto tmp_type = type;
  4592. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  4593. expr = join(type_to_glsl(tmp_type), "(", expr, ")");
  4594. }
  4595. return expr;
  4596. }
  4597. }
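// Wraps the expression when the ID carries the NonUniform decoration, so that backends can
// inject their non-uniform qualifier, e.g. nonuniformEXT(index) in Vulkan GLSL.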
  4598. string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
  4599. {
  4600. string expr = to_expression(id);
  4601. if (has_decoration(id, DecorationNonUniform))
  4602. convert_non_uniform_expression(expr, id);
  4603. return expr;
  4604. }
  4605. string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
  4606. {
  4607. auto itr = invalid_expressions.find(id);
  4608. if (itr != end(invalid_expressions))
  4609. handle_invalid_expression(id);
  4610. if (ir.ids[id].get_type() == TypeExpression)
  4611. {
  4612. // We might have a more complex chain of dependencies.
  4613. // A possible scenario is that we
  4614. //
  4615. // %1 = OpLoad
4616. // %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
  4617. // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
  4618. // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
  4619. // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
  4620. //
  4621. // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
  4622. // and see that we should not forward reads of the original variable.
  4623. auto &expr = get<SPIRExpression>(id);
  4624. for (uint32_t dep : expr.expression_dependencies)
  4625. if (invalid_expressions.find(dep) != end(invalid_expressions))
  4626. handle_invalid_expression(dep);
  4627. }
  4628. if (register_expression_read)
  4629. track_expression_read(id);
  4630. switch (ir.ids[id].get_type())
  4631. {
  4632. case TypeExpression:
  4633. {
  4634. auto &e = get<SPIRExpression>(id);
  4635. if (e.base_expression)
  4636. return to_enclosed_expression(e.base_expression) + e.expression;
  4637. else if (e.need_transpose)
  4638. {
  4639. // This should not be reached for access chains, since we always deal explicitly with transpose state
  4640. // when consuming an access chain expression.
  4641. uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  4642. bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  4643. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  4644. return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
  4645. is_packed, relaxed);
  4646. }
  4647. else if (flattened_structs.count(id))
  4648. {
  4649. return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
  4650. }
  4651. else
  4652. {
  4653. if (is_forcing_recompilation())
  4654. {
  4655. // During first compilation phase, certain expression patterns can trigger exponential growth of memory.
  4656. // Avoid this by returning dummy expressions during this phase.
  4657. // Do not use empty expressions here, because those are sentinels for other cases.
  4658. return "_";
  4659. }
  4660. else
  4661. return e.expression;
  4662. }
  4663. }
  4664. case TypeConstant:
  4665. {
  4666. auto &c = get<SPIRConstant>(id);
  4667. auto &type = get<SPIRType>(c.constant_type);
  4668. // WorkGroupSize may be a constant.
  4669. if (has_decoration(c.self, DecorationBuiltIn))
  4670. return builtin_to_glsl(BuiltIn(get_decoration(c.self, DecorationBuiltIn)), StorageClassGeneric);
  4671. else if (c.specialization)
  4672. {
  4673. if (backend.workgroup_size_is_hidden)
  4674. {
  4675. int wg_index = get_constant_mapping_to_workgroup_component(c);
  4676. if (wg_index >= 0)
  4677. {
  4678. auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index));
  4679. if (type.basetype != SPIRType::UInt)
  4680. wg_size = bitcast_expression(type, SPIRType::UInt, wg_size);
  4681. return wg_size;
  4682. }
  4683. }
  4684. if (expression_is_forwarded(id))
  4685. return constant_expression(c);
  4686. return to_name(id);
  4687. }
  4688. else if (c.is_used_as_lut)
  4689. return to_name(id);
  4690. else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
  4691. return to_name(id);
  4692. else if (!type.array.empty() && !backend.can_declare_arrays_inline)
  4693. return to_name(id);
  4694. else
  4695. return constant_expression(c);
  4696. }
  4697. case TypeConstantOp:
  4698. return to_name(id);
  4699. case TypeVariable:
  4700. {
  4701. auto &var = get<SPIRVariable>(id);
4702. // If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
4703. // since the variable has not been declared yet.
  4704. if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
  4705. {
  4706. // We might try to load from a loop variable before it has been initialized.
  4707. // Prefer static expression and fallback to initializer.
  4708. if (var.static_expression)
  4709. return to_expression(var.static_expression);
  4710. else if (var.initializer)
  4711. return to_expression(var.initializer);
  4712. else
  4713. {
  4714. // We cannot declare the variable yet, so have to fake it.
  4715. uint32_t undef_id = ir.increase_bound_by(1);
  4716. return emit_uninitialized_temporary_expression(get_variable_data_type_id(var), undef_id).expression;
  4717. }
  4718. }
  4719. else if (var.deferred_declaration)
  4720. {
  4721. var.deferred_declaration = false;
  4722. return variable_decl(var);
  4723. }
  4724. else if (flattened_structs.count(id))
  4725. {
  4726. return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
  4727. }
  4728. else
  4729. {
  4730. auto &dec = ir.meta[var.self].decoration;
  4731. if (dec.builtin)
  4732. return builtin_to_glsl(dec.builtin_type, var.storage);
  4733. else
  4734. return to_name(id);
  4735. }
  4736. }
  4737. case TypeCombinedImageSampler:
4738. // We should never need to take the expression of this type directly.
4739. // The intention is that texture sampling functions will extract the image and samplers
4740. // separately and take their expressions as needed.
4741. // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
4742. // expression a la sampler2D(texture, sampler).
  4743. SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
  4744. case TypeAccessChain:
4745. // We cannot express this type. Access chains only have meaning in other OpAccessChains, OpStore or OpLoad.
  4746. SPIRV_CROSS_THROW("Access chains have no default expression representation.");
  4747. default:
  4748. return to_name(id);
  4749. }
  4750. }
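// The helpers below back the OpSpecConstantOp CompositeInsert path in constant_op_expression():
// get_composite_constant_ids() extracts the element IDs of a composite constant, while
// fill_composite_constant()/set_composite_constant() rebuild a composite with one element replaced,
// creating fresh intermediate constants for each level of nesting.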
  4751. SmallVector<ConstantID> CompilerGLSL::get_composite_constant_ids(ConstantID const_id)
  4752. {
  4753. if (auto *constant = maybe_get<SPIRConstant>(const_id))
  4754. {
  4755. const auto &type = get<SPIRType>(constant->constant_type);
  4756. if (is_array(type) || type.basetype == SPIRType::Struct)
  4757. return constant->subconstants;
  4758. if (is_matrix(type))
  4759. return SmallVector<ConstantID>(constant->m.id);
  4760. if (is_vector(type))
  4761. return SmallVector<ConstantID>(constant->m.c[0].id);
  4762. SPIRV_CROSS_THROW("Unexpected scalar constant!");
  4763. }
  4764. if (!const_composite_insert_ids.count(const_id))
  4765. SPIRV_CROSS_THROW("Unimplemented for this OpSpecConstantOp!");
  4766. return const_composite_insert_ids[const_id];
  4767. }
  4768. void CompilerGLSL::fill_composite_constant(SPIRConstant &constant, TypeID type_id,
  4769. const SmallVector<ConstantID> &initializers)
  4770. {
  4771. auto &type = get<SPIRType>(type_id);
  4772. constant.specialization = true;
  4773. if (is_array(type) || type.basetype == SPIRType::Struct)
  4774. {
  4775. constant.subconstants = initializers;
  4776. }
  4777. else if (is_matrix(type))
  4778. {
  4779. constant.m.columns = type.columns;
  4780. for (uint32_t i = 0; i < type.columns; ++i)
  4781. {
  4782. constant.m.id[i] = initializers[i];
  4783. constant.m.c[i].vecsize = type.vecsize;
  4784. }
  4785. }
  4786. else if (is_vector(type))
  4787. {
  4788. constant.m.c[0].vecsize = type.vecsize;
  4789. for (uint32_t i = 0; i < type.vecsize; ++i)
  4790. constant.m.c[0].id[i] = initializers[i];
  4791. }
  4792. else
  4793. SPIRV_CROSS_THROW("Unexpected scalar in SpecConstantOp CompositeInsert!");
  4794. }
  4795. void CompilerGLSL::set_composite_constant(ConstantID const_id, TypeID type_id,
  4796. const SmallVector<ConstantID> &initializers)
  4797. {
  4798. if (maybe_get<SPIRConstantOp>(const_id))
  4799. {
  4800. const_composite_insert_ids[const_id] = initializers;
  4801. return;
  4802. }
  4803. auto &constant = set<SPIRConstant>(const_id, type_id);
  4804. fill_composite_constant(constant, type_id, initializers);
  4805. forwarded_temporaries.insert(const_id);
  4806. }
  4807. TypeID CompilerGLSL::get_composite_member_type(TypeID type_id, uint32_t member_idx)
  4808. {
  4809. auto &type = get<SPIRType>(type_id);
  4810. if (is_array(type))
  4811. return type.parent_type;
  4812. if (type.basetype == SPIRType::Struct)
  4813. return type.member_types[member_idx];
  4814. if (is_matrix(type))
  4815. return type.parent_type;
  4816. if (is_vector(type))
  4817. return type.parent_type;
  4818. SPIRV_CROSS_THROW("Shouldn't reach lower than vector handling OpSpecConstantOp CompositeInsert!");
  4819. }
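// Emits an OpSpecConstantOp as a plain expression so it remains a compile-time constant in the
// output, e.g. an IAdd of spec constants a and b becomes "(a + b)". Signed/unsigned opcodes
// bitcast their operands to the expected signedness first, mirroring what emit_instruction does.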
  4820. string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
  4821. {
  4822. auto &type = get<SPIRType>(cop.basetype);
  4823. bool binary = false;
  4824. bool unary = false;
  4825. string op;
  4826. if (is_legacy() && is_unsigned_opcode(cop.opcode))
  4827. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
  4828. // TODO: Find a clean way to reuse emit_instruction.
  4829. switch (cop.opcode)
  4830. {
  4831. case OpSConvert:
  4832. case OpUConvert:
  4833. case OpFConvert:
  4834. op = type_to_glsl_constructor(type);
  4835. break;
  4836. #define GLSL_BOP(opname, x) \
  4837. case Op##opname: \
  4838. binary = true; \
  4839. op = x; \
  4840. break
  4841. #define GLSL_UOP(opname, x) \
  4842. case Op##opname: \
  4843. unary = true; \
  4844. op = x; \
  4845. break
  4846. GLSL_UOP(SNegate, "-");
  4847. GLSL_UOP(Not, "~");
  4848. GLSL_BOP(IAdd, "+");
  4849. GLSL_BOP(ISub, "-");
  4850. GLSL_BOP(IMul, "*");
  4851. GLSL_BOP(SDiv, "/");
  4852. GLSL_BOP(UDiv, "/");
  4853. GLSL_BOP(UMod, "%");
  4854. GLSL_BOP(SMod, "%");
  4855. GLSL_BOP(ShiftRightLogical, ">>");
  4856. GLSL_BOP(ShiftRightArithmetic, ">>");
  4857. GLSL_BOP(ShiftLeftLogical, "<<");
  4858. GLSL_BOP(BitwiseOr, "|");
  4859. GLSL_BOP(BitwiseXor, "^");
  4860. GLSL_BOP(BitwiseAnd, "&");
  4861. GLSL_BOP(LogicalOr, "||");
  4862. GLSL_BOP(LogicalAnd, "&&");
  4863. GLSL_UOP(LogicalNot, "!");
  4864. GLSL_BOP(LogicalEqual, "==");
  4865. GLSL_BOP(LogicalNotEqual, "!=");
  4866. GLSL_BOP(IEqual, "==");
  4867. GLSL_BOP(INotEqual, "!=");
  4868. GLSL_BOP(ULessThan, "<");
  4869. GLSL_BOP(SLessThan, "<");
  4870. GLSL_BOP(ULessThanEqual, "<=");
  4871. GLSL_BOP(SLessThanEqual, "<=");
  4872. GLSL_BOP(UGreaterThan, ">");
  4873. GLSL_BOP(SGreaterThan, ">");
  4874. GLSL_BOP(UGreaterThanEqual, ">=");
  4875. GLSL_BOP(SGreaterThanEqual, ">=");
  4876. case OpSRem:
  4877. {
  4878. uint32_t op0 = cop.arguments[0];
  4879. uint32_t op1 = cop.arguments[1];
  4880. return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
  4881. to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
  4882. }
  4883. case OpSelect:
  4884. {
  4885. if (cop.arguments.size() < 3)
  4886. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  4887. // This one is pretty annoying. It's triggered from
  4888. // uint(bool), int(bool) from spec constants.
  4889. // In order to preserve its compile-time constness in Vulkan GLSL,
  4890. // we need to reduce the OpSelect expression back to this simplified model.
  4891. // If we cannot, fail.
  4892. if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
  4893. {
  4894. // Implement as a simple cast down below.
  4895. }
  4896. else
  4897. {
  4898. // Implement a ternary and pray the compiler understands it :)
  4899. return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
  4900. }
  4901. break;
  4902. }
  4903. case OpVectorShuffle:
  4904. {
  4905. string expr = type_to_glsl_constructor(type);
  4906. expr += "(";
  4907. uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
  4908. string left_arg = to_enclosed_expression(cop.arguments[0]);
  4909. string right_arg = to_enclosed_expression(cop.arguments[1]);
  4910. for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
  4911. {
  4912. uint32_t index = cop.arguments[i];
  4913. if (index == 0xFFFFFFFF)
  4914. {
  4915. SPIRConstant c;
  4916. c.constant_type = type.parent_type;
  4917. assert(type.parent_type != ID(0));
  4918. expr += constant_expression(c);
  4919. }
  4920. else if (index >= left_components)
  4921. {
  4922. expr += right_arg + "." + "xyzw"[index - left_components];
  4923. }
  4924. else
  4925. {
  4926. expr += left_arg + "." + "xyzw"[index];
  4927. }
  4928. if (i + 1 < uint32_t(cop.arguments.size()))
  4929. expr += ", ";
  4930. }
  4931. expr += ")";
  4932. return expr;
  4933. }
  4934. case OpCompositeExtract:
  4935. {
  4936. auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
  4937. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  4938. return expr;
  4939. }
  4940. case OpCompositeInsert:
  4941. {
  4942. SmallVector<ConstantID> new_init = get_composite_constant_ids(cop.arguments[1]);
  4943. uint32_t idx;
  4944. uint32_t target_id = cop.self;
  4945. uint32_t target_type_id = cop.basetype;
  4946. // We have to drill down to the part we want to modify, and create new
  4947. // constants for each containing part.
  4948. for (idx = 2; idx < cop.arguments.size() - 1; ++idx)
  4949. {
  4950. uint32_t new_const = ir.increase_bound_by(1);
  4951. uint32_t old_const = new_init[cop.arguments[idx]];
  4952. new_init[cop.arguments[idx]] = new_const;
  4953. set_composite_constant(target_id, target_type_id, new_init);
  4954. new_init = get_composite_constant_ids(old_const);
  4955. target_id = new_const;
  4956. target_type_id = get_composite_member_type(target_type_id, cop.arguments[idx]);
  4957. }
  4958. // Now replace the initializer with the one from this instruction.
  4959. new_init[cop.arguments[idx]] = cop.arguments[0];
  4960. set_composite_constant(target_id, target_type_id, new_init);
  4961. SPIRConstant tmp_const(cop.basetype);
  4962. fill_composite_constant(tmp_const, cop.basetype, const_composite_insert_ids[cop.self]);
  4963. return constant_expression(tmp_const);
  4964. }
  4965. default:
4966. // Some opcodes are unimplemented here; these are currently not possible to test from glslang.
  4967. SPIRV_CROSS_THROW("Unimplemented spec constant op.");
  4968. }
  4969. uint32_t bit_width = 0;
  4970. if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
  4971. bit_width = expression_type(cop.arguments[0]).width;
  4972. SPIRType::BaseType input_type;
  4973. bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
  4974. switch (cop.opcode)
  4975. {
  4976. case OpIEqual:
  4977. case OpINotEqual:
  4978. input_type = to_signed_basetype(bit_width);
  4979. break;
  4980. case OpSLessThan:
  4981. case OpSLessThanEqual:
  4982. case OpSGreaterThan:
  4983. case OpSGreaterThanEqual:
  4984. case OpSMod:
  4985. case OpSDiv:
  4986. case OpShiftRightArithmetic:
  4987. case OpSConvert:
  4988. case OpSNegate:
  4989. input_type = to_signed_basetype(bit_width);
  4990. break;
  4991. case OpULessThan:
  4992. case OpULessThanEqual:
  4993. case OpUGreaterThan:
  4994. case OpUGreaterThanEqual:
  4995. case OpUMod:
  4996. case OpUDiv:
  4997. case OpShiftRightLogical:
  4998. case OpUConvert:
  4999. input_type = to_unsigned_basetype(bit_width);
  5000. break;
  5001. default:
  5002. input_type = type.basetype;
  5003. break;
  5004. }
  5005. #undef GLSL_BOP
  5006. #undef GLSL_UOP
  5007. if (binary)
  5008. {
  5009. if (cop.arguments.size() < 2)
  5010. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5011. string cast_op0;
  5012. string cast_op1;
  5013. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
  5014. cop.arguments[1], skip_cast_if_equal_type);
  5015. if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
  5016. {
  5017. expected_type.basetype = input_type;
  5018. auto expr = bitcast_glsl_op(type, expected_type);
  5019. expr += '(';
  5020. expr += join(cast_op0, " ", op, " ", cast_op1);
  5021. expr += ')';
  5022. return expr;
  5023. }
  5024. else
  5025. return join("(", cast_op0, " ", op, " ", cast_op1, ")");
  5026. }
  5027. else if (unary)
  5028. {
  5029. if (cop.arguments.size() < 1)
  5030. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5031. // Auto-bitcast to result type as needed.
  5032. // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
  5033. return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
  5034. }
  5035. else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
  5036. {
  5037. if (cop.arguments.size() < 1)
  5038. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5039. auto &arg_type = expression_type(cop.arguments[0]);
  5040. if (arg_type.width < type.width && input_type != arg_type.basetype)
  5041. {
  5042. auto expected = arg_type;
  5043. expected.basetype = input_type;
  5044. return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
  5045. }
  5046. else
  5047. return join(op, "(", to_expression(cop.arguments[0]), ")");
  5048. }
  5049. else
  5050. {
  5051. if (cop.arguments.size() < 1)
  5052. SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
  5053. return join(op, "(", to_expression(cop.arguments[0]), ")");
  5054. }
  5055. }
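// Emits a composite or scalar constant. Depending on backend flags the result is either an
// initializer list, e.g. "{ 1.0, 2.0 }", a typed initializer list, e.g. "Foo{ 1.0, 2.0 }",
// or a constructor call, e.g. "vec2(1.0, 2.0)". Backends whose arrays are value types
// (such as MSL) may additionally wrap array constants in their templated array type.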
  5056. string CompilerGLSL::constant_expression(const SPIRConstant &c,
  5057. bool inside_block_like_struct_scope,
  5058. bool inside_struct_scope)
  5059. {
  5060. auto &type = get<SPIRType>(c.constant_type);
  5061. if (type_is_top_level_pointer(type))
  5062. {
  5063. return backend.null_pointer_literal;
  5064. }
  5065. else if (!c.subconstants.empty())
  5066. {
  5067. // Handles Arrays and structures.
  5068. string res;
  5069. // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration.
  5070. // Outside a block-like struct declaration, we can always bind to a constant array with templated type.
  5071. // Should look at ArrayStride here as well, but it's possible to declare a constant struct
  5072. // with Offset = 0, using no ArrayStride on the enclosed array type.
  5073. // A particular CTS test hits this scenario.
  5074. bool array_type_decays = inside_block_like_struct_scope &&
  5075. type_is_top_level_array(type) &&
  5076. !backend.array_is_value_type_in_buffer_blocks;
  5077. // Allow Metal to use the array<T> template to make arrays a value type
5078. bool needs_trailing_bracket = false;
  5079. if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
  5080. !type_is_top_level_array(type))
  5081. {
  5082. res = type_to_glsl_constructor(type) + "{ ";
  5083. }
  5084. else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
  5085. type_is_top_level_array(type) && !array_type_decays)
  5086. {
  5087. const auto *p_type = &type;
  5088. SPIRType tmp_type;
  5089. if (inside_struct_scope &&
  5090. backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
  5091. type.basetype == SPIRType::Boolean)
  5092. {
  5093. tmp_type = type;
  5094. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  5095. p_type = &tmp_type;
  5096. }
  5097. res = type_to_glsl_constructor(*p_type) + "({ ";
5098. needs_trailing_bracket = true;
  5099. }
  5100. else if (backend.use_initializer_list)
  5101. {
  5102. res = "{ ";
  5103. }
  5104. else
  5105. {
  5106. res = type_to_glsl_constructor(type) + "(";
  5107. }
  5108. uint32_t subconstant_index = 0;
  5109. for (auto &elem : c.subconstants)
  5110. {
  5111. if (auto *op = maybe_get<SPIRConstantOp>(elem))
  5112. {
  5113. res += constant_op_expression(*op);
  5114. }
  5115. else if (maybe_get<SPIRUndef>(elem) != nullptr)
  5116. {
  5117. res += to_name(elem);
  5118. }
  5119. else
  5120. {
  5121. auto &subc = get<SPIRConstant>(elem);
  5122. if (subc.specialization && !expression_is_forwarded(elem))
  5123. res += to_name(elem);
  5124. else
  5125. {
  5126. if (!type_is_top_level_array(type) && type.basetype == SPIRType::Struct)
  5127. {
  5128. // When we get down to emitting struct members, override the block-like information.
  5129. // For constants, we can freely mix and match block-like state.
  5130. inside_block_like_struct_scope =
  5131. has_member_decoration(type.self, subconstant_index, DecorationOffset);
  5132. }
  5133. if (type.basetype == SPIRType::Struct)
  5134. inside_struct_scope = true;
  5135. res += constant_expression(subc, inside_block_like_struct_scope, inside_struct_scope);
  5136. }
  5137. }
  5138. if (&elem != &c.subconstants.back())
  5139. res += ", ";
  5140. subconstant_index++;
  5141. }
  5142. res += backend.use_initializer_list ? " }" : ")";
5143. if (needs_trailing_bracket)
  5144. res += ")";
  5145. return res;
  5146. }
  5147. else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
  5148. {
  5149. // Metal tessellation likes empty structs which are then constant expressions.
  5150. if (backend.supports_empty_struct)
  5151. return "{ }";
  5152. else if (backend.use_typed_initializer_list)
  5153. return join(type_to_glsl(type), "{ 0 }");
  5154. else if (backend.use_initializer_list)
  5155. return "{ 0 }";
  5156. else
  5157. return join(type_to_glsl(type), "(0)");
  5158. }
  5159. else if (c.columns() == 1)
  5160. {
  5161. auto res = constant_expression_vector(c, 0);
  5162. if (inside_struct_scope &&
  5163. backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
  5164. type.basetype == SPIRType::Boolean)
  5165. {
  5166. SPIRType tmp_type = type;
  5167. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  5168. res = join(type_to_glsl(tmp_type), "(", res, ")");
  5169. }
  5170. return res;
  5171. }
  5172. else
  5173. {
  5174. string res = type_to_glsl(type) + "(";
  5175. for (uint32_t col = 0; col < c.columns(); col++)
  5176. {
  5177. if (c.specialization_constant_id(col) != 0)
  5178. res += to_name(c.specialization_constant_id(col));
  5179. else
  5180. res += constant_expression_vector(c, col);
  5181. if (col + 1 < c.columns())
  5182. res += ", ";
  5183. }
  5184. res += ")";
  5185. if (inside_struct_scope &&
  5186. backend.boolean_in_struct_remapped_type != SPIRType::Boolean &&
  5187. type.basetype == SPIRType::Boolean)
  5188. {
  5189. SPIRType tmp_type = type;
  5190. tmp_type.basetype = backend.boolean_in_struct_remapped_type;
  5191. res = join(type_to_glsl(tmp_type), "(", res, ")");
  5192. }
  5193. return res;
  5194. }
  5195. }
  5196. #ifdef _MSC_VER
  5197. // snprintf does not exist or is buggy on older MSVC versions, some of them
  5198. // being used by MinGW. Use sprintf instead and disable corresponding warning.
  5199. #pragma warning(push)
  5200. #pragma warning(disable : 4996)
  5201. #endif
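// Half constants are always emitted through a value cast of the half type (e.g. "float16_t(1.5)"
// in Vulkan GLSL) since there is no universally supported half literal suffix; non-finite values
// fall back to expressions like "(1.0 / 0.0)".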
  5202. string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5203. {
  5204. string res;
  5205. float float_value = c.scalar_f16(col, row);
  5206. // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
  5207. // of complicated workarounds, just value-cast to the half type always.
  5208. if (std::isnan(float_value) || std::isinf(float_value))
  5209. {
  5210. SPIRType type;
  5211. type.basetype = SPIRType::Half;
  5212. type.vecsize = 1;
  5213. type.columns = 1;
  5214. if (float_value == numeric_limits<float>::infinity())
  5215. res = join(type_to_glsl(type), "(1.0 / 0.0)");
  5216. else if (float_value == -numeric_limits<float>::infinity())
  5217. res = join(type_to_glsl(type), "(-1.0 / 0.0)");
  5218. else if (std::isnan(float_value))
  5219. res = join(type_to_glsl(type), "(0.0 / 0.0)");
  5220. else
  5221. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  5222. }
  5223. else
  5224. {
  5225. SPIRType type;
  5226. type.basetype = SPIRType::Half;
  5227. type.vecsize = 1;
  5228. type.columns = 1;
  5229. res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
  5230. }
  5231. return res;
  5232. }
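// Finite floats are printed as plain literals (optionally with an "f" suffix). NaN and infinity
// cannot be written as literals, so they are reconstructed from their bit pattern, roughly
// "uintBitsToFloat(0x7f800000u /* inf */)", or via divisions like "(1.0 / 0.0)" on legacy targets.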
  5233. string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5234. {
  5235. string res;
  5236. float float_value = c.scalar_f32(col, row);
  5237. if (std::isnan(float_value) || std::isinf(float_value))
  5238. {
  5239. // Use special representation.
  5240. if (!is_legacy())
  5241. {
  5242. SPIRType out_type;
  5243. SPIRType in_type;
  5244. out_type.basetype = SPIRType::Float;
  5245. in_type.basetype = SPIRType::UInt;
  5246. out_type.vecsize = 1;
  5247. in_type.vecsize = 1;
  5248. out_type.width = 32;
  5249. in_type.width = 32;
  5250. char print_buffer[32];
  5251. #ifdef _WIN32
  5252. sprintf(print_buffer, "0x%xu", c.scalar(col, row));
  5253. #else
  5254. snprintf(print_buffer, sizeof(print_buffer), "0x%xu", c.scalar(col, row));
  5255. #endif
  5256. const char *comment = "inf";
  5257. if (float_value == -numeric_limits<float>::infinity())
  5258. comment = "-inf";
  5259. else if (std::isnan(float_value))
  5260. comment = "nan";
  5261. res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
  5262. }
  5263. else
  5264. {
  5265. if (float_value == numeric_limits<float>::infinity())
  5266. {
  5267. if (backend.float_literal_suffix)
  5268. res = "(1.0f / 0.0f)";
  5269. else
  5270. res = "(1.0 / 0.0)";
  5271. }
  5272. else if (float_value == -numeric_limits<float>::infinity())
  5273. {
  5274. if (backend.float_literal_suffix)
  5275. res = "(-1.0f / 0.0f)";
  5276. else
  5277. res = "(-1.0 / 0.0)";
  5278. }
  5279. else if (std::isnan(float_value))
  5280. {
  5281. if (backend.float_literal_suffix)
  5282. res = "(0.0f / 0.0f)";
  5283. else
  5284. res = "(0.0 / 0.0)";
  5285. }
  5286. else
  5287. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  5288. }
  5289. }
  5290. else
  5291. {
  5292. res = convert_to_string(float_value, current_locale_radix_character);
  5293. if (backend.float_literal_suffix)
  5294. res += "f";
  5295. }
  5296. return res;
  5297. }
  5298. std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
  5299. {
  5300. string res;
  5301. double double_value = c.scalar_f64(col, row);
  5302. if (std::isnan(double_value) || std::isinf(double_value))
  5303. {
  5304. // Use special representation.
  5305. if (!is_legacy())
  5306. {
  5307. SPIRType out_type;
  5308. SPIRType in_type;
  5309. out_type.basetype = SPIRType::Double;
  5310. in_type.basetype = SPIRType::UInt64;
  5311. out_type.vecsize = 1;
  5312. in_type.vecsize = 1;
  5313. out_type.width = 64;
  5314. in_type.width = 64;
  5315. uint64_t u64_value = c.scalar_u64(col, row);
  5316. if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310.
  5317. SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310.");
  5318. require_extension_internal("GL_ARB_gpu_shader_int64");
  5319. char print_buffer[64];
  5320. #ifdef _WIN32
  5321. sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
  5322. backend.long_long_literal_suffix ? "ull" : "ul");
  5323. #else
  5324. snprintf(print_buffer, sizeof(print_buffer), "0x%llx%s", static_cast<unsigned long long>(u64_value),
  5325. backend.long_long_literal_suffix ? "ull" : "ul");
  5326. #endif
  5327. const char *comment = "inf";
  5328. if (double_value == -numeric_limits<double>::infinity())
  5329. comment = "-inf";
  5330. else if (std::isnan(double_value))
  5331. comment = "nan";
  5332. res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
  5333. }
  5334. else
  5335. {
  5336. if (options.es)
  5337. SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
  5338. if (options.version < 400)
  5339. require_extension_internal("GL_ARB_gpu_shader_fp64");
  5340. if (double_value == numeric_limits<double>::infinity())
  5341. {
  5342. if (backend.double_literal_suffix)
  5343. res = "(1.0lf / 0.0lf)";
  5344. else
  5345. res = "(1.0 / 0.0)";
  5346. }
  5347. else if (double_value == -numeric_limits<double>::infinity())
  5348. {
  5349. if (backend.double_literal_suffix)
  5350. res = "(-1.0lf / 0.0lf)";
  5351. else
  5352. res = "(-1.0 / 0.0)";
  5353. }
  5354. else if (std::isnan(double_value))
  5355. {
  5356. if (backend.double_literal_suffix)
  5357. res = "(0.0lf / 0.0lf)";
  5358. else
  5359. res = "(0.0 / 0.0)";
  5360. }
  5361. else
  5362. SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
  5363. }
  5364. }
  5365. else
  5366. {
  5367. res = convert_to_string(double_value, current_locale_radix_character);
  5368. if (backend.double_literal_suffix)
  5369. res += "lf";
  5370. }
  5371. return res;
  5372. }
  5373. #ifdef _MSC_VER
  5374. #pragma warning(pop)
  5375. #endif
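// Emits one column of a constant as a vector expression. When every component holds the same
// value the literal may be splatted, either through the constructor (e.g. "vec4(0.5)") or,
// on backends that can swizzle scalars, through a scalar swizzle (e.g. "0.5.xxxx").
// Specialization constants embedded in the vector disable splatting.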
  5376. string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
  5377. {
  5378. auto type = get<SPIRType>(c.constant_type);
  5379. type.columns = 1;
  5380. auto scalar_type = type;
  5381. scalar_type.vecsize = 1;
  5382. string res;
  5383. bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
  5384. bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
  5385. if (!type_is_floating_point(type))
  5386. {
  5387. // Cannot swizzle literal integers as a special case.
  5388. swizzle_splat = false;
  5389. }
  5390. if (splat || swizzle_splat)
  5391. {
  5392. // Cannot use constant splatting if we have specialization constants somewhere in the vector.
  5393. for (uint32_t i = 0; i < c.vector_size(); i++)
  5394. {
  5395. if (c.specialization_constant_id(vector, i) != 0)
  5396. {
  5397. splat = false;
  5398. swizzle_splat = false;
  5399. break;
  5400. }
  5401. }
  5402. }
  5403. if (splat || swizzle_splat)
  5404. {
  5405. if (type.width == 64)
  5406. {
  5407. uint64_t ident = c.scalar_u64(vector, 0);
  5408. for (uint32_t i = 1; i < c.vector_size(); i++)
  5409. {
  5410. if (ident != c.scalar_u64(vector, i))
  5411. {
  5412. splat = false;
  5413. swizzle_splat = false;
  5414. break;
  5415. }
  5416. }
  5417. }
  5418. else
  5419. {
  5420. uint32_t ident = c.scalar(vector, 0);
  5421. for (uint32_t i = 1; i < c.vector_size(); i++)
  5422. {
  5423. if (ident != c.scalar(vector, i))
  5424. {
  5425. splat = false;
  5426. swizzle_splat = false;
  5427. }
  5428. }
  5429. }
  5430. }
  5431. if (c.vector_size() > 1 && !swizzle_splat)
  5432. res += type_to_glsl(type) + "(";
  5433. switch (type.basetype)
  5434. {
  5435. case SPIRType::Half:
  5436. if (splat || swizzle_splat)
  5437. {
  5438. res += convert_half_to_string(c, vector, 0);
  5439. if (swizzle_splat)
  5440. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5441. }
  5442. else
  5443. {
  5444. for (uint32_t i = 0; i < c.vector_size(); i++)
  5445. {
  5446. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5447. res += to_expression(c.specialization_constant_id(vector, i));
  5448. else
  5449. res += convert_half_to_string(c, vector, i);
  5450. if (i + 1 < c.vector_size())
  5451. res += ", ";
  5452. }
  5453. }
  5454. break;
  5455. case SPIRType::Float:
  5456. if (splat || swizzle_splat)
  5457. {
  5458. res += convert_float_to_string(c, vector, 0);
  5459. if (swizzle_splat)
  5460. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5461. }
  5462. else
  5463. {
  5464. for (uint32_t i = 0; i < c.vector_size(); i++)
  5465. {
  5466. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5467. res += to_expression(c.specialization_constant_id(vector, i));
  5468. else
  5469. res += convert_float_to_string(c, vector, i);
  5470. if (i + 1 < c.vector_size())
  5471. res += ", ";
  5472. }
  5473. }
  5474. break;
  5475. case SPIRType::Double:
  5476. if (splat || swizzle_splat)
  5477. {
  5478. res += convert_double_to_string(c, vector, 0);
  5479. if (swizzle_splat)
  5480. res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
  5481. }
  5482. else
  5483. {
  5484. for (uint32_t i = 0; i < c.vector_size(); i++)
  5485. {
  5486. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5487. res += to_expression(c.specialization_constant_id(vector, i));
  5488. else
  5489. res += convert_double_to_string(c, vector, i);
  5490. if (i + 1 < c.vector_size())
  5491. res += ", ";
  5492. }
  5493. }
  5494. break;
  5495. case SPIRType::Int64:
  5496. {
  5497. auto tmp = type;
  5498. tmp.vecsize = 1;
  5499. tmp.columns = 1;
  5500. auto int64_type = type_to_glsl(tmp);
  5501. if (splat)
  5502. {
  5503. res += convert_to_string(c.scalar_i64(vector, 0), int64_type, backend.long_long_literal_suffix);
  5504. }
  5505. else
  5506. {
  5507. for (uint32_t i = 0; i < c.vector_size(); i++)
  5508. {
  5509. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5510. res += to_expression(c.specialization_constant_id(vector, i));
  5511. else
  5512. res += convert_to_string(c.scalar_i64(vector, i), int64_type, backend.long_long_literal_suffix);
  5513. if (i + 1 < c.vector_size())
  5514. res += ", ";
  5515. }
  5516. }
  5517. break;
  5518. }
  5519. case SPIRType::UInt64:
  5520. if (splat)
  5521. {
  5522. res += convert_to_string(c.scalar_u64(vector, 0));
  5523. if (backend.long_long_literal_suffix)
  5524. res += "ull";
  5525. else
  5526. res += "ul";
  5527. }
  5528. else
  5529. {
  5530. for (uint32_t i = 0; i < c.vector_size(); i++)
  5531. {
  5532. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5533. res += to_expression(c.specialization_constant_id(vector, i));
  5534. else
  5535. {
  5536. res += convert_to_string(c.scalar_u64(vector, i));
  5537. if (backend.long_long_literal_suffix)
  5538. res += "ull";
  5539. else
  5540. res += "ul";
  5541. }
  5542. if (i + 1 < c.vector_size())
  5543. res += ", ";
  5544. }
  5545. }
  5546. break;
  5547. case SPIRType::UInt:
  5548. if (splat)
  5549. {
  5550. res += convert_to_string(c.scalar(vector, 0));
  5551. if (is_legacy())
  5552. {
  5553. // Fake unsigned constant literals with signed ones if possible.
  5554. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
  5555. if (c.scalar_i32(vector, 0) < 0)
  5556. SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
  5557. }
  5558. else if (backend.uint32_t_literal_suffix)
  5559. res += "u";
  5560. }
  5561. else
  5562. {
  5563. for (uint32_t i = 0; i < c.vector_size(); i++)
  5564. {
  5565. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5566. res += to_expression(c.specialization_constant_id(vector, i));
  5567. else
  5568. {
  5569. res += convert_to_string(c.scalar(vector, i));
  5570. if (is_legacy())
  5571. {
  5572. // Fake unsigned constant literals with signed ones if possible.
  5573. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
  5574. if (c.scalar_i32(vector, i) < 0)
  5575. SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
  5576. "the literal negative.");
  5577. }
  5578. else if (backend.uint32_t_literal_suffix)
  5579. res += "u";
  5580. }
  5581. if (i + 1 < c.vector_size())
  5582. res += ", ";
  5583. }
  5584. }
  5585. break;
  5586. case SPIRType::Int:
  5587. if (splat)
  5588. res += convert_to_string(c.scalar_i32(vector, 0));
  5589. else
  5590. {
  5591. for (uint32_t i = 0; i < c.vector_size(); i++)
  5592. {
  5593. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5594. res += to_expression(c.specialization_constant_id(vector, i));
  5595. else
  5596. res += convert_to_string(c.scalar_i32(vector, i));
  5597. if (i + 1 < c.vector_size())
  5598. res += ", ";
  5599. }
  5600. }
  5601. break;
  5602. case SPIRType::UShort:
  5603. if (splat)
  5604. {
  5605. res += convert_to_string(c.scalar(vector, 0));
  5606. }
  5607. else
  5608. {
  5609. for (uint32_t i = 0; i < c.vector_size(); i++)
  5610. {
  5611. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5612. res += to_expression(c.specialization_constant_id(vector, i));
  5613. else
  5614. {
  5615. if (*backend.uint16_t_literal_suffix)
  5616. {
  5617. res += convert_to_string(c.scalar_u16(vector, i));
  5618. res += backend.uint16_t_literal_suffix;
  5619. }
  5620. else
  5621. {
5622. // If the backend doesn't have a literal suffix, we need to value-cast.
  5623. res += type_to_glsl(scalar_type);
  5624. res += "(";
  5625. res += convert_to_string(c.scalar_u16(vector, i));
  5626. res += ")";
  5627. }
  5628. }
  5629. if (i + 1 < c.vector_size())
  5630. res += ", ";
  5631. }
  5632. }
  5633. break;
  5634. case SPIRType::Short:
  5635. if (splat)
  5636. {
  5637. res += convert_to_string(c.scalar_i16(vector, 0));
  5638. }
  5639. else
  5640. {
  5641. for (uint32_t i = 0; i < c.vector_size(); i++)
  5642. {
  5643. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5644. res += to_expression(c.specialization_constant_id(vector, i));
  5645. else
  5646. {
  5647. if (*backend.int16_t_literal_suffix)
  5648. {
  5649. res += convert_to_string(c.scalar_i16(vector, i));
  5650. res += backend.int16_t_literal_suffix;
  5651. }
  5652. else
  5653. {
5654. // If the backend doesn't have a literal suffix, we need to value-cast.
  5655. res += type_to_glsl(scalar_type);
  5656. res += "(";
  5657. res += convert_to_string(c.scalar_i16(vector, i));
  5658. res += ")";
  5659. }
  5660. }
  5661. if (i + 1 < c.vector_size())
  5662. res += ", ";
  5663. }
  5664. }
  5665. break;
  5666. case SPIRType::UByte:
  5667. if (splat)
  5668. {
  5669. res += convert_to_string(c.scalar_u8(vector, 0));
  5670. }
  5671. else
  5672. {
  5673. for (uint32_t i = 0; i < c.vector_size(); i++)
  5674. {
  5675. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5676. res += to_expression(c.specialization_constant_id(vector, i));
  5677. else
  5678. {
  5679. res += type_to_glsl(scalar_type);
  5680. res += "(";
  5681. res += convert_to_string(c.scalar_u8(vector, i));
  5682. res += ")";
  5683. }
  5684. if (i + 1 < c.vector_size())
  5685. res += ", ";
  5686. }
  5687. }
  5688. break;
  5689. case SPIRType::SByte:
  5690. if (splat)
  5691. {
  5692. res += convert_to_string(c.scalar_i8(vector, 0));
  5693. }
  5694. else
  5695. {
  5696. for (uint32_t i = 0; i < c.vector_size(); i++)
  5697. {
  5698. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5699. res += to_expression(c.specialization_constant_id(vector, i));
  5700. else
  5701. {
  5702. res += type_to_glsl(scalar_type);
  5703. res += "(";
  5704. res += convert_to_string(c.scalar_i8(vector, i));
  5705. res += ")";
  5706. }
  5707. if (i + 1 < c.vector_size())
  5708. res += ", ";
  5709. }
  5710. }
  5711. break;
  5712. case SPIRType::Boolean:
  5713. if (splat)
  5714. res += c.scalar(vector, 0) ? "true" : "false";
  5715. else
  5716. {
  5717. for (uint32_t i = 0; i < c.vector_size(); i++)
  5718. {
  5719. if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
  5720. res += to_expression(c.specialization_constant_id(vector, i));
  5721. else
  5722. res += c.scalar(vector, i) ? "true" : "false";
  5723. if (i + 1 < c.vector_size())
  5724. res += ", ";
  5725. }
  5726. }
  5727. break;
  5728. default:
  5729. SPIRV_CROSS_THROW("Invalid constant expression basetype.");
  5730. }
  5731. if (c.vector_size() > 1 && !swizzle_splat)
  5732. res += ")";
  5733. return res;
  5734. }
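// Temporaries requested from inside a continue block cannot be declared there (the continue block
// must not introduce new variables), so the declaration is hoisted into the loop header and a
// recompile is forced; otherwise the temporary is declared in place.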
  5735. SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
  5736. {
  5737. forced_temporaries.insert(id);
  5738. emit_uninitialized_temporary(type, id);
  5739. return set<SPIRExpression>(id, to_name(id), type, true);
  5740. }
  5741. void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
  5742. {
  5743. // If we're declaring temporaries inside continue blocks,
  5744. // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
  5745. if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
  5746. {
  5747. auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
  5748. if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
  5749. [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
  5750. return tmp.first == result_type && tmp.second == result_id;
  5751. }) == end(header.declare_temporary))
  5752. {
  5753. header.declare_temporary.emplace_back(result_type, result_id);
  5754. hoisted_temporaries.insert(result_id);
  5755. force_recompile();
  5756. }
  5757. }
  5758. else if (hoisted_temporaries.count(result_id) == 0)
  5759. {
  5760. auto &type = get<SPIRType>(result_type);
  5761. auto &flags = get_decoration_bitset(result_id);
  5762. // The result_id has not been made into an expression yet, so use flags interface.
  5763. add_local_variable_name(result_id);
  5764. string initializer;
  5765. if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  5766. initializer = join(" = ", to_zero_initialized_expression(result_type));
  5767. statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
  5768. }
  5769. }
  5770. string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
  5771. {
  5772. auto &type = get<SPIRType>(result_type);
  5773. // If we're declaring temporaries inside continue blocks,
  5774. // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
  5775. if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id))
  5776. {
  5777. auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
  5778. if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
  5779. [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
  5780. return tmp.first == result_type && tmp.second == result_id;
  5781. }) == end(header.declare_temporary))
  5782. {
  5783. header.declare_temporary.emplace_back(result_type, result_id);
  5784. hoisted_temporaries.insert(result_id);
  5785. force_recompile_guarantee_forward_progress();
  5786. }
  5787. return join(to_name(result_id), " = ");
  5788. }
  5789. else if (hoisted_temporaries.count(result_id))
  5790. {
  5791. // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
  5792. return join(to_name(result_id), " = ");
  5793. }
  5794. else
  5795. {
  5796. // The result_id has not been made into an expression yet, so use flags interface.
  5797. add_local_variable_name(result_id);
  5798. auto &flags = get_decoration_bitset(result_id);
  5799. return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
  5800. }
  5801. }
  5802. bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
  5803. {
  5804. return forwarded_temporaries.count(id) != 0;
  5805. }
  5806. bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
  5807. {
  5808. return suppressed_usage_tracking.count(id) != 0;
  5809. }
  5810. bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
  5811. {
  5812. auto *expr = maybe_get<SPIRExpression>(id);
  5813. if (!expr)
  5814. return false;
  5815. // If we're emitting code at a deeper loop level than when we emitted the expression,
  5816. // we're probably reading the same expression over and over.
  5817. return current_loop_level > expr->emitted_loop_level;
  5818. }
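// Central helper for emitting an instruction result. If the expression can be forwarded it is
// recorded as-is and substituted textually at each use site; otherwise a temporary is declared,
// roughly "T name = rhs;", and the result expression becomes just the temporary's name.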
  5819. SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
  5820. bool suppress_usage_tracking)
  5821. {
  5822. if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
  5823. {
  5824. // Just forward it without temporary.
  5825. // If the forward is trivial, we do not force flushing to temporary for this expression.
  5826. forwarded_temporaries.insert(result_id);
  5827. if (suppress_usage_tracking)
  5828. suppressed_usage_tracking.insert(result_id);
  5829. return set<SPIRExpression>(result_id, rhs, result_type, true);
  5830. }
  5831. else
  5832. {
5833. // If the expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
  5834. statement(declare_temporary(result_type, result_id), rhs, ";");
  5835. return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
  5836. }
  5837. }
  5838. void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  5839. {
  5840. bool forward = should_forward(op0);
  5841. emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
  5842. inherit_expression_dependencies(result_id, op0);
  5843. }
  5844. void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  5845. {
  5846. auto &type = get<SPIRType>(result_type);
  5847. bool forward = should_forward(op0);
  5848. emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), forward);
  5849. inherit_expression_dependencies(result_id, op0);
  5850. }
  5851. void CompilerGLSL::emit_mesh_tasks(SPIRBlock &block)
  5852. {
  5853. statement("EmitMeshTasksEXT(",
  5854. to_unpacked_expression(block.mesh.groups[0]), ", ",
  5855. to_unpacked_expression(block.mesh.groups[1]), ", ",
  5856. to_unpacked_expression(block.mesh.groups[2]), ");");
  5857. }
  5858. void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
  5859. {
  5860. // Various FP arithmetic opcodes such as add, sub, mul will hit this.
  5861. bool force_temporary_precise = backend.support_precise_qualifier &&
  5862. has_decoration(result_id, DecorationNoContraction) &&
  5863. type_is_floating_point(get<SPIRType>(result_type));
  5864. bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;
  5865. emit_op(result_type, result_id,
  5866. join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
  5867. inherit_expression_dependencies(result_id, op0);
  5868. inherit_expression_dependencies(result_id, op1);
  5869. }
  5870. void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
  5871. {
  5872. auto &type = get<SPIRType>(result_type);
  5873. auto expr = type_to_glsl_constructor(type);
  5874. expr += '(';
  5875. for (uint32_t i = 0; i < type.vecsize; i++)
  5876. {
  5877. // Make sure to call to_expression multiple times to ensure
  5878. // that these expressions are properly flushed to temporaries if needed.
  5879. expr += op;
  5880. expr += to_extract_component_expression(operand, i);
  5881. if (i + 1 < type.vecsize)
  5882. expr += ", ";
  5883. }
  5884. expr += ')';
  5885. emit_op(result_type, result_id, expr, should_forward(operand));
  5886. inherit_expression_dependencies(result_id, operand);
  5887. }
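// Unrolls a vector comparison/arithmetic op into per-component scalar ops when no native vector
// form is available, producing something like "bvec2(a.x < b.x, a.y < b.y)", optionally negating
// each component and bitcasting operands to the expected signedness.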
  5888. void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5889. const char *op, bool negate, SPIRType::BaseType expected_type)
  5890. {
  5891. auto &type0 = expression_type(op0);
  5892. auto &type1 = expression_type(op1);
  5893. SPIRType target_type0 = type0;
  5894. SPIRType target_type1 = type1;
  5895. target_type0.basetype = expected_type;
  5896. target_type1.basetype = expected_type;
  5897. target_type0.vecsize = 1;
  5898. target_type1.vecsize = 1;
  5899. auto &type = get<SPIRType>(result_type);
  5900. auto expr = type_to_glsl_constructor(type);
  5901. expr += '(';
  5902. for (uint32_t i = 0; i < type.vecsize; i++)
  5903. {
  5904. // Make sure to call to_expression multiple times to ensure
  5905. // that these expressions are properly flushed to temporaries if needed.
  5906. if (negate)
  5907. expr += "!(";
  5908. if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
  5909. expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
  5910. else
  5911. expr += to_extract_component_expression(op0, i);
  5912. expr += ' ';
  5913. expr += op;
  5914. expr += ' ';
  5915. if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
  5916. expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
  5917. else
  5918. expr += to_extract_component_expression(op1, i);
  5919. if (negate)
  5920. expr += ")";
  5921. if (i + 1 < type.vecsize)
  5922. expr += ", ";
  5923. }
  5924. expr += ')';
  5925. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  5926. inherit_expression_dependencies(result_id, op0);
  5927. inherit_expression_dependencies(result_id, op1);
  5928. }
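// Prepares the two operands of a binary op, bitcasting them to the required input type when their
// types differ from it, e.g. an SLessThan on uint operands would compare "int(a) < int(b)".
// Sign-invariant opcodes skip the cast when both operand types already agree.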
  5929. SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
  5930. uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
  5931. {
  5932. auto &type0 = expression_type(op0);
  5933. auto &type1 = expression_type(op1);
  5934. // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
  5935. // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
  5936. // since equality test is exactly the same.
  5937. bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
  5938. // Create a fake type so we can bitcast to it.
5939. // We only deal with regular arithmetic types here like int, uint and so on.
  5940. SPIRType expected_type;
  5941. expected_type.basetype = input_type;
  5942. expected_type.vecsize = type0.vecsize;
  5943. expected_type.columns = type0.columns;
  5944. expected_type.width = type0.width;
  5945. if (cast)
  5946. {
  5947. cast_op0 = bitcast_glsl(expected_type, op0);
  5948. cast_op1 = bitcast_glsl(expected_type, op1);
  5949. }
  5950. else
  5951. {
  5952. // If we don't cast, our actual input type is that of the first (or second) argument.
  5953. cast_op0 = to_enclosed_unpacked_expression(op0);
  5954. cast_op1 = to_enclosed_unpacked_expression(op1);
  5955. input_type = type0.basetype;
  5956. }
  5957. return expected_type;
  5958. }
  5959. bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
  5960. {
  5961. // Some bitcasts may require complex casting sequences, and are implemented here.
5962. // Otherwise a simple unary function will do with bitcast_glsl_op.
  5963. auto &output_type = get<SPIRType>(result_type);
  5964. auto &input_type = expression_type(op0);
  5965. string expr;
  5966. if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
  5967. expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
  5968. else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
  5969. input_type.vecsize == 2)
  5970. expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
  5971. else
  5972. return false;
  5973. emit_op(result_type, id, expr, should_forward(op0));
  5974. return true;
  5975. }
  5976. void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  5977. const char *op, SPIRType::BaseType input_type,
  5978. bool skip_cast_if_equal_type,
  5979. bool implicit_integer_promotion)
  5980. {
  5981. string cast_op0, cast_op1;
  5982. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
  5983. auto &out_type = get<SPIRType>(result_type);
5984. // We might have cast away from the result type, so bitcast again.
  5985. // For example, arithmetic right shift with uint inputs.
  5986. // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
  5987. auto bitop = join(cast_op0, " ", op, " ", cast_op1);
  5988. string expr;
  5989. if (implicit_integer_promotion)
  5990. {
  5991. // Simple value cast.
  5992. expr = join(type_to_glsl(out_type), '(', bitop, ')');
  5993. }
  5994. else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
  5995. {
  5996. expected_type.basetype = input_type;
  5997. expr = join(bitcast_glsl_op(out_type, expected_type), '(', bitop, ')');
  5998. }
  5999. else
  6000. {
  6001. expr = std::move(bitop);
  6002. }
  6003. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  6004. inherit_expression_dependencies(result_id, op0);
  6005. inherit_expression_dependencies(result_id, op1);
  6006. }
  6007. void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
  6008. {
  6009. bool forward = should_forward(op0);
  6010. emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
  6011. inherit_expression_dependencies(result_id, op0);
  6012. }
  6013. void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6014. const char *op)
  6015. {
  6016. // Opaque types (e.g. OpTypeSampledImage) must always be forwarded in GLSL
  6017. const auto &type = get_type(result_type);
  6018. bool must_forward = type_is_opaque_value(type);
  6019. bool forward = must_forward || (should_forward(op0) && should_forward(op1));
  6020. emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
  6021. forward);
  6022. inherit_expression_dependencies(result_id, op0);
  6023. inherit_expression_dependencies(result_id, op1);
  6024. }
  6025. void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6026. const char *op)
  6027. {
  6028. auto &type = get<SPIRType>(result_type);
  6029. if (type_is_floating_point(type))
  6030. {
  6031. if (!options.vulkan_semantics)
  6032. SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics.");
  6033. if (options.es)
  6034. SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL.");
  6035. require_extension_internal("GL_EXT_shader_atomic_float");
  6036. }
  6037. forced_temporaries.insert(result_id);
  6038. emit_op(result_type, result_id,
  6039. join(op, "(", to_non_uniform_aware_expression(op0), ", ",
  6040. to_unpacked_expression(op1), ")"), false);
  6041. flush_all_atomic_capable_variables();
  6042. }
  6043. void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
  6044. uint32_t op0, uint32_t op1, uint32_t op2,
  6045. const char *op)
  6046. {
  6047. forced_temporaries.insert(result_id);
  6048. emit_op(result_type, result_id,
  6049. join(op, "(", to_non_uniform_aware_expression(op0), ", ",
  6050. to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false);
  6051. flush_all_atomic_capable_variables();
  6052. }
  6053. void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
  6054. SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
  6055. {
  6056. auto &out_type = get<SPIRType>(result_type);
  6057. auto &expr_type = expression_type(op0);
  6058. auto expected_type = out_type;
  6059. // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
  6060. expected_type.basetype = input_type;
  6061. expected_type.width = expr_type.width;
  6062. string cast_op;
  6063. if (expr_type.basetype != input_type)
  6064. {
  6065. if (expr_type.basetype == SPIRType::Boolean)
  6066. cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")");
  6067. else
  6068. cast_op = bitcast_glsl(expected_type, op0);
  6069. }
  6070. else
  6071. cast_op = to_unpacked_expression(op0);
  6072. string expr;
  6073. if (out_type.basetype != expected_result_type)
  6074. {
  6075. expected_type.basetype = expected_result_type;
  6076. expected_type.width = out_type.width;
  6077. if (out_type.basetype == SPIRType::Boolean)
  6078. expr = type_to_glsl(out_type);
  6079. else
  6080. expr = bitcast_glsl_op(out_type, expected_type);
  6081. expr += '(';
  6082. expr += join(op, "(", cast_op, ")");
  6083. expr += ')';
  6084. }
  6085. else
  6086. {
  6087. expr += join(op, "(", cast_op, ")");
  6088. }
  6089. emit_op(result_type, result_id, expr, should_forward(op0));
  6090. inherit_expression_dependencies(result_id, op0);
  6091. }
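// Illustrative sketch (added note): emit_unary_func_op_cast() above covers cases like
// GLSLstd450SAbs on a uint operand, which comes out roughly as (names hypothetical):
//   uint r = uint(abs(int(x)));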
  6092. // Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
  6093. // and different vector sizes all at once. Need a special purpose method here.
  6094. void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6095. uint32_t op2, const char *op,
  6096. SPIRType::BaseType expected_result_type,
  6097. SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
  6098. SPIRType::BaseType input_type2)
  6099. {
  6100. auto &out_type = get<SPIRType>(result_type);
  6101. auto expected_type = out_type;
  6102. expected_type.basetype = input_type0;
  6103. string cast_op0 =
  6104. expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  6105. auto op1_expr = to_unpacked_expression(op1);
  6106. auto op2_expr = to_unpacked_expression(op2);
  6107. // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
  6108. expected_type.basetype = input_type1;
  6109. expected_type.vecsize = 1;
  6110. string cast_op1 = expression_type(op1).basetype != input_type1 ?
  6111. join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
  6112. op1_expr;
  6113. expected_type.basetype = input_type2;
  6114. expected_type.vecsize = 1;
  6115. string cast_op2 = expression_type(op2).basetype != input_type2 ?
  6116. join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
  6117. op2_expr;
  6118. string expr;
  6119. if (out_type.basetype != expected_result_type)
  6120. {
  6121. expected_type.vecsize = out_type.vecsize;
  6122. expected_type.basetype = expected_result_type;
  6123. expr = bitcast_glsl_op(out_type, expected_type);
  6124. expr += '(';
  6125. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6126. expr += ')';
  6127. }
  6128. else
  6129. {
  6130. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6131. }
  6132. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
  6133. inherit_expression_dependencies(result_id, op0);
  6134. inherit_expression_dependencies(result_id, op1);
  6135. inherit_expression_dependencies(result_id, op2);
  6136. }
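// Illustrative sketch (added note): for OpBitFieldSExtract on a uvec2 with 16-bit
// offset/count operands, the method above emits roughly (names hypothetical):
//   uvec2 r = uvec2(bitfieldExtract(ivec2(v), int(offset), int(count)));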
  6137. void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6138. uint32_t op2, const char *op, SPIRType::BaseType input_type)
  6139. {
  6140. auto &out_type = get<SPIRType>(result_type);
  6141. auto expected_type = out_type;
  6142. expected_type.basetype = input_type;
  6143. string cast_op0 =
  6144. expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  6145. string cast_op1 =
  6146. expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
  6147. string cast_op2 =
  6148. expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
  6149. string expr;
  6150. if (out_type.basetype != input_type)
  6151. {
  6152. expr = bitcast_glsl_op(out_type, expected_type);
  6153. expr += '(';
  6154. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6155. expr += ')';
  6156. }
  6157. else
  6158. {
  6159. expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
  6160. }
  6161. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
  6162. inherit_expression_dependencies(result_id, op0);
  6163. inherit_expression_dependencies(result_id, op1);
  6164. inherit_expression_dependencies(result_id, op2);
  6165. }
  6166. void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
  6167. uint32_t op1, const char *op, SPIRType::BaseType input_type)
  6168. {
  6169. // Special purpose method for implementing clustered subgroup opcodes.
6170. // The main difference is that op1 does not participate in any casting; it needs to be a literal.
  6171. auto &out_type = get<SPIRType>(result_type);
  6172. auto expected_type = out_type;
  6173. expected_type.basetype = input_type;
  6174. string cast_op0 =
  6175. expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
  6176. string expr;
  6177. if (out_type.basetype != input_type)
  6178. {
  6179. expr = bitcast_glsl_op(out_type, expected_type);
  6180. expr += '(';
  6181. expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
  6182. expr += ')';
  6183. }
  6184. else
  6185. {
  6186. expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
  6187. }
  6188. emit_op(result_type, result_id, expr, should_forward(op0));
  6189. inherit_expression_dependencies(result_id, op0);
  6190. }
  6191. void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6192. const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
  6193. {
  6194. string cast_op0, cast_op1;
  6195. auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
  6196. auto &out_type = get<SPIRType>(result_type);
  6197. // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
  6198. string expr;
  6199. if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
  6200. {
  6201. expected_type.basetype = input_type;
  6202. expr = bitcast_glsl_op(out_type, expected_type);
  6203. expr += '(';
  6204. expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
  6205. expr += ')';
  6206. }
  6207. else
  6208. {
  6209. expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
  6210. }
  6211. emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
  6212. inherit_expression_dependencies(result_id, op0);
  6213. inherit_expression_dependencies(result_id, op1);
  6214. }
  6215. void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6216. uint32_t op2, const char *op)
  6217. {
  6218. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
  6219. emit_op(result_type, result_id,
  6220. join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
  6221. to_unpacked_expression(op2), ")"),
  6222. forward);
  6223. inherit_expression_dependencies(result_id, op0);
  6224. inherit_expression_dependencies(result_id, op1);
  6225. inherit_expression_dependencies(result_id, op2);
  6226. }
  6227. void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6228. uint32_t op2, uint32_t op3, const char *op)
  6229. {
  6230. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
  6231. emit_op(result_type, result_id,
  6232. join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
  6233. to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
  6234. forward);
  6235. inherit_expression_dependencies(result_id, op0);
  6236. inherit_expression_dependencies(result_id, op1);
  6237. inherit_expression_dependencies(result_id, op2);
  6238. inherit_expression_dependencies(result_id, op3);
  6239. }
  6240. void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
  6241. uint32_t op2, uint32_t op3, const char *op,
  6242. SPIRType::BaseType offset_count_type)
  6243. {
  6244. // Only need to cast offset/count arguments. Types of base/insert must be same as result type,
  6245. // and bitfieldInsert is sign invariant.
  6246. bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
  6247. auto op0_expr = to_unpacked_expression(op0);
  6248. auto op1_expr = to_unpacked_expression(op1);
  6249. auto op2_expr = to_unpacked_expression(op2);
  6250. auto op3_expr = to_unpacked_expression(op3);
  6251. SPIRType target_type;
  6252. target_type.vecsize = 1;
  6253. target_type.basetype = offset_count_type;
  6254. if (expression_type(op2).basetype != offset_count_type)
  6255. {
  6256. // Value-cast here. Input might be 16-bit. GLSL requires int.
  6257. op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
  6258. }
  6259. if (expression_type(op3).basetype != offset_count_type)
  6260. {
  6261. // Value-cast here. Input might be 16-bit. GLSL requires int.
  6262. op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
  6263. }
  6264. emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
  6265. forward);
  6266. inherit_expression_dependencies(result_id, op0);
  6267. inherit_expression_dependencies(result_id, op1);
  6268. inherit_expression_dependencies(result_id, op2);
  6269. inherit_expression_dependencies(result_id, op3);
  6270. }
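// Illustrative note (added): legacy_tex_op() below rewrites the modern function name into the
// legacy one, e.g. "textureLod" on a regular 2D sampler in a legacy ES fragment shader becomes
// "texture2DLodEXT" (via GL_EXT_shader_texture_lod), and "texture" on a 2D shadow sampler in
// legacy ES becomes "shadow2DEXT" (via GL_EXT_shadow_samplers).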
  6271. string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
  6272. {
  6273. const char *type;
  6274. switch (imgtype.image.dim)
  6275. {
  6276. case spv::Dim1D:
  6277. // Force 2D path for ES.
  6278. if (options.es)
  6279. type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
  6280. else
  6281. type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
  6282. break;
  6283. case spv::Dim2D:
  6284. type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
  6285. break;
  6286. case spv::Dim3D:
  6287. type = "3D";
  6288. break;
  6289. case spv::DimCube:
  6290. type = "Cube";
  6291. break;
  6292. case spv::DimRect:
  6293. type = "2DRect";
  6294. break;
  6295. case spv::DimBuffer:
  6296. type = "Buffer";
  6297. break;
  6298. case spv::DimSubpassData:
  6299. type = "2D";
  6300. break;
  6301. default:
  6302. type = "";
  6303. break;
  6304. }
  6305. // In legacy GLSL, an extension is required for textureLod in the fragment
  6306. // shader or textureGrad anywhere.
  6307. bool legacy_lod_ext = false;
  6308. auto &execution = get_entry_point();
  6309. if (op == "textureGrad" || op == "textureProjGrad" ||
  6310. ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
  6311. {
  6312. if (is_legacy_es())
  6313. {
  6314. legacy_lod_ext = true;
  6315. require_extension_internal("GL_EXT_shader_texture_lod");
  6316. }
  6317. else if (is_legacy_desktop())
  6318. require_extension_internal("GL_ARB_shader_texture_lod");
  6319. }
  6320. if (op == "textureLodOffset" || op == "textureProjLodOffset")
  6321. {
  6322. if (is_legacy_es())
  6323. SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
  6324. require_extension_internal("GL_EXT_gpu_shader4");
  6325. }
  6326. // GLES has very limited support for shadow samplers.
  6327. // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers,
6328. // everything else just throws.
  6329. bool is_comparison = is_depth_image(imgtype, tex);
  6330. if (is_comparison && is_legacy_es())
  6331. {
  6332. if (op == "texture" || op == "textureProj")
  6333. require_extension_internal("GL_EXT_shadow_samplers");
  6334. else
  6335. SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
  6336. if (imgtype.image.dim == spv::DimCube)
  6337. return "shadowCubeNV";
  6338. }
  6339. if (op == "textureSize")
  6340. {
  6341. if (is_legacy_es())
  6342. SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
  6343. if (is_comparison)
  6344. SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
  6345. require_extension_internal("GL_EXT_gpu_shader4");
  6346. }
  6347. if (op == "texelFetch" && is_legacy_es())
  6348. SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
  6349. bool is_es_and_depth = is_legacy_es() && is_comparison;
  6350. std::string type_prefix = is_comparison ? "shadow" : "texture";
  6351. if (op == "texture")
  6352. return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
  6353. else if (op == "textureLod")
  6354. return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
  6355. else if (op == "textureProj")
  6356. return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
  6357. else if (op == "textureGrad")
  6358. return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
  6359. else if (op == "textureProjLod")
  6360. return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
  6361. else if (op == "textureLodOffset")
  6362. return join(type_prefix, type, "LodOffset");
  6363. else if (op == "textureProjGrad")
  6364. return join(type_prefix, type,
  6365. is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
  6366. else if (op == "textureProjLodOffset")
  6367. return join(type_prefix, type, "ProjLodOffset");
  6368. else if (op == "textureSize")
  6369. return join("textureSize", type);
  6370. else if (op == "texelFetch")
  6371. return join("texelFetch", type);
  6372. else
  6373. {
  6374. SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
  6375. }
  6376. }
  6377. bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
  6378. {
  6379. auto *cleft = maybe_get<SPIRConstant>(left);
  6380. auto *cright = maybe_get<SPIRConstant>(right);
  6381. auto &lerptype = expression_type(lerp);
  6382. // If our targets aren't constants, we cannot use construction.
  6383. if (!cleft || !cright)
  6384. return false;
  6385. // If our targets are spec constants, we cannot use construction.
  6386. if (cleft->specialization || cright->specialization)
  6387. return false;
  6388. auto &value_type = get<SPIRType>(cleft->constant_type);
  6389. if (lerptype.basetype != SPIRType::Boolean)
  6390. return false;
  6391. if (value_type.basetype == SPIRType::Struct || is_array(value_type))
  6392. return false;
  6393. if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
  6394. return false;
6395. // The only valid way in SPIR-V 1.4 to use matrices in select is a scalar select.
6396. // The matrix(scalar) constructor fills in the diagonal, so it gets messy very quickly.
  6397. // Just avoid this case.
  6398. if (value_type.columns > 1)
  6399. return false;
  6400. // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor.
  6401. bool ret = true;
  6402. for (uint32_t row = 0; ret && row < value_type.vecsize; row++)
  6403. {
  6404. switch (type.basetype)
  6405. {
  6406. case SPIRType::Short:
  6407. case SPIRType::UShort:
  6408. ret = cleft->scalar_u16(0, row) == 0 && cright->scalar_u16(0, row) == 1;
  6409. break;
  6410. case SPIRType::Int:
  6411. case SPIRType::UInt:
  6412. ret = cleft->scalar(0, row) == 0 && cright->scalar(0, row) == 1;
  6413. break;
  6414. case SPIRType::Half:
  6415. ret = cleft->scalar_f16(0, row) == 0.0f && cright->scalar_f16(0, row) == 1.0f;
  6416. break;
  6417. case SPIRType::Float:
  6418. ret = cleft->scalar_f32(0, row) == 0.0f && cright->scalar_f32(0, row) == 1.0f;
  6419. break;
  6420. case SPIRType::Double:
  6421. ret = cleft->scalar_f64(0, row) == 0.0 && cright->scalar_f64(0, row) == 1.0;
  6422. break;
  6423. case SPIRType::Int64:
  6424. case SPIRType::UInt64:
  6425. ret = cleft->scalar_u64(0, row) == 0 && cright->scalar_u64(0, row) == 1;
  6426. break;
  6427. default:
  6428. ret = false;
  6429. break;
  6430. }
  6431. }
  6432. if (ret)
  6433. op = type_to_glsl_constructor(type);
  6434. return ret;
  6435. }
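// Illustrative sketch (added note): when to_trivial_mix_op() above succeeds, an OpSelect that
// picks between constant 0 and 1 collapses into a plain constructor cast, roughly
// (names hypothetical):
//   int r = int(cond);       // instead of cond ? 1 : 0
//   uvec2 v = uvec2(bcond);  // component-wise bool -> uint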
  6436. string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
  6437. uint32_t false_value)
  6438. {
  6439. string expr;
  6440. auto &lerptype = expression_type(select);
  6441. if (lerptype.vecsize == 1)
  6442. expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
  6443. to_enclosed_pointer_expression(false_value));
  6444. else
  6445. {
  6446. auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
  6447. expr = type_to_glsl_constructor(restype);
  6448. expr += "(";
  6449. for (uint32_t i = 0; i < restype.vecsize; i++)
  6450. {
  6451. expr += swiz(select, i);
  6452. expr += " ? ";
  6453. expr += swiz(true_value, i);
  6454. expr += " : ";
  6455. expr += swiz(false_value, i);
  6456. if (i + 1 < restype.vecsize)
  6457. expr += ", ";
  6458. }
  6459. expr += ")";
  6460. }
  6461. return expr;
  6462. }
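// Illustrative sketch (added note): for a vector select, to_ternary_expression() above expands
// component-wise into a constructor, roughly (names hypothetical):
//   vec2 r = vec2(sel.x ? a.x : b.x, sel.y ? a.y : b.y);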
  6463. void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
  6464. {
  6465. auto &lerptype = expression_type(lerp);
  6466. auto &restype = get<SPIRType>(result_type);
  6467. // If this results in a variable pointer, assume it may be written through.
  6468. if (restype.pointer)
  6469. {
  6470. register_write(left);
  6471. register_write(right);
  6472. }
  6473. string mix_op;
  6474. bool has_boolean_mix = *backend.boolean_mix_function &&
  6475. ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
  6476. bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
6477. // Cannot use boolean mix when the lerp argument is just one boolean;
6478. // fall back to regular ternary expressions.
  6479. if (lerptype.vecsize == 1)
  6480. has_boolean_mix = false;
  6481. // If we can reduce the mix to a simple cast, do so.
  6482. // This helps for cases like int(bool), uint(bool) which is implemented with
  6483. // OpSelect bool 1 0.
  6484. if (trivial_mix)
  6485. {
  6486. emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
  6487. }
  6488. else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
  6489. {
  6490. // Boolean mix not supported on desktop without extension.
  6491. // Was added in OpenGL 4.5 with ES 3.1 compat.
  6492. //
  6493. // Could use GL_EXT_shader_integer_mix on desktop at least,
  6494. // but Apple doesn't support it. :(
  6495. // Just implement it as ternary expressions.
  6496. auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
  6497. emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
  6498. inherit_expression_dependencies(id, left);
  6499. inherit_expression_dependencies(id, right);
  6500. inherit_expression_dependencies(id, lerp);
  6501. }
  6502. else if (lerptype.basetype == SPIRType::Boolean)
  6503. emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
  6504. else
  6505. emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
  6506. }
  6507. string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
  6508. {
  6509. // Keep track of the array indices we have used to load the image.
  6510. // We'll need to use the same array index into the combined image sampler array.
  6511. auto image_expr = to_non_uniform_aware_expression(image_id);
  6512. string array_expr;
  6513. auto array_index = image_expr.find_first_of('[');
  6514. if (array_index != string::npos)
  6515. array_expr = image_expr.substr(array_index, string::npos);
  6516. auto &args = current_function->arguments;
6517. // For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler) and redirect
6518. // each combination to a new sampler2D uniform.
  6519. auto *image = maybe_get_backing_variable(image_id);
  6520. auto *samp = maybe_get_backing_variable(samp_id);
  6521. if (image)
  6522. image_id = image->self;
  6523. if (samp)
  6524. samp_id = samp->self;
  6525. auto image_itr = find_if(begin(args), end(args),
  6526. [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
  6527. auto sampler_itr = find_if(begin(args), end(args),
  6528. [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
  6529. if (image_itr != end(args) || sampler_itr != end(args))
  6530. {
6531. // If either the image or the sampler originates from a function parameter, we will find it in our argument list.
  6532. bool global_image = image_itr == end(args);
  6533. bool global_sampler = sampler_itr == end(args);
  6534. VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
  6535. VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
  6536. auto &combined = current_function->combined_parameters;
  6537. auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
  6538. return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
  6539. p.sampler_id == sid;
  6540. });
  6541. if (itr != end(combined))
  6542. return to_expression(itr->id) + array_expr;
  6543. else
  6544. {
  6545. SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
  6546. "build_combined_image_samplers() used "
  6547. "before compile() was called?");
  6548. }
  6549. }
  6550. else
  6551. {
  6552. // For global sampler2D, look directly at the global remapping table.
  6553. auto &mapping = combined_image_samplers;
  6554. auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
  6555. return combined.image_id == image_id && combined.sampler_id == samp_id;
  6556. });
  6557. if (itr != end(combined_image_samplers))
  6558. return to_expression(itr->combined_id) + array_expr;
  6559. else
  6560. {
  6561. SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
  6562. "before compile() was called?");
  6563. }
  6564. }
  6565. }
  6566. bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops)
  6567. {
  6568. switch (op)
  6569. {
  6570. case OpGroupNonUniformElect:
  6571. case OpGroupNonUniformBallot:
  6572. case OpGroupNonUniformBallotFindLSB:
  6573. case OpGroupNonUniformBallotFindMSB:
  6574. case OpGroupNonUniformBroadcast:
  6575. case OpGroupNonUniformBroadcastFirst:
  6576. case OpGroupNonUniformAll:
  6577. case OpGroupNonUniformAny:
  6578. case OpGroupNonUniformAllEqual:
  6579. case OpControlBarrier:
  6580. case OpMemoryBarrier:
  6581. case OpGroupNonUniformBallotBitCount:
  6582. case OpGroupNonUniformBallotBitExtract:
  6583. case OpGroupNonUniformInverseBallot:
  6584. return true;
  6585. case OpGroupNonUniformIAdd:
  6586. case OpGroupNonUniformFAdd:
  6587. case OpGroupNonUniformIMul:
  6588. case OpGroupNonUniformFMul:
  6589. {
  6590. const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
  6591. if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan ||
  6592. operation == GroupOperationExclusiveScan)
  6593. {
  6594. return true;
  6595. }
  6596. else
  6597. {
  6598. return false;
  6599. }
  6600. }
  6601. default:
  6602. return false;
  6603. }
  6604. }
  6605. void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
  6606. {
  6607. if (options.vulkan_semantics && combined_image_samplers.empty())
  6608. {
  6609. emit_binary_func_op(result_type, result_id, image_id, samp_id,
  6610. type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
  6611. }
  6612. else
  6613. {
  6614. // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
  6615. emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
  6616. }
  6617. // Make sure to suppress usage tracking and any expression invalidation.
  6618. // It is illegal to create temporaries of opaque types.
  6619. forwarded_temporaries.erase(result_id);
  6620. }
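// Illustrative note (added): with Vulkan semantics, the sampled-image op above is emitted as a
// constructor over the separate image and sampler, roughly (names hypothetical):
//   sampler2D(uTexture, uSampler)
// On plain GLSL/ESSL targets it is redirected to the pre-built combined sampler instead.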
  6621. static inline bool image_opcode_is_sample_no_dref(Op op)
  6622. {
  6623. switch (op)
  6624. {
  6625. case OpImageSampleExplicitLod:
  6626. case OpImageSampleImplicitLod:
  6627. case OpImageSampleProjExplicitLod:
  6628. case OpImageSampleProjImplicitLod:
  6629. case OpImageFetch:
  6630. case OpImageRead:
  6631. case OpImageSparseSampleExplicitLod:
  6632. case OpImageSparseSampleImplicitLod:
  6633. case OpImageSparseSampleProjExplicitLod:
  6634. case OpImageSparseSampleProjImplicitLod:
  6635. case OpImageSparseFetch:
  6636. case OpImageSparseRead:
  6637. return true;
  6638. default:
  6639. return false;
  6640. }
  6641. }
  6642. void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
  6643. uint32_t &texel_id)
  6644. {
  6645. // Need to allocate two temporaries.
  6646. if (options.es)
  6647. SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
  6648. require_extension_internal("GL_ARB_sparse_texture2");
  6649. auto &temps = extra_sub_expressions[id];
  6650. if (temps == 0)
  6651. temps = ir.increase_bound_by(2);
  6652. feedback_id = temps + 0;
  6653. texel_id = temps + 1;
  6654. auto &return_type = get<SPIRType>(result_type_id);
  6655. if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
  6656. SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
  6657. emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
  6658. emit_uninitialized_temporary(return_type.member_types[1], texel_id);
  6659. }
  6660. uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
  6661. {
  6662. auto itr = extra_sub_expressions.find(id);
  6663. if (itr == extra_sub_expressions.end())
  6664. return 0;
  6665. else
  6666. return itr->second + 1;
  6667. }
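// Illustrative sketch (added note): the sparse path in emit_texture_op() below first writes the
// residency code into a forced temporary, then reassembles the SPIR-V result struct, roughly
// (names hypothetical):
//   _code = sparseTextureARB(uTex, uv, _texel);
//   ResultStruct _res = ResultStruct(_code, _texel);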
  6668. void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
  6669. {
  6670. auto *ops = stream(i);
  6671. auto op = static_cast<Op>(i.op);
  6672. SmallVector<uint32_t> inherited_expressions;
  6673. uint32_t result_type_id = ops[0];
  6674. uint32_t id = ops[1];
  6675. auto &return_type = get<SPIRType>(result_type_id);
  6676. uint32_t sparse_code_id = 0;
  6677. uint32_t sparse_texel_id = 0;
  6678. if (sparse)
  6679. emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);
  6680. bool forward = false;
  6681. string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
  6682. if (sparse)
  6683. {
  6684. statement(to_expression(sparse_code_id), " = ", expr, ";");
  6685. expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
  6686. ")");
  6687. forward = true;
  6688. inherited_expressions.clear();
  6689. }
  6690. emit_op(result_type_id, id, expr, forward);
  6691. for (auto &inherit : inherited_expressions)
  6692. inherit_expression_dependencies(id, inherit);
  6693. // Do not register sparse ops as control dependent as they are always lowered to a temporary.
  6694. switch (op)
  6695. {
  6696. case OpImageSampleDrefImplicitLod:
  6697. case OpImageSampleImplicitLod:
  6698. case OpImageSampleProjImplicitLod:
  6699. case OpImageSampleProjDrefImplicitLod:
  6700. register_control_dependent_expression(id);
  6701. break;
  6702. default:
  6703. break;
  6704. }
  6705. }
  6706. std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
  6707. SmallVector<uint32_t> &inherited_expressions)
  6708. {
  6709. auto *ops = stream(i);
  6710. auto op = static_cast<Op>(i.op);
  6711. uint32_t length = i.length;
  6712. uint32_t result_type_id = ops[0];
  6713. VariableID img = ops[2];
  6714. uint32_t coord = ops[3];
  6715. uint32_t dref = 0;
  6716. uint32_t comp = 0;
  6717. bool gather = false;
  6718. bool proj = false;
  6719. bool fetch = false;
  6720. bool nonuniform_expression = false;
  6721. const uint32_t *opt = nullptr;
  6722. auto &result_type = get<SPIRType>(result_type_id);
  6723. inherited_expressions.push_back(coord);
  6724. if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img))
  6725. nonuniform_expression = true;
  6726. switch (op)
  6727. {
  6728. case OpImageSampleDrefImplicitLod:
  6729. case OpImageSampleDrefExplicitLod:
  6730. case OpImageSparseSampleDrefImplicitLod:
  6731. case OpImageSparseSampleDrefExplicitLod:
  6732. dref = ops[4];
  6733. opt = &ops[5];
  6734. length -= 5;
  6735. break;
  6736. case OpImageSampleProjDrefImplicitLod:
  6737. case OpImageSampleProjDrefExplicitLod:
  6738. case OpImageSparseSampleProjDrefImplicitLod:
  6739. case OpImageSparseSampleProjDrefExplicitLod:
  6740. dref = ops[4];
  6741. opt = &ops[5];
  6742. length -= 5;
  6743. proj = true;
  6744. break;
  6745. case OpImageDrefGather:
  6746. case OpImageSparseDrefGather:
  6747. dref = ops[4];
  6748. opt = &ops[5];
  6749. length -= 5;
  6750. gather = true;
  6751. if (options.es && options.version < 310)
  6752. SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
  6753. else if (!options.es && options.version < 400)
  6754. SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
  6755. break;
  6756. case OpImageGather:
  6757. case OpImageSparseGather:
  6758. comp = ops[4];
  6759. opt = &ops[5];
  6760. length -= 5;
  6761. gather = true;
  6762. if (options.es && options.version < 310)
  6763. SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
  6764. else if (!options.es && options.version < 400)
  6765. {
  6766. if (!expression_is_constant_null(comp))
  6767. SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
  6768. require_extension_internal("GL_ARB_texture_gather");
  6769. }
  6770. break;
  6771. case OpImageFetch:
  6772. case OpImageSparseFetch:
  6773. case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
  6774. opt = &ops[4];
  6775. length -= 4;
  6776. fetch = true;
  6777. break;
  6778. case OpImageSampleProjImplicitLod:
  6779. case OpImageSampleProjExplicitLod:
  6780. case OpImageSparseSampleProjImplicitLod:
  6781. case OpImageSparseSampleProjExplicitLod:
  6782. opt = &ops[4];
  6783. length -= 4;
  6784. proj = true;
  6785. break;
  6786. default:
  6787. opt = &ops[4];
  6788. length -= 4;
  6789. break;
  6790. }
  6791. // Bypass pointers because we need the real image struct
  6792. auto &type = expression_type(img);
  6793. auto &imgtype = get<SPIRType>(type.self);
  6794. uint32_t coord_components = 0;
  6795. switch (imgtype.image.dim)
  6796. {
  6797. case spv::Dim1D:
  6798. coord_components = 1;
  6799. break;
  6800. case spv::Dim2D:
  6801. coord_components = 2;
  6802. break;
  6803. case spv::Dim3D:
  6804. coord_components = 3;
  6805. break;
  6806. case spv::DimCube:
  6807. coord_components = 3;
  6808. break;
  6809. case spv::DimBuffer:
  6810. coord_components = 1;
  6811. break;
  6812. default:
  6813. coord_components = 2;
  6814. break;
  6815. }
  6816. if (dref)
  6817. inherited_expressions.push_back(dref);
  6818. if (proj)
  6819. coord_components++;
  6820. if (imgtype.image.arrayed)
  6821. coord_components++;
  6822. uint32_t bias = 0;
  6823. uint32_t lod = 0;
  6824. uint32_t grad_x = 0;
  6825. uint32_t grad_y = 0;
  6826. uint32_t coffset = 0;
  6827. uint32_t offset = 0;
  6828. uint32_t coffsets = 0;
  6829. uint32_t sample = 0;
  6830. uint32_t minlod = 0;
  6831. uint32_t flags = 0;
  6832. if (length)
  6833. {
  6834. flags = *opt++;
  6835. length--;
  6836. }
  6837. auto test = [&](uint32_t &v, uint32_t flag) {
  6838. if (length && (flags & flag))
  6839. {
  6840. v = *opt++;
  6841. inherited_expressions.push_back(v);
  6842. length--;
  6843. }
  6844. };
  6845. test(bias, ImageOperandsBiasMask);
  6846. test(lod, ImageOperandsLodMask);
  6847. test(grad_x, ImageOperandsGradMask);
  6848. test(grad_y, ImageOperandsGradMask);
  6849. test(coffset, ImageOperandsConstOffsetMask);
  6850. test(offset, ImageOperandsOffsetMask);
  6851. test(coffsets, ImageOperandsConstOffsetsMask);
  6852. test(sample, ImageOperandsSampleMask);
  6853. test(minlod, ImageOperandsMinLodMask);
  6854. TextureFunctionBaseArguments base_args = {};
  6855. base_args.img = img;
  6856. base_args.imgtype = &imgtype;
  6857. base_args.is_fetch = fetch != 0;
  6858. base_args.is_gather = gather != 0;
  6859. base_args.is_proj = proj != 0;
  6860. string expr;
  6861. TextureFunctionNameArguments name_args = {};
  6862. name_args.base = base_args;
  6863. name_args.has_array_offsets = coffsets != 0;
  6864. name_args.has_offset = coffset != 0 || offset != 0;
  6865. name_args.has_grad = grad_x != 0 || grad_y != 0;
  6866. name_args.has_dref = dref != 0;
  6867. name_args.is_sparse_feedback = sparse;
  6868. name_args.has_min_lod = minlod != 0;
  6869. name_args.lod = lod;
  6870. expr += to_function_name(name_args);
  6871. expr += "(";
  6872. uint32_t sparse_texel_id = 0;
  6873. if (sparse)
  6874. sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
  6875. TextureFunctionArguments args = {};
  6876. args.base = base_args;
  6877. args.coord = coord;
  6878. args.coord_components = coord_components;
  6879. args.dref = dref;
  6880. args.grad_x = grad_x;
  6881. args.grad_y = grad_y;
  6882. args.lod = lod;
  6883. if (coffsets)
  6884. args.offset = coffsets;
  6885. else if (coffset)
  6886. args.offset = coffset;
  6887. else
  6888. args.offset = offset;
  6889. args.bias = bias;
  6890. args.component = comp;
  6891. args.sample = sample;
  6892. args.sparse_texel = sparse_texel_id;
  6893. args.min_lod = minlod;
  6894. args.nonuniform_expression = nonuniform_expression;
  6895. expr += to_function_args(args, forward);
  6896. expr += ")";
  6897. // texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here.
  6898. if (is_legacy() && !options.es && is_depth_image(imgtype, img))
  6899. expr += ".r";
6900. // Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
  6901. // Remap back to 4 components as sampling opcodes expect.
  6902. if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
  6903. {
  6904. bool image_is_depth = false;
  6905. const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
  6906. VariableID image_id = combined ? combined->image : img;
  6907. if (combined && is_depth_image(imgtype, combined->image))
  6908. image_is_depth = true;
  6909. else if (is_depth_image(imgtype, img))
  6910. image_is_depth = true;
  6911. // We must also check the backing variable for the image.
  6912. // We might have loaded an OpImage, and used that handle for two different purposes.
  6913. // Once with comparison, once without.
  6914. auto *image_variable = maybe_get_backing_variable(image_id);
  6915. if (image_variable && is_depth_image(get<SPIRType>(image_variable->basetype), image_variable->self))
  6916. image_is_depth = true;
  6917. if (image_is_depth)
  6918. expr = remap_swizzle(result_type, 1, expr);
  6919. }
  6920. if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
  6921. {
6922. // Just value-cast (narrowing) to the expected type since we cannot rely on narrowing to work automatically.
6923. // Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
  6924. expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
  6925. }
  6926. // Deals with reads from MSL. We might need to downconvert to fewer components.
  6927. if (op == OpImageRead)
  6928. expr = remap_swizzle(result_type, 4, expr);
  6929. return expr;
  6930. }
  6931. bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
  6932. {
  6933. auto *c = maybe_get<SPIRConstant>(id);
  6934. if (!c)
  6935. return false;
  6936. return c->constant_is_null();
  6937. }
  6938. bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
  6939. {
  6940. auto &type = expression_type(ptr);
  6941. if (!type_is_top_level_array(get_pointee_type(type)))
  6942. return false;
  6943. if (!backend.array_is_value_type)
  6944. return true;
  6945. auto *var = maybe_get_backing_variable(ptr);
  6946. if (!var)
  6947. return false;
  6948. auto &backed_type = get<SPIRType>(var->basetype);
  6949. return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct &&
  6950. has_member_decoration(backed_type.self, 0, DecorationOffset);
  6951. }
  6952. // Returns the function name for a texture sampling function for the specified image and sampling characteristics.
  6953. // For some subclasses, the function is a method on the specified image.
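// Illustrative note (added): the name is assembled from the sampling characteristics, e.g. a
// projective gradient sample with an offset becomes "textureProjGradOffset", and a sparse
// gather with an offset becomes "sparseTextureGatherOffsetARB".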
  6954. string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
  6955. {
  6956. if (args.has_min_lod)
  6957. {
  6958. if (options.es)
  6959. SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
  6960. require_extension_internal("GL_ARB_sparse_texture_clamp");
  6961. }
  6962. string fname;
  6963. auto &imgtype = *args.base.imgtype;
  6964. VariableID tex = args.base.img;
  6965. // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
  6966. // To emulate this, we will have to use textureGrad with a constant gradient of 0.
  6967. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
  6968. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
  6969. bool workaround_lod_array_shadow_as_grad = false;
  6970. if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
  6971. is_depth_image(imgtype, tex) && args.lod && !args.base.is_fetch)
  6972. {
  6973. if (!expression_is_constant_null(args.lod))
  6974. {
  6975. SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
  6976. "expressed in GLSL.");
  6977. }
  6978. workaround_lod_array_shadow_as_grad = true;
  6979. }
  6980. if (args.is_sparse_feedback)
  6981. fname += "sparse";
  6982. if (args.base.is_fetch)
  6983. fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
  6984. else
  6985. {
  6986. fname += args.is_sparse_feedback ? "Texture" : "texture";
  6987. if (args.base.is_gather)
  6988. fname += "Gather";
  6989. if (args.has_array_offsets)
  6990. fname += "Offsets";
  6991. if (args.base.is_proj)
  6992. fname += "Proj";
  6993. if (args.has_grad || workaround_lod_array_shadow_as_grad)
  6994. fname += "Grad";
  6995. if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
  6996. fname += "Lod";
  6997. }
  6998. if (args.has_offset)
  6999. fname += "Offset";
  7000. if (args.has_min_lod)
  7001. fname += "Clamp";
  7002. if (args.is_sparse_feedback || args.has_min_lod)
  7003. fname += "ARB";
  7004. return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
  7005. }
  7006. std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
  7007. {
  7008. auto *var = maybe_get_backing_variable(id);
  7009. // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
  7010. // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
  7011. if (var)
  7012. {
  7013. auto &type = get<SPIRType>(var->basetype);
  7014. if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
  7015. {
  7016. if (options.vulkan_semantics)
  7017. {
  7018. if (dummy_sampler_id)
  7019. {
  7020. // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
  7021. auto sampled_type = type;
  7022. sampled_type.basetype = SPIRType::SampledImage;
  7023. return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ",
  7024. to_expression(dummy_sampler_id), ")");
  7025. }
  7026. else
  7027. {
  7028. // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
  7029. require_extension_internal("GL_EXT_samplerless_texture_functions");
  7030. }
  7031. }
  7032. else
  7033. {
  7034. if (!dummy_sampler_id)
  7035. SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
  7036. "build_dummy_sampler_for_combined_images() called?");
  7037. return to_combined_image_sampler(id, dummy_sampler_id);
  7038. }
  7039. }
  7040. }
  7041. return to_non_uniform_aware_expression(id);
  7042. }
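// Illustrative note (added): when fetching from a separate OpTypeImage without
// GL_EXT_samplerless_texture_functions, the function above pairs the image with the dummy
// sampler, so the expression comes out roughly as (names hypothetical):
//   sampler2D(uTexture, _dummy_sampler)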
  7043. // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
  7044. string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
  7045. {
  7046. VariableID img = args.base.img;
  7047. auto &imgtype = *args.base.imgtype;
  7048. string farg_str;
  7049. if (args.base.is_fetch)
  7050. farg_str = convert_separate_image_to_expression(img);
  7051. else
  7052. farg_str = to_non_uniform_aware_expression(img);
  7053. if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
  7054. {
  7055. // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
  7056. farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
  7057. }
  7058. bool swizz_func = backend.swizzle_is_function;
  7059. auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
  7060. if (comps == in_comps)
  7061. return "";
  7062. switch (comps)
  7063. {
  7064. case 1:
  7065. return ".x";
  7066. case 2:
  7067. return swizz_func ? ".xy()" : ".xy";
  7068. case 3:
  7069. return swizz_func ? ".xyz()" : ".xyz";
  7070. default:
  7071. return "";
  7072. }
  7073. };
  7074. bool forward = should_forward(args.coord);
  7075. // The IR can give us more components than we need, so chop them off as needed.
  7076. auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
  7077. // Only enclose the UV expression if needed.
  7078. auto coord_expr =
  7079. (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
  7080. // texelFetch only takes int, not uint.
  7081. auto &coord_type = expression_type(args.coord);
  7082. if (coord_type.basetype == SPIRType::UInt)
  7083. {
  7084. auto expected_type = coord_type;
  7085. expected_type.vecsize = args.coord_components;
  7086. expected_type.basetype = SPIRType::Int;
  7087. coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
  7088. }
  7089. // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
  7090. // To emulate this, we will have to use textureGrad with a constant gradient of 0.
  7091. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
  7092. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
  7093. bool workaround_lod_array_shadow_as_grad =
  7094. ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
  7095. is_depth_image(imgtype, img) && args.lod != 0 && !args.base.is_fetch;
  7096. if (args.dref)
  7097. {
  7098. forward = forward && should_forward(args.dref);
  7099. // SPIR-V splits dref and coordinate.
  7100. if (args.base.is_gather ||
  7101. args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
  7102. {
  7103. farg_str += ", ";
  7104. farg_str += to_expression(args.coord);
  7105. farg_str += ", ";
  7106. farg_str += to_expression(args.dref);
  7107. }
  7108. else if (args.base.is_proj)
  7109. {
  7110. // Have to reshuffle so we get vec4(coord, dref, proj), special case.
7111. // Other shading languages split up the arguments for coord and compare value like SPIR-V does.
  7112. // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
  7113. farg_str += ", vec4(";
  7114. if (imgtype.image.dim == Dim1D)
  7115. {
  7116. // Could reuse coord_expr, but we will mess up the temporary usage checking.
  7117. farg_str += to_enclosed_expression(args.coord) + ".x";
  7118. farg_str += ", ";
  7119. farg_str += "0.0, ";
  7120. farg_str += to_expression(args.dref);
  7121. farg_str += ", ";
  7122. farg_str += to_enclosed_expression(args.coord) + ".y)";
  7123. }
  7124. else if (imgtype.image.dim == Dim2D)
  7125. {
  7126. // Could reuse coord_expr, but we will mess up the temporary usage checking.
  7127. farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
  7128. farg_str += ", ";
  7129. farg_str += to_expression(args.dref);
  7130. farg_str += ", ";
  7131. farg_str += to_enclosed_expression(args.coord) + ".z)";
  7132. }
  7133. else
  7134. SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
  7135. }
  7136. else
  7137. {
  7138. // Create a composite which merges coord/dref into a single vector.
  7139. auto type = expression_type(args.coord);
  7140. type.vecsize = args.coord_components + 1;
  7141. if (imgtype.image.dim == Dim1D && options.es)
  7142. type.vecsize++;
  7143. farg_str += ", ";
  7144. farg_str += type_to_glsl_constructor(type);
  7145. farg_str += "(";
  7146. if (imgtype.image.dim == Dim1D && options.es)
  7147. {
  7148. if (imgtype.image.arrayed)
  7149. {
  7150. farg_str += enclose_expression(coord_expr) + ".x";
  7151. farg_str += ", 0.0, ";
  7152. farg_str += enclose_expression(coord_expr) + ".y";
  7153. }
  7154. else
  7155. {
  7156. farg_str += coord_expr;
  7157. farg_str += ", 0.0";
  7158. }
  7159. }
  7160. else
  7161. farg_str += coord_expr;
  7162. farg_str += ", ";
  7163. farg_str += to_expression(args.dref);
  7164. farg_str += ")";
  7165. }
  7166. }
  7167. else
  7168. {
  7169. if (imgtype.image.dim == Dim1D && options.es)
  7170. {
  7171. // Have to fake a second coordinate.
  7172. if (type_is_floating_point(coord_type))
  7173. {
  7174. // Cannot mix proj and array.
  7175. if (imgtype.image.arrayed || args.base.is_proj)
  7176. {
  7177. coord_expr = join("vec3(", enclose_expression(coord_expr), ".x, 0.0, ",
  7178. enclose_expression(coord_expr), ".y)");
  7179. }
  7180. else
  7181. coord_expr = join("vec2(", coord_expr, ", 0.0)");
  7182. }
  7183. else
  7184. {
  7185. if (imgtype.image.arrayed)
  7186. {
  7187. coord_expr = join("ivec3(", enclose_expression(coord_expr),
  7188. ".x, 0, ",
  7189. enclose_expression(coord_expr), ".y)");
  7190. }
  7191. else
  7192. coord_expr = join("ivec2(", coord_expr, ", 0)");
  7193. }
  7194. }
  7195. farg_str += ", ";
  7196. farg_str += coord_expr;
  7197. }
  7198. if (args.grad_x || args.grad_y)
  7199. {
  7200. forward = forward && should_forward(args.grad_x);
  7201. forward = forward && should_forward(args.grad_y);
  7202. farg_str += ", ";
  7203. farg_str += to_expression(args.grad_x);
  7204. farg_str += ", ";
  7205. farg_str += to_expression(args.grad_y);
  7206. }
  7207. if (args.lod)
  7208. {
  7209. if (workaround_lod_array_shadow_as_grad)
  7210. {
  7211. // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
  7212. // Implementing this as plain texture() is not safe on some implementations.
  7213. if (imgtype.image.dim == Dim2D)
  7214. farg_str += ", vec2(0.0), vec2(0.0)";
  7215. else if (imgtype.image.dim == DimCube)
  7216. farg_str += ", vec3(0.0), vec3(0.0)";
  7217. }
  7218. else
  7219. {
  7220. forward = forward && should_forward(args.lod);
  7221. farg_str += ", ";
  7222. // Lod expression for TexelFetch in GLSL must be int, and only int.
  7223. if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
  7224. farg_str += bitcast_expression(SPIRType::Int, args.lod);
  7225. else
  7226. farg_str += to_expression(args.lod);
  7227. }
  7228. }
  7229. else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
  7230. {
7231. // Lod argument is optional in OpImageFetch, but we require a LOD value, so pick 0 as the default.
  7232. farg_str += ", 0";
  7233. }
  7234. if (args.offset)
  7235. {
  7236. forward = forward && should_forward(args.offset);
  7237. farg_str += ", ";
  7238. farg_str += bitcast_expression(SPIRType::Int, args.offset);
  7239. }
  7240. if (args.sample)
  7241. {
  7242. farg_str += ", ";
  7243. farg_str += bitcast_expression(SPIRType::Int, args.sample);
  7244. }
  7245. if (args.min_lod)
  7246. {
  7247. farg_str += ", ";
  7248. farg_str += to_expression(args.min_lod);
  7249. }
  7250. if (args.sparse_texel)
  7251. {
7252. // The sparse texel output parameter comes after everything else, except that it goes before the optional component/bias arguments.
  7253. farg_str += ", ";
  7254. farg_str += to_expression(args.sparse_texel);
  7255. }
  7256. if (args.bias)
  7257. {
  7258. forward = forward && should_forward(args.bias);
  7259. farg_str += ", ";
  7260. farg_str += to_expression(args.bias);
  7261. }
  7262. if (args.component && !expression_is_constant_null(args.component))
  7263. {
  7264. forward = forward && should_forward(args.component);
  7265. farg_str += ", ";
  7266. farg_str += bitcast_expression(SPIRType::Int, args.component);
  7267. }
  7268. *p_forward = forward;
  7269. return farg_str;
  7270. }
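// Illustrative sketch (added note): to_function_args() above merges the SPIR-V dref operand into
// the coordinate vector when required, so a depth-compare sample ends up roughly as
// (names hypothetical):
//   float r = texture(uShadow2D, vec3(uv, dref));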
  7271. Op CompilerGLSL::get_remapped_spirv_op(Op op) const
  7272. {
  7273. if (options.relax_nan_checks)
  7274. {
  7275. switch (op)
  7276. {
  7277. case OpFUnordLessThan:
  7278. op = OpFOrdLessThan;
  7279. break;
  7280. case OpFUnordLessThanEqual:
  7281. op = OpFOrdLessThanEqual;
  7282. break;
  7283. case OpFUnordGreaterThan:
  7284. op = OpFOrdGreaterThan;
  7285. break;
  7286. case OpFUnordGreaterThanEqual:
  7287. op = OpFOrdGreaterThanEqual;
  7288. break;
  7289. case OpFUnordEqual:
  7290. op = OpFOrdEqual;
  7291. break;
  7292. case OpFOrdNotEqual:
  7293. op = OpFUnordNotEqual;
  7294. break;
  7295. default:
  7296. break;
  7297. }
  7298. }
  7299. return op;
  7300. }
  7301. GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const
  7302. {
  7303. // Relax to non-NaN aware opcodes.
  7304. if (options.relax_nan_checks)
  7305. {
  7306. switch (std450_op)
  7307. {
  7308. case GLSLstd450NClamp:
  7309. std450_op = GLSLstd450FClamp;
  7310. break;
  7311. case GLSLstd450NMin:
  7312. std450_op = GLSLstd450FMin;
  7313. break;
  7314. case GLSLstd450NMax:
  7315. std450_op = GLSLstd450FMax;
  7316. break;
  7317. default:
  7318. break;
  7319. }
  7320. }
  7321. return std450_op;
  7322. }
  7323. void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
  7324. {
  7325. auto op = static_cast<GLSLstd450>(eop);
  7326. if (is_legacy() && is_unsigned_glsl_opcode(op))
  7327. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
  7328. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  7329. uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
  7330. auto int_type = to_signed_basetype(integer_width);
  7331. auto uint_type = to_unsigned_basetype(integer_width);
  7332. op = get_remapped_glsl_op(op);
  7333. switch (op)
  7334. {
  7335. // FP fiddling
  7336. case GLSLstd450Round:
  7337. if (!is_legacy())
  7338. emit_unary_func_op(result_type, id, args[0], "round");
  7339. else
  7340. {
  7341. auto op0 = to_enclosed_expression(args[0]);
  7342. auto &op0_type = expression_type(args[0]);
  7343. auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
  7344. bool forward = should_forward(args[0]);
  7345. emit_op(result_type, id, expr, forward);
  7346. inherit_expression_dependencies(id, args[0]);
  7347. }
  7348. break;
  7349. case GLSLstd450RoundEven:
  7350. if (!is_legacy())
  7351. emit_unary_func_op(result_type, id, args[0], "roundEven");
  7352. else if (!options.es)
  7353. {
  7354. // This extension provides round() with round-to-even semantics.
  7355. require_extension_internal("GL_EXT_gpu_shader4");
  7356. emit_unary_func_op(result_type, id, args[0], "round");
  7357. }
  7358. else
  7359. SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
  7360. break;
  7361. case GLSLstd450Trunc:
  7362. if (!is_legacy())
  7363. emit_unary_func_op(result_type, id, args[0], "trunc");
  7364. else
  7365. {
  7366. // Implement by value-casting to int and back.
  7367. bool forward = should_forward(args[0]);
  7368. auto op0 = to_unpacked_expression(args[0]);
  7369. auto &op0_type = expression_type(args[0]);
  7370. auto via_type = op0_type;
  7371. via_type.basetype = SPIRType::Int;
  7372. auto expr = join(type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(", op0, "))");
  7373. emit_op(result_type, id, expr, forward);
  7374. inherit_expression_dependencies(id, args[0]);
  7375. }
  7376. break;
  7377. case GLSLstd450SAbs:
  7378. emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
  7379. break;
  7380. case GLSLstd450FAbs:
  7381. emit_unary_func_op(result_type, id, args[0], "abs");
  7382. break;
  7383. case GLSLstd450SSign:
  7384. emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
  7385. break;
  7386. case GLSLstd450FSign:
  7387. emit_unary_func_op(result_type, id, args[0], "sign");
  7388. break;
  7389. case GLSLstd450Floor:
  7390. emit_unary_func_op(result_type, id, args[0], "floor");
  7391. break;
  7392. case GLSLstd450Ceil:
  7393. emit_unary_func_op(result_type, id, args[0], "ceil");
  7394. break;
  7395. case GLSLstd450Fract:
  7396. emit_unary_func_op(result_type, id, args[0], "fract");
  7397. break;
  7398. case GLSLstd450Radians:
  7399. emit_unary_func_op(result_type, id, args[0], "radians");
  7400. break;
  7401. case GLSLstd450Degrees:
  7402. emit_unary_func_op(result_type, id, args[0], "degrees");
  7403. break;
  7404. case GLSLstd450Fma:
  7405. if ((!options.es && options.version < 400) || (options.es && options.version < 320))
  7406. {
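// fma() requires GLSL 4.00 / ESSL 3.20, so older targets get the expanded
// a * b + c form. The expansion is not guaranteed to be fused.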
  7407. auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
  7408. to_enclosed_expression(args[2]));
  7409. emit_op(result_type, id, expr,
  7410. should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
  7411. for (uint32_t i = 0; i < 3; i++)
  7412. inherit_expression_dependencies(id, args[i]);
  7413. }
  7414. else
  7415. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
  7416. break;
  7417. case GLSLstd450Modf:
  7418. register_call_out_argument(args[1]);
  7419. if (!is_legacy())
  7420. {
  7421. forced_temporaries.insert(id);
  7422. emit_binary_func_op(result_type, id, args[0], args[1], "modf");
  7423. }
  7424. else
  7425. {
7426. // NB: legacy GLSL doesn't have trunc() either, so we do a value cast.
  7427. auto &op1_type = expression_type(args[1]);
  7428. auto via_type = op1_type;
  7429. via_type.basetype = SPIRType::Int;
  7430. statement(to_expression(args[1]), " = ",
  7431. type_to_glsl(op1_type), "(", type_to_glsl(via_type),
  7432. "(", to_expression(args[0]), "));");
  7433. emit_binary_op(result_type, id, args[0], args[1], "-");
  7434. }
  7435. break;
  7436. case GLSLstd450ModfStruct:
  7437. {
  7438. auto &type = get<SPIRType>(result_type);
  7439. emit_uninitialized_temporary_expression(result_type, id);
  7440. if (!is_legacy())
  7441. {
  7442. statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
  7443. to_expression(id), ".", to_member_name(type, 1), ");");
  7444. }
  7445. else
  7446. {
7447. // NB: legacy GLSL doesn't have trunc() either, so we do a value cast.
  7448. auto &op0_type = expression_type(args[0]);
  7449. auto via_type = op0_type;
  7450. via_type.basetype = SPIRType::Int;
  7451. statement(to_expression(id), ".", to_member_name(type, 1), " = ", type_to_glsl(op0_type),
  7452. "(", type_to_glsl(via_type), "(", to_expression(args[0]), "));");
  7453. statement(to_expression(id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(args[0]), " - ",
  7454. to_expression(id), ".", to_member_name(type, 1), ";");
  7455. }
  7456. break;
  7457. }
  7458. // Minmax
  7459. case GLSLstd450UMin:
  7460. emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
  7461. break;
  7462. case GLSLstd450SMin:
  7463. emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
  7464. break;
  7465. case GLSLstd450FMin:
  7466. emit_binary_func_op(result_type, id, args[0], args[1], "min");
  7467. break;
  7468. case GLSLstd450FMax:
  7469. emit_binary_func_op(result_type, id, args[0], args[1], "max");
  7470. break;
  7471. case GLSLstd450UMax:
  7472. emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
  7473. break;
  7474. case GLSLstd450SMax:
  7475. emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
  7476. break;
  7477. case GLSLstd450FClamp:
  7478. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
  7479. break;
  7480. case GLSLstd450UClamp:
  7481. emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
  7482. break;
  7483. case GLSLstd450SClamp:
  7484. emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
  7485. break;
  7486. // Trig
  7487. case GLSLstd450Sin:
  7488. emit_unary_func_op(result_type, id, args[0], "sin");
  7489. break;
  7490. case GLSLstd450Cos:
  7491. emit_unary_func_op(result_type, id, args[0], "cos");
  7492. break;
  7493. case GLSLstd450Tan:
  7494. emit_unary_func_op(result_type, id, args[0], "tan");
  7495. break;
  7496. case GLSLstd450Asin:
  7497. emit_unary_func_op(result_type, id, args[0], "asin");
  7498. break;
  7499. case GLSLstd450Acos:
  7500. emit_unary_func_op(result_type, id, args[0], "acos");
  7501. break;
  7502. case GLSLstd450Atan:
  7503. emit_unary_func_op(result_type, id, args[0], "atan");
  7504. break;
  7505. case GLSLstd450Sinh:
  7506. if (!is_legacy())
  7507. emit_unary_func_op(result_type, id, args[0], "sinh");
  7508. else
  7509. {
  7510. bool forward = should_forward(args[0]);
  7511. auto expr = join("(exp(", to_expression(args[0]), ") - exp(-", to_enclosed_expression(args[0]), ")) * 0.5");
  7512. emit_op(result_type, id, expr, forward);
  7513. inherit_expression_dependencies(id, args[0]);
  7514. }
  7515. break;
  7516. case GLSLstd450Cosh:
  7517. if (!is_legacy())
  7518. emit_unary_func_op(result_type, id, args[0], "cosh");
  7519. else
  7520. {
  7521. bool forward = should_forward(args[0]);
  7522. auto expr = join("(exp(", to_expression(args[0]), ") + exp(-", to_enclosed_expression(args[0]), ")) * 0.5");
  7523. emit_op(result_type, id, expr, forward);
  7524. inherit_expression_dependencies(id, args[0]);
  7525. }
  7526. break;
  7527. case GLSLstd450Tanh:
  7528. if (!is_legacy())
  7529. emit_unary_func_op(result_type, id, args[0], "tanh");
  7530. else
  7531. {
  7532. // Create temporaries to store the result of exp(arg) and exp(-arg).
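// tanh(x) is emitted as (exp(x) - exp(-x)) / (exp(x) + exp(-x)); the temporaries
// make sure exp() is only evaluated twice.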
  7533. uint32_t &ids = extra_sub_expressions[id];
  7534. if (!ids)
  7535. {
  7536. ids = ir.increase_bound_by(2);
  7537. // Inherit precision qualifier (legacy has no NoContraction).
  7538. if (has_decoration(id, DecorationRelaxedPrecision))
  7539. {
  7540. set_decoration(ids, DecorationRelaxedPrecision);
  7541. set_decoration(ids + 1, DecorationRelaxedPrecision);
  7542. }
  7543. }
  7544. uint32_t epos_id = ids;
  7545. uint32_t eneg_id = ids + 1;
  7546. emit_op(result_type, epos_id, join("exp(", to_expression(args[0]), ")"), false);
  7547. emit_op(result_type, eneg_id, join("exp(-", to_enclosed_expression(args[0]), ")"), false);
  7548. inherit_expression_dependencies(epos_id, args[0]);
  7549. inherit_expression_dependencies(eneg_id, args[0]);
  7550. auto expr = join("(", to_enclosed_expression(epos_id), " - ", to_enclosed_expression(eneg_id), ") / "
  7551. "(", to_enclosed_expression(epos_id), " + ", to_enclosed_expression(eneg_id), ")");
  7552. emit_op(result_type, id, expr, true);
  7553. inherit_expression_dependencies(id, epos_id);
  7554. inherit_expression_dependencies(id, eneg_id);
  7555. }
  7556. break;
  7557. case GLSLstd450Asinh:
  7558. if (!is_legacy())
  7559. emit_unary_func_op(result_type, id, args[0], "asinh");
  7560. else
  7561. emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Asinh);
  7562. break;
  7563. case GLSLstd450Acosh:
  7564. if (!is_legacy())
  7565. emit_unary_func_op(result_type, id, args[0], "acosh");
  7566. else
  7567. emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Acosh);
  7568. break;
  7569. case GLSLstd450Atanh:
  7570. if (!is_legacy())
  7571. emit_unary_func_op(result_type, id, args[0], "atanh");
  7572. else
  7573. emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Atanh);
  7574. break;
  7575. case GLSLstd450Atan2:
  7576. emit_binary_func_op(result_type, id, args[0], args[1], "atan");
  7577. break;
  7578. // Exponentials
  7579. case GLSLstd450Pow:
  7580. emit_binary_func_op(result_type, id, args[0], args[1], "pow");
  7581. break;
  7582. case GLSLstd450Exp:
  7583. emit_unary_func_op(result_type, id, args[0], "exp");
  7584. break;
  7585. case GLSLstd450Log:
  7586. emit_unary_func_op(result_type, id, args[0], "log");
  7587. break;
  7588. case GLSLstd450Exp2:
  7589. emit_unary_func_op(result_type, id, args[0], "exp2");
  7590. break;
  7591. case GLSLstd450Log2:
  7592. emit_unary_func_op(result_type, id, args[0], "log2");
  7593. break;
  7594. case GLSLstd450Sqrt:
  7595. emit_unary_func_op(result_type, id, args[0], "sqrt");
  7596. break;
  7597. case GLSLstd450InverseSqrt:
  7598. emit_unary_func_op(result_type, id, args[0], "inversesqrt");
  7599. break;
  7600. // Matrix math
  7601. case GLSLstd450Determinant:
  7602. {
  7603. // No need to transpose - it doesn't affect the determinant
  7604. auto *e = maybe_get<SPIRExpression>(args[0]);
  7605. bool old_transpose = e && e->need_transpose;
  7606. if (old_transpose)
  7607. e->need_transpose = false;
  7608. if (options.version < 150) // also matches ES 100
  7609. {
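// No determinant() on these targets; emit a spvDeterminant polyfill
// (spvDeterminantMP for relaxed-precision ES) for the 2x2..4x4 float matrix sizes.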
  7610. auto &type = expression_type(args[0]);
  7611. assert(type.vecsize >= 2 && type.vecsize <= 4);
  7612. assert(type.vecsize == type.columns);
  7613. // ARB_gpu_shader_fp64 needs GLSL 150, other types are not valid
  7614. if (type.basetype != SPIRType::Float)
  7615. SPIRV_CROSS_THROW("Unsupported type for matrix determinant");
  7616. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  7617. require_polyfill(static_cast<Polyfill>(PolyfillDeterminant2x2 << (type.vecsize - 2)),
  7618. relaxed);
  7619. emit_unary_func_op(result_type, id, args[0],
  7620. (options.es && relaxed) ? "spvDeterminantMP" : "spvDeterminant");
  7621. }
  7622. else
  7623. emit_unary_func_op(result_type, id, args[0], "determinant");
  7624. if (old_transpose)
  7625. e->need_transpose = true;
  7626. break;
  7627. }
  7628. case GLSLstd450MatrixInverse:
  7629. {
  7630. // The inverse of the transpose is the same as the transpose of
  7631. // the inverse, so we can just flip need_transpose of the result.
  7632. auto *a = maybe_get<SPIRExpression>(args[0]);
  7633. bool old_transpose = a && a->need_transpose;
  7634. if (old_transpose)
  7635. a->need_transpose = false;
  7636. const char *func = "inverse";
  7637. if (options.version < 140) // also matches ES 100
  7638. {
  7639. auto &type = get<SPIRType>(result_type);
  7640. assert(type.vecsize >= 2 && type.vecsize <= 4);
  7641. assert(type.vecsize == type.columns);
  7642. // ARB_gpu_shader_fp64 needs GLSL 150, other types are invalid
  7643. if (type.basetype != SPIRType::Float)
  7644. SPIRV_CROSS_THROW("Unsupported type for matrix inverse");
  7645. bool relaxed = has_decoration(id, DecorationRelaxedPrecision);
  7646. require_polyfill(static_cast<Polyfill>(PolyfillMatrixInverse2x2 << (type.vecsize - 2)),
  7647. relaxed);
  7648. func = (options.es && relaxed) ? "spvInverseMP" : "spvInverse";
  7649. }
  7650. bool forward = should_forward(args[0]);
  7651. auto &e = emit_op(result_type, id, join(func, "(", to_unpacked_expression(args[0]), ")"), forward);
  7652. inherit_expression_dependencies(id, args[0]);
  7653. if (old_transpose)
  7654. {
  7655. e.need_transpose = true;
  7656. a->need_transpose = true;
  7657. }
  7658. break;
  7659. }
  7660. // Lerping
  7661. case GLSLstd450FMix:
  7662. case GLSLstd450IMix:
  7663. {
  7664. emit_mix_op(result_type, id, args[0], args[1], args[2]);
  7665. break;
  7666. }
  7667. case GLSLstd450Step:
  7668. emit_binary_func_op(result_type, id, args[0], args[1], "step");
  7669. break;
  7670. case GLSLstd450SmoothStep:
  7671. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
  7672. break;
  7673. // Packing
  7674. case GLSLstd450Frexp:
  7675. register_call_out_argument(args[1]);
  7676. forced_temporaries.insert(id);
  7677. emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
  7678. break;
  7679. case GLSLstd450FrexpStruct:
  7680. {
  7681. auto &type = get<SPIRType>(result_type);
  7682. emit_uninitialized_temporary_expression(result_type, id);
  7683. statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
  7684. to_expression(id), ".", to_member_name(type, 1), ");");
  7685. break;
  7686. }
  7687. case GLSLstd450Ldexp:
  7688. {
  7689. bool forward = should_forward(args[0]) && should_forward(args[1]);
  7690. auto op0 = to_unpacked_expression(args[0]);
  7691. auto op1 = to_unpacked_expression(args[1]);
  7692. auto &op1_type = expression_type(args[1]);
  7693. if (op1_type.basetype != SPIRType::Int)
  7694. {
  7695. // Need a value cast here.
  7696. auto target_type = op1_type;
  7697. target_type.basetype = SPIRType::Int;
  7698. op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
  7699. }
  7700. auto expr = join("ldexp(", op0, ", ", op1, ")");
  7701. emit_op(result_type, id, expr, forward);
  7702. inherit_expression_dependencies(id, args[0]);
  7703. inherit_expression_dependencies(id, args[1]);
  7704. break;
  7705. }
  7706. case GLSLstd450PackSnorm4x8:
  7707. emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
  7708. break;
  7709. case GLSLstd450PackUnorm4x8:
  7710. emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
  7711. break;
  7712. case GLSLstd450PackSnorm2x16:
  7713. emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
  7714. break;
  7715. case GLSLstd450PackUnorm2x16:
  7716. emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
  7717. break;
  7718. case GLSLstd450PackHalf2x16:
  7719. emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
  7720. break;
  7721. case GLSLstd450UnpackSnorm4x8:
  7722. emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
  7723. break;
  7724. case GLSLstd450UnpackUnorm4x8:
  7725. emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
  7726. break;
  7727. case GLSLstd450UnpackSnorm2x16:
  7728. emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
  7729. break;
  7730. case GLSLstd450UnpackUnorm2x16:
  7731. emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
  7732. break;
  7733. case GLSLstd450UnpackHalf2x16:
  7734. emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
  7735. break;
  7736. case GLSLstd450PackDouble2x32:
  7737. emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
  7738. break;
  7739. case GLSLstd450UnpackDouble2x32:
  7740. emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
  7741. break;
  7742. // Vector math
  7743. case GLSLstd450Length:
  7744. emit_unary_func_op(result_type, id, args[0], "length");
  7745. break;
  7746. case GLSLstd450Distance:
  7747. emit_binary_func_op(result_type, id, args[0], args[1], "distance");
  7748. break;
  7749. case GLSLstd450Cross:
  7750. emit_binary_func_op(result_type, id, args[0], args[1], "cross");
  7751. break;
  7752. case GLSLstd450Normalize:
  7753. emit_unary_func_op(result_type, id, args[0], "normalize");
  7754. break;
  7755. case GLSLstd450FaceForward:
  7756. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
  7757. break;
  7758. case GLSLstd450Reflect:
  7759. emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
  7760. break;
  7761. case GLSLstd450Refract:
  7762. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
  7763. break;
  7764. // Bit-fiddling
  7765. case GLSLstd450FindILsb:
  7766. // findLSB always returns int.
  7767. emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
  7768. break;
  7769. case GLSLstd450FindSMsb:
  7770. emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
  7771. break;
  7772. case GLSLstd450FindUMsb:
  7773. emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
  7774. int_type); // findMSB always returns int.
  7775. break;
  7776. // Multisampled varying
  7777. case GLSLstd450InterpolateAtCentroid:
  7778. emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
  7779. break;
  7780. case GLSLstd450InterpolateAtSample:
  7781. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
  7782. break;
  7783. case GLSLstd450InterpolateAtOffset:
  7784. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
  7785. break;
  7786. case GLSLstd450NMin:
  7787. case GLSLstd450NMax:
  7788. {
  7789. emit_nminmax_op(result_type, id, args[0], args[1], op);
  7790. break;
  7791. }
  7792. case GLSLstd450NClamp:
  7793. {
  7794. // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
  7795. // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
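// NClamp(x, lo, hi) is decomposed into NMin(NMax(x, lo), hi), with a dedicated ID
// for the intermediate NMax result.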
  7796. uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
  7797. if (!max_id)
  7798. max_id = ir.increase_bound_by(1);
  7799. // Inherit precision qualifiers.
  7800. ir.meta[max_id] = ir.meta[id];
  7801. emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
  7802. emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
  7803. break;
  7804. }
  7805. default:
  7806. statement("// unimplemented GLSL op ", eop);
  7807. break;
  7808. }
  7809. }
  7810. void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
  7811. {
  7812. // Need to emulate this call.
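// Strategy: compute min()/max() normally, then detect NaN in either operand
// (isnan(), or x != x on legacy targets) and use mix() to select the other operand
// instead, which matches NMin/NMax semantics.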
  7813. uint32_t &ids = extra_sub_expressions[id];
  7814. if (!ids)
  7815. {
  7816. ids = ir.increase_bound_by(5);
  7817. auto btype = get<SPIRType>(result_type);
  7818. btype.basetype = SPIRType::Boolean;
  7819. set<SPIRType>(ids, btype);
  7820. }
  7821. uint32_t btype_id = ids + 0;
  7822. uint32_t left_nan_id = ids + 1;
  7823. uint32_t right_nan_id = ids + 2;
  7824. uint32_t tmp_id = ids + 3;
  7825. uint32_t mixed_first_id = ids + 4;
  7826. // Inherit precision qualifiers.
  7827. ir.meta[tmp_id] = ir.meta[id];
  7828. ir.meta[mixed_first_id] = ir.meta[id];
  7829. if (!is_legacy())
  7830. {
  7831. emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
  7832. emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
  7833. }
  7834. else if (expression_type(op0).vecsize > 1)
  7835. {
  7836. // If the number doesn't equal itself, it must be NaN
  7837. emit_binary_func_op(btype_id, left_nan_id, op0, op0, "notEqual");
  7838. emit_binary_func_op(btype_id, right_nan_id, op1, op1, "notEqual");
  7839. }
  7840. else
  7841. {
  7842. emit_binary_op(btype_id, left_nan_id, op0, op0, "!=");
  7843. emit_binary_op(btype_id, right_nan_id, op1, op1, "!=");
  7844. }
  7845. emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
  7846. emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
  7847. emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
  7848. }
  7849. void CompilerGLSL::emit_emulated_ahyper_op(uint32_t result_type, uint32_t id, uint32_t op0, GLSLstd450 op)
  7850. {
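// Emulated via the logarithmic identities:
//   asinh(x) = log(x + sqrt(x^2 + 1))
//   acosh(x) = log(x + sqrt(x^2 - 1))
//   atanh(x) = 0.5 * log((1 + x) / (1 - x))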
  7851. const char *one = backend.float_literal_suffix ? "1.0f" : "1.0";
  7852. std::string expr;
  7853. bool forward = should_forward(op0);
  7854. switch (op)
  7855. {
  7856. case GLSLstd450Asinh:
  7857. expr = join("log(", to_enclosed_expression(op0), " + sqrt(",
  7858. to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " + ", one, "))");
  7860. break;
  7861. case GLSLstd450Acosh:
  7862. expr = join("log(", to_enclosed_expression(op0), " + sqrt(",
  7863. to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " - ", one, "))");
  7864. break;
  7865. case GLSLstd450Atanh:
  7866. expr = join("log((", one, " + ", to_enclosed_expression(op0), ") / "
  7867. "(", one, " - ", to_enclosed_expression(op0), ")) * 0.5",
  7868. backend.float_literal_suffix ? "f" : "");
  7869. break;
  7870. default:
  7871. SPIRV_CROSS_THROW("Invalid op.");
  7872. }
  7873. emit_op(result_type, id, expr, forward);
  7874. inherit_expression_dependencies(id, op0);
  7875. }
  7876. void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
  7877. uint32_t)
  7878. {
  7879. require_extension_internal("GL_AMD_shader_ballot");
  7880. enum AMDShaderBallot
  7881. {
  7882. SwizzleInvocationsAMD = 1,
  7883. SwizzleInvocationsMaskedAMD = 2,
  7884. WriteInvocationAMD = 3,
  7885. MbcntAMD = 4
  7886. };
  7887. auto op = static_cast<AMDShaderBallot>(eop);
  7888. switch (op)
  7889. {
  7890. case SwizzleInvocationsAMD:
  7891. emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
  7892. register_control_dependent_expression(id);
  7893. break;
  7894. case SwizzleInvocationsMaskedAMD:
  7895. emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
  7896. register_control_dependent_expression(id);
  7897. break;
  7898. case WriteInvocationAMD:
  7899. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
  7900. register_control_dependent_expression(id);
  7901. break;
  7902. case MbcntAMD:
  7903. emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
  7904. register_control_dependent_expression(id);
  7905. break;
  7906. default:
  7907. statement("// unimplemented SPV AMD shader ballot op ", eop);
  7908. break;
  7909. }
  7910. }
  7911. void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
  7912. const uint32_t *args, uint32_t)
  7913. {
  7914. require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
  7915. enum AMDShaderExplicitVertexParameter
  7916. {
  7917. InterpolateAtVertexAMD = 1
  7918. };
  7919. auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
  7920. switch (op)
  7921. {
  7922. case InterpolateAtVertexAMD:
  7923. emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
  7924. break;
  7925. default:
  7926. statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
  7927. break;
  7928. }
  7929. }
  7930. void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
  7931. const uint32_t *args, uint32_t)
  7932. {
  7933. require_extension_internal("GL_AMD_shader_trinary_minmax");
  7934. enum AMDShaderTrinaryMinMax
  7935. {
  7936. FMin3AMD = 1,
  7937. UMin3AMD = 2,
  7938. SMin3AMD = 3,
  7939. FMax3AMD = 4,
  7940. UMax3AMD = 5,
  7941. SMax3AMD = 6,
  7942. FMid3AMD = 7,
  7943. UMid3AMD = 8,
  7944. SMid3AMD = 9
  7945. };
  7946. auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
  7947. switch (op)
  7948. {
  7949. case FMin3AMD:
  7950. case UMin3AMD:
  7951. case SMin3AMD:
  7952. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
  7953. break;
  7954. case FMax3AMD:
  7955. case UMax3AMD:
  7956. case SMax3AMD:
  7957. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
  7958. break;
  7959. case FMid3AMD:
  7960. case UMid3AMD:
  7961. case SMid3AMD:
  7962. emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
  7963. break;
  7964. default:
  7965. statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
  7966. break;
  7967. }
  7968. }
  7969. void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
  7970. uint32_t)
  7971. {
  7972. require_extension_internal("GL_AMD_gcn_shader");
  7973. enum AMDGCNShader
  7974. {
  7975. CubeFaceIndexAMD = 1,
  7976. CubeFaceCoordAMD = 2,
  7977. TimeAMD = 3
  7978. };
  7979. auto op = static_cast<AMDGCNShader>(eop);
  7980. switch (op)
  7981. {
  7982. case CubeFaceIndexAMD:
  7983. emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
  7984. break;
  7985. case CubeFaceCoordAMD:
  7986. emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
  7987. break;
  7988. case TimeAMD:
  7989. {
  7990. string expr = "timeAMD()";
  7991. emit_op(result_type, id, expr, true);
  7992. register_control_dependent_expression(id);
  7993. break;
  7994. }
  7995. default:
  7996. statement("// unimplemented SPV AMD gcn shader op ", eop);
  7997. break;
  7998. }
  7999. }
  8000. void CompilerGLSL::emit_subgroup_op(const Instruction &i)
  8001. {
  8002. const uint32_t *ops = stream(i);
  8003. auto op = static_cast<Op>(i.op);
  8004. if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops))
  8005. SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
  8006. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  8007. uint32_t integer_width = get_integer_width_for_instruction(i);
  8008. auto int_type = to_signed_basetype(integer_width);
  8009. auto uint_type = to_unsigned_basetype(integer_width);
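// The first switch only registers the extension/feature requirements for the op;
// the second switch further down emits the actual GLSL call.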
  8010. switch (op)
  8011. {
  8012. case OpGroupNonUniformElect:
  8013. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
  8014. break;
  8015. case OpGroupNonUniformBallotBitCount:
  8016. {
  8017. const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
  8018. if (operation == GroupOperationReduce)
  8019. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
  8020. else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
  8021. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
  8022. }
  8023. break;
  8024. case OpGroupNonUniformBallotBitExtract:
  8025. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
  8026. break;
  8027. case OpGroupNonUniformInverseBallot:
  8028. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
  8029. break;
  8030. case OpGroupNonUniformBallot:
  8031. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
  8032. break;
  8033. case OpGroupNonUniformBallotFindLSB:
  8034. case OpGroupNonUniformBallotFindMSB:
  8035. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
  8036. break;
  8037. case OpGroupNonUniformBroadcast:
  8038. case OpGroupNonUniformBroadcastFirst:
  8039. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
  8040. break;
  8041. case OpGroupNonUniformShuffle:
  8042. case OpGroupNonUniformShuffleXor:
  8043. require_extension_internal("GL_KHR_shader_subgroup_shuffle");
  8044. break;
  8045. case OpGroupNonUniformShuffleUp:
  8046. case OpGroupNonUniformShuffleDown:
  8047. require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
  8048. break;
  8049. case OpGroupNonUniformAll:
  8050. case OpGroupNonUniformAny:
  8051. case OpGroupNonUniformAllEqual:
  8052. {
  8053. const SPIRType &type = expression_type(ops[3]);
  8054. if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
  8055. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
  8056. else
  8057. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
  8058. }
  8059. break;
  8060. // clang-format off
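// Requests the appropriate subgroup-arithmetic feature for each non-uniform
// arithmetic op, depending on whether it is a reduce, an inclusive/exclusive scan,
// or a clustered reduce.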
  8061. #define GLSL_GROUP_OP(OP)\
  8062. case OpGroupNonUniform##OP:\
  8063. {\
  8064. auto operation = static_cast<GroupOperation>(ops[3]);\
  8065. if (operation == GroupOperationClusteredReduce)\
  8066. require_extension_internal("GL_KHR_shader_subgroup_clustered");\
  8067. else if (operation == GroupOperationReduce)\
  8068. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\
  8069. else if (operation == GroupOperationExclusiveScan)\
  8070. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\
  8071. else if (operation == GroupOperationInclusiveScan)\
  8072. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\
  8073. else\
  8074. SPIRV_CROSS_THROW("Invalid group operation.");\
  8075. break;\
  8076. }
  8077. GLSL_GROUP_OP(IAdd)
  8078. GLSL_GROUP_OP(FAdd)
  8079. GLSL_GROUP_OP(IMul)
  8080. GLSL_GROUP_OP(FMul)
  8081. #undef GLSL_GROUP_OP
  8082. // clang-format on
  8083. case OpGroupNonUniformFMin:
  8084. case OpGroupNonUniformFMax:
  8085. case OpGroupNonUniformSMin:
  8086. case OpGroupNonUniformSMax:
  8087. case OpGroupNonUniformUMin:
  8088. case OpGroupNonUniformUMax:
  8089. case OpGroupNonUniformBitwiseAnd:
  8090. case OpGroupNonUniformBitwiseOr:
  8091. case OpGroupNonUniformBitwiseXor:
  8092. case OpGroupNonUniformLogicalAnd:
  8093. case OpGroupNonUniformLogicalOr:
  8094. case OpGroupNonUniformLogicalXor:
  8095. {
  8096. auto operation = static_cast<GroupOperation>(ops[3]);
  8097. if (operation == GroupOperationClusteredReduce)
  8098. {
  8099. require_extension_internal("GL_KHR_shader_subgroup_clustered");
  8100. }
  8101. else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
  8102. operation == GroupOperationReduce)
  8103. {
  8104. require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
  8105. }
  8106. else
  8107. SPIRV_CROSS_THROW("Invalid group operation.");
  8108. break;
  8109. }
  8110. case OpGroupNonUniformQuadSwap:
  8111. case OpGroupNonUniformQuadBroadcast:
  8112. require_extension_internal("GL_KHR_shader_subgroup_quad");
  8113. break;
  8114. default:
  8115. SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
  8116. }
  8117. uint32_t result_type = ops[0];
  8118. uint32_t id = ops[1];
  8119. auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
  8120. if (scope != ScopeSubgroup)
  8121. SPIRV_CROSS_THROW("Only subgroup scope is supported.");
  8122. switch (op)
  8123. {
  8124. case OpGroupNonUniformElect:
  8125. emit_op(result_type, id, "subgroupElect()", true);
  8126. break;
  8127. case OpGroupNonUniformBroadcast:
  8128. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
  8129. break;
  8130. case OpGroupNonUniformBroadcastFirst:
  8131. emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
  8132. break;
  8133. case OpGroupNonUniformBallot:
  8134. emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
  8135. break;
  8136. case OpGroupNonUniformInverseBallot:
  8137. emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
  8138. break;
  8139. case OpGroupNonUniformBallotBitExtract:
  8140. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
  8141. break;
  8142. case OpGroupNonUniformBallotFindLSB:
  8143. emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
  8144. break;
  8145. case OpGroupNonUniformBallotFindMSB:
  8146. emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
  8147. break;
  8148. case OpGroupNonUniformBallotBitCount:
  8149. {
  8150. auto operation = static_cast<GroupOperation>(ops[3]);
  8151. if (operation == GroupOperationReduce)
  8152. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
  8153. else if (operation == GroupOperationInclusiveScan)
  8154. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
  8155. else if (operation == GroupOperationExclusiveScan)
  8156. emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
  8157. else
  8158. SPIRV_CROSS_THROW("Invalid BitCount operation.");
  8159. break;
  8160. }
  8161. case OpGroupNonUniformShuffle:
  8162. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
  8163. break;
  8164. case OpGroupNonUniformShuffleXor:
  8165. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
  8166. break;
  8167. case OpGroupNonUniformShuffleUp:
  8168. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
  8169. break;
  8170. case OpGroupNonUniformShuffleDown:
  8171. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
  8172. break;
  8173. case OpGroupNonUniformAll:
  8174. emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
  8175. break;
  8176. case OpGroupNonUniformAny:
  8177. emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
  8178. break;
  8179. case OpGroupNonUniformAllEqual:
  8180. emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
  8181. break;
  8182. // clang-format off
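// These macros expand to cases that pick subgroup<Op>, subgroupInclusive<Op>,
// subgroupExclusive<Op> or subgroupClustered<Op> based on the GroupOperation;
// the _CAST variant additionally bitcasts to the required signed/unsigned type.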
  8183. #define GLSL_GROUP_OP(op, glsl_op) \
  8184. case OpGroupNonUniform##op: \
  8185. { \
  8186. auto operation = static_cast<GroupOperation>(ops[3]); \
  8187. if (operation == GroupOperationReduce) \
  8188. emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
  8189. else if (operation == GroupOperationInclusiveScan) \
  8190. emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
  8191. else if (operation == GroupOperationExclusiveScan) \
  8192. emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
  8193. else if (operation == GroupOperationClusteredReduce) \
  8194. emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
  8195. else \
  8196. SPIRV_CROSS_THROW("Invalid group operation."); \
  8197. break; \
  8198. }
  8199. #define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
  8200. case OpGroupNonUniform##op: \
  8201. { \
  8202. auto operation = static_cast<GroupOperation>(ops[3]); \
  8203. if (operation == GroupOperationReduce) \
  8204. emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
  8205. else if (operation == GroupOperationInclusiveScan) \
  8206. emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
  8207. else if (operation == GroupOperationExclusiveScan) \
  8208. emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
  8209. else if (operation == GroupOperationClusteredReduce) \
  8210. emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
  8211. else \
  8212. SPIRV_CROSS_THROW("Invalid group operation."); \
  8213. break; \
  8214. }
  8215. GLSL_GROUP_OP(FAdd, Add)
  8216. GLSL_GROUP_OP(FMul, Mul)
  8217. GLSL_GROUP_OP(FMin, Min)
  8218. GLSL_GROUP_OP(FMax, Max)
  8219. GLSL_GROUP_OP(IAdd, Add)
  8220. GLSL_GROUP_OP(IMul, Mul)
  8221. GLSL_GROUP_OP_CAST(SMin, Min, int_type)
  8222. GLSL_GROUP_OP_CAST(SMax, Max, int_type)
  8223. GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
  8224. GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
  8225. GLSL_GROUP_OP(BitwiseAnd, And)
  8226. GLSL_GROUP_OP(BitwiseOr, Or)
  8227. GLSL_GROUP_OP(BitwiseXor, Xor)
  8228. GLSL_GROUP_OP(LogicalAnd, And)
  8229. GLSL_GROUP_OP(LogicalOr, Or)
  8230. GLSL_GROUP_OP(LogicalXor, Xor)
  8231. #undef GLSL_GROUP_OP
  8232. #undef GLSL_GROUP_OP_CAST
  8233. // clang-format on
  8234. case OpGroupNonUniformQuadSwap:
  8235. {
  8236. uint32_t direction = evaluate_constant_u32(ops[4]);
  8237. if (direction == 0)
  8238. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
  8239. else if (direction == 1)
  8240. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
  8241. else if (direction == 2)
  8242. emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
  8243. else
  8244. SPIRV_CROSS_THROW("Invalid quad swap direction.");
  8245. break;
  8246. }
  8247. case OpGroupNonUniformQuadBroadcast:
  8248. {
  8249. emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
  8250. break;
  8251. }
  8252. default:
  8253. SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
  8254. }
  8255. register_control_dependent_expression(id);
  8256. }
  8257. string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
  8258. {
  8259. // OpBitcast can deal with pointers.
  8260. if (out_type.pointer || in_type.pointer)
  8261. {
  8262. if (out_type.vecsize == 2 || in_type.vecsize == 2)
  8263. require_extension_internal("GL_EXT_buffer_reference_uvec2");
  8264. return type_to_glsl(out_type);
  8265. }
  8266. if (out_type.basetype == in_type.basetype)
  8267. return "";
  8268. assert(out_type.basetype != SPIRType::Boolean);
  8269. assert(in_type.basetype != SPIRType::Boolean);
  8270. bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
  8271. bool same_size_cast = out_type.width == in_type.width;
  8272. // Trivial bitcast case, casts between integers.
  8273. if (integral_cast && same_size_cast)
  8274. return type_to_glsl(out_type);
  8275. // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
  8276. if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
  8277. return "unpack8";
  8278. else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
  8279. return "pack16";
  8280. else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
  8281. return "pack32";
  8282. // Floating <-> Integer special casts. Just have to enumerate all cases. :(
  8283. // 16-bit, 32-bit and 64-bit floats.
  8284. if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
  8285. {
  8286. if (is_legacy_es())
  8287. SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
  8288. else if (!options.es && options.version < 330)
  8289. require_extension_internal("GL_ARB_shader_bit_encoding");
  8290. return "floatBitsToUint";
  8291. }
  8292. else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
  8293. {
  8294. if (is_legacy_es())
  8295. SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
  8296. else if (!options.es && options.version < 330)
  8297. require_extension_internal("GL_ARB_shader_bit_encoding");
  8298. return "floatBitsToInt";
  8299. }
  8300. else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
  8301. {
  8302. if (is_legacy_es())
  8303. SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
  8304. else if (!options.es && options.version < 330)
  8305. require_extension_internal("GL_ARB_shader_bit_encoding");
  8306. return "uintBitsToFloat";
  8307. }
  8308. else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
  8309. {
  8310. if (is_legacy_es())
  8311. SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
  8312. else if (!options.es && options.version < 330)
  8313. require_extension_internal("GL_ARB_shader_bit_encoding");
  8314. return "intBitsToFloat";
  8315. }
  8316. else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
  8317. return "doubleBitsToInt64";
  8318. else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
  8319. return "doubleBitsToUint64";
  8320. else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
  8321. return "int64BitsToDouble";
  8322. else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
  8323. return "uint64BitsToDouble";
  8324. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
  8325. return "float16BitsToInt16";
  8326. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
  8327. return "float16BitsToUint16";
  8328. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
  8329. return "int16BitsToFloat16";
  8330. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
  8331. return "uint16BitsToFloat16";
  8332. // And finally, some even more special purpose casts.
  8333. if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
  8334. return "packUint2x32";
  8335. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
  8336. return "unpackUint2x32";
  8337. else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
  8338. return "unpackFloat2x16";
  8339. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
  8340. return "packFloat2x16";
  8341. else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
  8342. return "packInt2x16";
  8343. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
  8344. return "unpackInt2x16";
  8345. else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
  8346. return "packUint2x16";
  8347. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
  8348. return "unpackUint2x16";
  8349. else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
  8350. return "packInt4x16";
  8351. else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
  8352. return "unpackInt4x16";
  8353. else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
  8354. return "packUint4x16";
  8355. else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
  8356. return "unpackUint4x16";
  8357. return "";
  8358. }
  8359. string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
  8360. {
  8361. auto op = bitcast_glsl_op(result_type, expression_type(argument));
  8362. if (op.empty())
  8363. return to_enclosed_unpacked_expression(argument);
  8364. else
  8365. return join(op, "(", to_unpacked_expression(argument), ")");
  8366. }
  8367. std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
  8368. {
  8369. auto expr = to_expression(arg);
  8370. auto &src_type = expression_type(arg);
  8371. if (src_type.basetype != target_type)
  8372. {
  8373. auto target = src_type;
  8374. target.basetype = target_type;
  8375. expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
  8376. }
  8377. return expr;
  8378. }
  8379. std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
  8380. const std::string &expr)
  8381. {
  8382. if (target_type.basetype == expr_type)
  8383. return expr;
  8384. auto src_type = target_type;
  8385. src_type.basetype = expr_type;
  8386. return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
  8387. }
  8388. string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
  8389. {
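// Maps a SPIR-V BuiltIn to its GLSL name, pulling in required extensions as a side
// effect and throwing when the target profile cannot express it; unknown built-ins
// fall back to a gl_BuiltIn_<n> placeholder.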
  8390. switch (builtin)
  8391. {
  8392. case BuiltInPosition:
  8393. return "gl_Position";
  8394. case BuiltInPointSize:
  8395. return "gl_PointSize";
  8396. case BuiltInClipDistance:
  8397. {
  8398. if (options.es)
  8399. require_extension_internal("GL_EXT_clip_cull_distance");
  8400. return "gl_ClipDistance";
  8401. }
  8402. case BuiltInCullDistance:
  8403. {
  8404. if (options.es)
  8405. require_extension_internal("GL_EXT_clip_cull_distance");
  8406. return "gl_CullDistance";
  8407. }
  8408. case BuiltInVertexId:
  8409. if (options.vulkan_semantics)
  8410. SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
  8411. "with GL semantics.");
  8412. return "gl_VertexID";
  8413. case BuiltInInstanceId:
  8414. if (options.vulkan_semantics)
  8415. {
  8416. auto model = get_entry_point().model;
  8417. switch (model)
  8418. {
  8419. case spv::ExecutionModelIntersectionKHR:
  8420. case spv::ExecutionModelAnyHitKHR:
  8421. case spv::ExecutionModelClosestHitKHR:
  8422. // gl_InstanceID is allowed in these shaders.
  8423. break;
  8424. default:
  8425. SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
  8426. "created with GL semantics.");
  8427. }
  8428. }
  8429. if (!options.es && options.version < 140)
  8430. {
  8431. require_extension_internal("GL_ARB_draw_instanced");
  8432. }
  8433. return "gl_InstanceID";
  8434. case BuiltInVertexIndex:
  8435. if (options.vulkan_semantics)
  8436. return "gl_VertexIndex";
  8437. else
  8438. return "gl_VertexID"; // gl_VertexID already has the base offset applied.
  8439. case BuiltInInstanceIndex:
  8440. if (options.vulkan_semantics)
  8441. return "gl_InstanceIndex";
  8442. if (!options.es && options.version < 140)
  8443. {
  8444. require_extension_internal("GL_ARB_draw_instanced");
  8445. }
  8446. if (options.vertex.support_nonzero_base_instance)
  8447. {
  8448. if (!options.vulkan_semantics)
  8449. {
  8450. // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
  8451. require_extension_internal("GL_ARB_shader_draw_parameters");
  8452. }
  8453. return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
  8454. }
  8455. else
  8456. return "gl_InstanceID";
  8457. case BuiltInPrimitiveId:
  8458. if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
  8459. return "gl_PrimitiveIDIn";
  8460. else
  8461. return "gl_PrimitiveID";
  8462. case BuiltInInvocationId:
  8463. return "gl_InvocationID";
  8464. case BuiltInLayer:
  8465. return "gl_Layer";
  8466. case BuiltInViewportIndex:
  8467. return "gl_ViewportIndex";
  8468. case BuiltInTessLevelOuter:
  8469. return "gl_TessLevelOuter";
  8470. case BuiltInTessLevelInner:
  8471. return "gl_TessLevelInner";
  8472. case BuiltInTessCoord:
  8473. return "gl_TessCoord";
  8474. case BuiltInFragCoord:
  8475. return "gl_FragCoord";
  8476. case BuiltInPointCoord:
  8477. return "gl_PointCoord";
  8478. case BuiltInFrontFacing:
  8479. return "gl_FrontFacing";
  8480. case BuiltInFragDepth:
  8481. return "gl_FragDepth";
  8482. case BuiltInNumWorkgroups:
  8483. return "gl_NumWorkGroups";
  8484. case BuiltInWorkgroupSize:
  8485. return "gl_WorkGroupSize";
  8486. case BuiltInWorkgroupId:
  8487. return "gl_WorkGroupID";
  8488. case BuiltInLocalInvocationId:
  8489. return "gl_LocalInvocationID";
  8490. case BuiltInGlobalInvocationId:
  8491. return "gl_GlobalInvocationID";
  8492. case BuiltInLocalInvocationIndex:
  8493. return "gl_LocalInvocationIndex";
  8494. case BuiltInHelperInvocation:
  8495. return "gl_HelperInvocation";
  8496. case BuiltInBaseVertex:
  8497. if (options.es)
  8498. SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
  8499. if (options.vulkan_semantics)
  8500. {
  8501. if (options.version < 460)
  8502. {
  8503. require_extension_internal("GL_ARB_shader_draw_parameters");
  8504. return "gl_BaseVertexARB";
  8505. }
  8506. return "gl_BaseVertex";
  8507. }
  8508. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  8509. require_extension_internal("GL_ARB_shader_draw_parameters");
  8510. return "SPIRV_Cross_BaseVertex";
  8511. case BuiltInBaseInstance:
  8512. if (options.es)
  8513. SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
  8514. if (options.vulkan_semantics)
  8515. {
  8516. if (options.version < 460)
  8517. {
  8518. require_extension_internal("GL_ARB_shader_draw_parameters");
  8519. return "gl_BaseInstanceARB";
  8520. }
  8521. return "gl_BaseInstance";
  8522. }
  8523. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  8524. require_extension_internal("GL_ARB_shader_draw_parameters");
  8525. return "SPIRV_Cross_BaseInstance";
  8526. case BuiltInDrawIndex:
  8527. if (options.es)
  8528. SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
  8529. if (options.vulkan_semantics)
  8530. {
  8531. if (options.version < 460)
  8532. {
  8533. require_extension_internal("GL_ARB_shader_draw_parameters");
  8534. return "gl_DrawIDARB";
  8535. }
  8536. return "gl_DrawID";
  8537. }
  8538. // On regular GL, this is soft-enabled and we emit ifdefs in code.
  8539. require_extension_internal("GL_ARB_shader_draw_parameters");
  8540. return "gl_DrawIDARB";
  8541. case BuiltInSampleId:
  8542. if (is_legacy())
  8543. SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
  8544. else if (options.es && options.version < 320)
  8545. require_extension_internal("GL_OES_sample_variables");
  8546. else if (!options.es && options.version < 400)
  8547. require_extension_internal("GL_ARB_sample_shading");
  8548. return "gl_SampleID";
  8549. case BuiltInSampleMask:
  8550. if (is_legacy())
  8551. SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
  8552. else if (options.es && options.version < 320)
  8553. require_extension_internal("GL_OES_sample_variables");
  8554. else if (!options.es && options.version < 400)
  8555. require_extension_internal("GL_ARB_sample_shading");
  8556. if (storage == StorageClassInput)
  8557. return "gl_SampleMaskIn";
  8558. else
  8559. return "gl_SampleMask";
  8560. case BuiltInSamplePosition:
  8561. if (is_legacy())
  8562. SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL.");
  8563. else if (options.es && options.version < 320)
  8564. require_extension_internal("GL_OES_sample_variables");
  8565. else if (!options.es && options.version < 400)
  8566. require_extension_internal("GL_ARB_sample_shading");
  8567. return "gl_SamplePosition";
  8568. case BuiltInViewIndex:
  8569. if (options.vulkan_semantics)
  8570. return "gl_ViewIndex";
  8571. else
  8572. return "gl_ViewID_OVR";
  8573. case BuiltInNumSubgroups:
  8574. request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
  8575. return "gl_NumSubgroups";
  8576. case BuiltInSubgroupId:
  8577. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
  8578. return "gl_SubgroupID";
  8579. case BuiltInSubgroupSize:
  8580. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
  8581. return "gl_SubgroupSize";
  8582. case BuiltInSubgroupLocalInvocationId:
  8583. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
  8584. return "gl_SubgroupInvocationID";
  8585. case BuiltInSubgroupEqMask:
  8586. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  8587. return "gl_SubgroupEqMask";
  8588. case BuiltInSubgroupGeMask:
  8589. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  8590. return "gl_SubgroupGeMask";
  8591. case BuiltInSubgroupGtMask:
  8592. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  8593. return "gl_SubgroupGtMask";
  8594. case BuiltInSubgroupLeMask:
  8595. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  8596. return "gl_SubgroupLeMask";
  8597. case BuiltInSubgroupLtMask:
  8598. request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
  8599. return "gl_SubgroupLtMask";
  8600. case BuiltInLaunchIdKHR:
  8601. return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
  8602. case BuiltInLaunchSizeKHR:
  8603. return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
  8604. case BuiltInWorldRayOriginKHR:
  8605. return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
  8606. case BuiltInWorldRayDirectionKHR:
  8607. return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
  8608. case BuiltInObjectRayOriginKHR:
  8609. return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
  8610. case BuiltInObjectRayDirectionKHR:
  8611. return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
  8612. case BuiltInRayTminKHR:
  8613. return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
  8614. case BuiltInRayTmaxKHR:
  8615. return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
  8616. case BuiltInInstanceCustomIndexKHR:
  8617. return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
  8618. case BuiltInObjectToWorldKHR:
  8619. return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
  8620. case BuiltInWorldToObjectKHR:
  8621. return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
  8622. case BuiltInHitTNV:
  8623. // gl_HitTEXT is an alias of RayTMax in KHR.
  8624. return "gl_HitTNV";
  8625. case BuiltInHitKindKHR:
  8626. return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
  8627. case BuiltInIncomingRayFlagsKHR:
  8628. return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
  8629. case BuiltInBaryCoordKHR:
  8630. {
  8631. if (options.es && options.version < 320)
  8632. SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320.");
  8633. else if (!options.es && options.version < 450)
  8634. SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450.");
  8635. if (barycentric_is_nv)
  8636. {
  8637. require_extension_internal("GL_NV_fragment_shader_barycentric");
  8638. return "gl_BaryCoordNV";
  8639. }
  8640. else
  8641. {
  8642. require_extension_internal("GL_EXT_fragment_shader_barycentric");
  8643. return "gl_BaryCoordEXT";
  8644. }
  8645. }
  8646. case BuiltInBaryCoordNoPerspNV:
  8647. {
  8648. if (options.es && options.version < 320)
  8649. SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320.");
  8650. else if (!options.es && options.version < 450)
  8651. SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450.");
  8652. if (barycentric_is_nv)
  8653. {
  8654. require_extension_internal("GL_NV_fragment_shader_barycentric");
  8655. return "gl_BaryCoordNoPerspNV";
  8656. }
  8657. else
  8658. {
  8659. require_extension_internal("GL_EXT_fragment_shader_barycentric");
  8660. return "gl_BaryCoordNoPerspEXT";
  8661. }
  8662. }
  8663. case BuiltInFragStencilRefEXT:
  8664. {
  8665. if (!options.es)
  8666. {
  8667. require_extension_internal("GL_ARB_shader_stencil_export");
  8668. return "gl_FragStencilRefARB";
  8669. }
  8670. else
  8671. SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
  8672. }
  8673. case BuiltInPrimitiveShadingRateKHR:
  8674. {
  8675. if (!options.vulkan_semantics)
  8676. SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
  8677. require_extension_internal("GL_EXT_fragment_shading_rate");
  8678. return "gl_PrimitiveShadingRateEXT";
  8679. }
  8680. case BuiltInShadingRateKHR:
  8681. {
  8682. if (!options.vulkan_semantics)
  8683. SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
  8684. require_extension_internal("GL_EXT_fragment_shading_rate");
  8685. return "gl_ShadingRateEXT";
  8686. }
  8687. case BuiltInDeviceIndex:
  8688. if (!options.vulkan_semantics)
  8689. SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
  8690. require_extension_internal("GL_EXT_device_group");
  8691. return "gl_DeviceIndex";
  8692. case BuiltInFullyCoveredEXT:
  8693. if (!options.es)
  8694. require_extension_internal("GL_NV_conservative_raster_underestimation");
  8695. else
  8696. SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
  8697. return "gl_FragFullyCoveredNV";
  8698. case BuiltInPrimitiveTriangleIndicesEXT:
  8699. return "gl_PrimitiveTriangleIndicesEXT";
  8700. case BuiltInPrimitiveLineIndicesEXT:
  8701. return "gl_PrimitiveLineIndicesEXT";
  8702. case BuiltInPrimitivePointIndicesEXT:
  8703. return "gl_PrimitivePointIndicesEXT";
  8704. case BuiltInCullPrimitiveEXT:
  8705. return "gl_CullPrimitiveEXT";
  8706. default:
  8707. return join("gl_BuiltIn_", convert_to_string(builtin));
  8708. }
  8709. }
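// Maps a literal component index (0-3) to its GLSL swizzle character.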
const char *CompilerGLSL::index_to_swizzle(uint32_t index)
{
	switch (index)
	{
	case 0:
		return "x";
	case 1:
		return "y";
	case 2:
		return "z";
	case 3:
		return "w";
	default:
		return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec.
	}
}
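// Appends a single [index] subscript to an access chain expression.
// For OpPtrAccessChain on a base that is already arrayed, the index is folded into the existing subscript as an offset.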
void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
                                                      AccessChainFlags flags, bool &access_chain_is_arrayed,
                                                      uint32_t index)
{
	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
	bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;

	string idx_expr = index_is_literal ? convert_to_string(index) : to_unpacked_expression(index, register_expression_read);

	// For the case where the base of an OpPtrAccessChain already ends in [n],
	// we need to use the index as an offset to the existing index, otherwise,
	// we can just use the index directly.
	if (ptr_chain && access_chain_is_arrayed)
	{
		size_t split_pos = expr.find_last_of(']');
		string expr_front = expr.substr(0, split_pos);
		string expr_back = expr.substr(split_pos);
		expr = expr_front + " + " + enclose_expression(idx_expr) + expr_back;
	}
	else
	{
		expr += "[";
		expr += idx_expr;
		expr += "]";
	}
}
bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
{
	return true;
}
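// Builds the textual access chain expression for 'base', walking 'indices' through the type hierarchy
// (pointer chains, arrays, structs, matrices, vectors) while tracking builtin redirection,
// packing, row-major conversion and related metadata.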
  8755. string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
  8756. AccessChainFlags flags, AccessChainMeta *meta)
  8757. {
  8758. string expr;
  8759. bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
  8760. bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
  8761. bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
  8762. bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
  8763. bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
  8764. bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
  8765. if (!chain_only)
  8766. {
  8767. // We handle transpose explicitly, so don't resolve that here.
  8768. auto *e = maybe_get<SPIRExpression>(base);
  8769. bool old_transpose = e && e->need_transpose;
  8770. if (e)
  8771. e->need_transpose = false;
  8772. expr = to_enclosed_expression(base, register_expression_read);
  8773. if (e)
  8774. e->need_transpose = old_transpose;
  8775. }
  8776. // Start traversing type hierarchy at the proper non-pointer types,
  8777. // but keep type_id referencing the original pointer for use below.
  8778. uint32_t type_id = expression_type_id(base);
  8779. if (!backend.native_pointers)
  8780. {
  8781. if (ptr_chain)
  8782. SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
  8783. // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
  8784. // continuing the access chain.
  8785. if (should_dereference(base))
  8786. {
  8787. auto &type = get<SPIRType>(type_id);
  8788. expr = dereference_expression(type, expr);
  8789. }
  8790. }
  8791. const auto *type = &get_pointee_type(type_id);
  8792. bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
  8793. bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
  8794. bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
  8795. uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
  8796. bool is_invariant = has_decoration(base, DecorationInvariant);
  8797. bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision);
  8798. bool pending_array_enclose = false;
  8799. bool dimension_flatten = false;
  8800. bool access_meshlet_position_y = false;
  8801. if (auto *base_expr = maybe_get<SPIRExpression>(base))
  8802. {
  8803. access_meshlet_position_y = base_expr->access_meshlet_position_y;
  8804. }
  8805. // If we are translating access to a structured buffer, the first subscript '._m0' must be hidden
  8806. bool hide_first_subscript = count > 1 && is_user_type_structured(base);
  8807. const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) {
  8808. AccessChainFlags mod_flags = flags;
  8809. if (!is_literal)
  8810. mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
  8811. if (!is_ptr_chain)
  8812. mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT;
  8813. access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
  8814. check_physical_type_cast(expr, type, physical_type);
  8815. };
  8816. for (uint32_t i = 0; i < count; i++)
  8817. {
  8818. uint32_t index = indices[i];
  8819. bool is_literal = index_is_literal;
  8820. if (is_literal && msb_is_id && (index >> 31u) != 0u)
  8821. {
  8822. is_literal = false;
  8823. index &= 0x7fffffffu;
  8824. }
  8825. // Pointer chains
  8826. if (ptr_chain && i == 0)
  8827. {
  8828. // If we are flattening multidimensional arrays, only create opening bracket on first
  8829. // array index.
  8830. if (options.flatten_multidimensional_arrays)
  8831. {
  8832. dimension_flatten = type->array.size() >= 1;
  8833. pending_array_enclose = dimension_flatten;
  8834. if (pending_array_enclose)
  8835. expr += "[";
  8836. }
  8837. if (options.flatten_multidimensional_arrays && dimension_flatten)
  8838. {
  8839. // If we are flattening multidimensional arrays, do manual stride computation.
  8840. if (is_literal)
  8841. expr += convert_to_string(index);
  8842. else
  8843. expr += to_enclosed_expression(index, register_expression_read);
  8844. for (auto j = uint32_t(type->array.size()); j; j--)
  8845. {
  8846. expr += " * ";
  8847. expr += enclose_expression(to_array_size(*type, j - 1));
  8848. }
  8849. if (type->array.empty())
  8850. pending_array_enclose = false;
  8851. else
  8852. expr += " + ";
  8853. if (!pending_array_enclose)
  8854. expr += "]";
  8855. }
  8856. else
  8857. {
  8858. append_index(index, is_literal, true);
  8859. }
  8860. if (type->basetype == SPIRType::ControlPointArray)
  8861. {
  8862. type_id = type->parent_type;
  8863. type = &get<SPIRType>(type_id);
  8864. }
  8865. access_chain_is_arrayed = true;
  8866. }
  8867. // Arrays
  8868. else if (!type->array.empty())
  8869. {
  8870. // If we are flattening multidimensional arrays, only create opening bracket on first
  8871. // array index.
  8872. if (options.flatten_multidimensional_arrays && !pending_array_enclose)
  8873. {
  8874. dimension_flatten = type->array.size() > 1;
  8875. pending_array_enclose = dimension_flatten;
  8876. if (pending_array_enclose)
  8877. expr += "[";
  8878. }
  8879. assert(type->parent_type);
  8880. auto *var = maybe_get<SPIRVariable>(base);
  8881. if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
  8882. !has_decoration(type->self, DecorationBlock))
  8883. {
  8884. // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
  8885. // Normally, these variables live in blocks when compiled from GLSL,
  8886. // but HLSL seems to just emit straight arrays here.
  8887. // We must pretend this access goes through gl_in/gl_out arrays
  8888. // to be able to access certain builtins as arrays.
// Similar concerns apply for mesh shaders, where we have to redirect to gl_MeshVerticesEXT or gl_MeshPrimitivesEXT.
  8890. auto builtin = ir.meta[base].decoration.builtin_type;
  8891. bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT;
  8892. switch (builtin)
  8893. {
  8894. case BuiltInCullDistance:
  8895. case BuiltInClipDistance:
  8896. if (type->array.size() == 1) // Red herring. Only consider block IO for two-dimensional arrays here.
  8897. {
  8898. append_index(index, is_literal);
  8899. break;
  8900. }
  8901. // fallthrough
  8902. case BuiltInPosition:
  8903. case BuiltInPointSize:
  8904. if (mesh_shader)
  8905. expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
  8906. else if (var->storage == StorageClassInput)
  8907. expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
  8908. else if (var->storage == StorageClassOutput)
  8909. expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
  8910. else
  8911. append_index(index, is_literal);
  8912. break;
  8913. case BuiltInPrimitiveId:
  8914. case BuiltInLayer:
  8915. case BuiltInViewportIndex:
  8916. case BuiltInCullPrimitiveEXT:
  8917. case BuiltInPrimitiveShadingRateKHR:
  8918. if (mesh_shader)
  8919. expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
  8920. else
  8921. append_index(index, is_literal);
  8922. break;
  8923. default:
  8924. append_index(index, is_literal);
  8925. break;
  8926. }
  8927. }
  8928. else if (backend.force_merged_mesh_block && i == 0 && var &&
  8929. !is_builtin_variable(*var) && var->storage == StorageClassOutput)
  8930. {
  8931. if (is_per_primitive_variable(*var))
  8932. expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
  8933. else
  8934. expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
  8935. }
  8936. else if (options.flatten_multidimensional_arrays && dimension_flatten)
  8937. {
  8938. // If we are flattening multidimensional arrays, do manual stride computation.
  8939. auto &parent_type = get<SPIRType>(type->parent_type);
  8940. if (is_literal)
  8941. expr += convert_to_string(index);
  8942. else
  8943. expr += to_enclosed_expression(index, register_expression_read);
  8944. for (auto j = uint32_t(parent_type.array.size()); j; j--)
  8945. {
  8946. expr += " * ";
  8947. expr += enclose_expression(to_array_size(parent_type, j - 1));
  8948. }
  8949. if (parent_type.array.empty())
  8950. pending_array_enclose = false;
  8951. else
  8952. expr += " + ";
  8953. if (!pending_array_enclose)
  8954. expr += "]";
  8955. }
  8956. // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
  8957. // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
  8958. else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
  8959. {
  8960. append_index(index, is_literal);
  8961. }
  8962. if (var && has_decoration(var->self, DecorationBuiltIn) &&
  8963. get_decoration(var->self, DecorationBuiltIn) == BuiltInPosition &&
  8964. get_execution_model() == ExecutionModelMeshEXT)
  8965. {
  8966. access_meshlet_position_y = true;
  8967. }
  8968. type_id = type->parent_type;
  8969. type = &get<SPIRType>(type_id);
  8970. access_chain_is_arrayed = true;
  8971. }
  8972. // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping.
  8973. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
  8974. else if (type->basetype == SPIRType::Struct)
  8975. {
  8976. if (!is_literal)
  8977. index = evaluate_constant_u32(index);
  8978. if (index < uint32_t(type->member_type_index_redirection.size()))
  8979. index = type->member_type_index_redirection[index];
  8980. if (index >= type->member_types.size())
  8981. SPIRV_CROSS_THROW("Member index is out of bounds!");
  8982. if (hide_first_subscript)
  8983. {
  8984. // First "._m0" subscript has been hidden, subsequent fields must be emitted even for structured buffers
  8985. hide_first_subscript = false;
  8986. }
  8987. else
  8988. {
  8989. BuiltIn builtin = BuiltInMax;
  8990. if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base))
  8991. {
  8992. if (access_chain_is_arrayed)
  8993. {
  8994. expr += ".";
  8995. expr += builtin_to_glsl(builtin, type->storage);
  8996. }
  8997. else
  8998. expr = builtin_to_glsl(builtin, type->storage);
  8999. if (builtin == BuiltInPosition && get_execution_model() == ExecutionModelMeshEXT)
  9000. {
  9001. access_meshlet_position_y = true;
  9002. }
  9003. }
  9004. else
  9005. {
  9006. // If the member has a qualified name, use it as the entire chain
  9007. string qual_mbr_name = get_member_qualified_name(type_id, index);
  9008. if (!qual_mbr_name.empty())
  9009. expr = qual_mbr_name;
  9010. else if (flatten_member_reference)
  9011. expr += join("_", to_member_name(*type, index));
  9012. else
  9013. {
// Any pointer dereferences for values are handled in the first access chain.
  9015. // For pointer chains, the pointer-ness is resolved through an array access.
  9016. // The only time this is not true is when accessing array of SSBO/UBO.
  9017. // This case is explicitly handled.
  9018. expr += to_member_reference(base, *type, index, ptr_chain || i != 0);
  9019. }
  9020. }
  9021. }
  9022. if (has_member_decoration(type->self, index, DecorationInvariant))
  9023. is_invariant = true;
  9024. if (has_member_decoration(type->self, index, DecorationRelaxedPrecision))
  9025. relaxed_precision = true;
  9026. is_packed = member_is_packed_physical_type(*type, index);
  9027. if (member_is_remapped_physical_type(*type, index))
  9028. physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
  9029. else
  9030. physical_type = 0;
  9031. row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
  9032. type = &get<SPIRType>(type->member_types[index]);
  9033. }
  9034. // Matrix -> Vector
  9035. else if (type->columns > 1)
  9036. {
  9037. // If we have a row-major matrix here, we need to defer any transpose in case this access chain
  9038. // is used to store a column. We can resolve it right here and now if we access a scalar directly,
  9039. // by flipping indexing order of the matrix.
  9040. expr += "[";
  9041. if (is_literal)
  9042. expr += convert_to_string(index);
  9043. else
  9044. expr += to_unpacked_expression(index, register_expression_read);
  9045. expr += "]";
  9046. type_id = type->parent_type;
  9047. type = &get<SPIRType>(type_id);
  9048. }
  9049. // Vector -> Scalar
  9050. else if (type->vecsize > 1)
  9051. {
  9052. string deferred_index;
  9053. if (row_major_matrix_needs_conversion)
  9054. {
  9055. // Flip indexing order.
  9056. auto column_index = expr.find_last_of('[');
  9057. if (column_index != string::npos)
  9058. {
  9059. deferred_index = expr.substr(column_index);
  9060. expr.resize(column_index);
  9061. }
  9062. }
  9063. // Internally, access chain implementation can also be used on composites,
  9064. // ignore scalar access workarounds in this case.
  9065. StorageClass effective_storage = StorageClassGeneric;
  9066. bool ignore_potential_sliced_writes = false;
  9067. if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
  9068. {
  9069. if (expression_type(base).pointer)
  9070. effective_storage = get_expression_effective_storage_class(base);
  9071. // Special consideration for control points.
  9072. // Control points can only be written by InvocationID, so there is no need
  9073. // to consider scalar access chains here.
  9074. // Cleans up some cases where it's very painful to determine the accurate storage class
  9075. // since blocks can be partially masked ...
  9076. auto *var = maybe_get_backing_variable(base);
  9077. if (var && var->storage == StorageClassOutput &&
  9078. get_execution_model() == ExecutionModelTessellationControl &&
  9079. !has_decoration(var->self, DecorationPatch))
  9080. {
  9081. ignore_potential_sliced_writes = true;
  9082. }
  9083. }
  9084. else
  9085. ignore_potential_sliced_writes = true;
  9086. if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
  9087. {
// On some backends, we might not be able to safely access individual scalars in a vector.
// To work around this, we might have to cast the access chain reference to something we can
// index into, such as a pointer to scalar.
  9091. prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
  9092. is_packed);
  9093. }
  9094. if (is_literal)
  9095. {
  9096. bool out_of_bounds = (index >= type->vecsize);
  9097. if (!is_packed && !row_major_matrix_needs_conversion)
  9098. {
  9099. expr += ".";
  9100. expr += index_to_swizzle(out_of_bounds ? 0 : index);
  9101. }
  9102. else
  9103. {
  9104. // For packed vectors, we can only access them as an array, not by swizzle.
  9105. expr += join("[", out_of_bounds ? 0 : index, "]");
  9106. }
  9107. }
  9108. else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
  9109. {
  9110. auto &c = get<SPIRConstant>(index);
  9111. bool out_of_bounds = (c.scalar() >= type->vecsize);
  9112. if (c.specialization)
  9113. {
  9114. // If the index is a spec constant, we cannot turn extract into a swizzle.
  9115. expr += join("[", out_of_bounds ? "0" : to_expression(index), "]");
  9116. }
  9117. else
  9118. {
  9119. expr += ".";
  9120. expr += index_to_swizzle(out_of_bounds ? 0 : c.scalar());
  9121. }
  9122. }
  9123. else
  9124. {
  9125. expr += "[";
  9126. expr += to_unpacked_expression(index, register_expression_read);
  9127. expr += "]";
  9128. }
  9129. if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
  9130. {
  9131. prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
  9132. is_packed);
  9133. }
  9134. if (access_meshlet_position_y)
  9135. {
  9136. if (is_literal)
  9137. {
  9138. access_meshlet_position_y = index == 1;
  9139. }
  9140. else
  9141. {
  9142. const auto *c = maybe_get<SPIRConstant>(index);
  9143. if (c)
  9144. access_meshlet_position_y = c->scalar() == 1;
  9145. else
  9146. {
  9147. // We don't know, but we have to assume no.
  9148. // Flip Y in mesh shaders is an opt-in horrible hack, so we'll have to assume shaders try to behave.
  9149. access_meshlet_position_y = false;
  9150. }
  9151. }
  9152. }
  9153. expr += deferred_index;
  9154. row_major_matrix_needs_conversion = false;
  9155. is_packed = false;
  9156. physical_type = 0;
  9157. type_id = type->parent_type;
  9158. type = &get<SPIRType>(type_id);
  9159. }
  9160. else if (!backend.allow_truncated_access_chain)
  9161. SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
  9162. }
  9163. if (pending_array_enclose)
  9164. {
SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
"but the access chain was terminated in the middle of a multidimensional array. "
"This is not supported.");
  9168. }
  9169. if (meta)
  9170. {
  9171. meta->need_transpose = row_major_matrix_needs_conversion;
  9172. meta->storage_is_packed = is_packed;
  9173. meta->storage_is_invariant = is_invariant;
  9174. meta->storage_physical_type = physical_type;
  9175. meta->relaxed_precision = relaxed_precision;
  9176. meta->access_meshlet_position_y = access_meshlet_position_y;
  9177. }
  9178. return expr;
  9179. }
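// No-op hooks in the GLSL backend; backends which need extra casts for physical pointer types
// or scalar access into vectors can override these.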
void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t)
{
}

void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
{
}
string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
{
	auto ret = join(basename, "_", to_member_name(type, index));
	ParsedIR::sanitize_underscores(ret);
	return ret;
}
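// Main access-chain entry point. Dispatches to the flattened buffer-block and flattened struct paths
// when the base has been flattened, and falls back to access_chain_internal otherwise.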
  9192. string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
  9193. AccessChainMeta *meta, bool ptr_chain)
  9194. {
  9195. if (flattened_buffer_blocks.count(base))
  9196. {
  9197. uint32_t matrix_stride = 0;
  9198. uint32_t array_stride = 0;
  9199. bool need_transpose = false;
  9200. flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
  9201. &array_stride, ptr_chain);
  9202. if (meta)
  9203. {
  9204. meta->need_transpose = target_type.columns > 1 && need_transpose;
  9205. meta->storage_is_packed = false;
  9206. }
  9207. return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
  9208. need_transpose);
  9209. }
  9210. else if (flattened_structs.count(base) && count > 0)
  9211. {
  9212. AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
  9213. if (ptr_chain)
  9214. flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
  9215. if (flattened_structs[base])
  9216. {
  9217. flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
  9218. if (meta)
  9219. meta->flattened_struct = target_type.basetype == SPIRType::Struct;
  9220. }
  9221. auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
  9222. if (meta)
  9223. {
  9224. meta->need_transpose = false;
  9225. meta->storage_is_packed = false;
  9226. }
  9227. auto basename = to_flattened_access_chain_expression(base);
  9228. auto ret = join(basename, "_", chain);
  9229. ParsedIR::sanitize_underscores(ret);
  9230. return ret;
  9231. }
  9232. else
  9233. {
  9234. AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
  9235. if (ptr_chain)
  9236. flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
  9237. return access_chain_internal(base, indices, count, flags, meta);
  9238. }
  9239. }
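// Reconstructs a struct value from its flattened members by emitting a constructor expression,
// recursing into nested struct members.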
  9240. string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
  9241. {
  9242. auto expr = type_to_glsl_constructor(type);
  9243. expr += '(';
  9244. for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
  9245. {
  9246. if (i)
  9247. expr += ", ";
  9248. auto &member_type = get<SPIRType>(type.member_types[i]);
  9249. if (member_type.basetype == SPIRType::Struct)
  9250. expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
  9251. else
  9252. expr += to_flattened_struct_member(basename, type, i);
  9253. }
  9254. expr += ')';
  9255. return expr;
  9256. }
  9257. std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
  9258. {
  9259. // Do not use to_expression as that will unflatten access chains.
  9260. string basename;
  9261. if (const auto *var = maybe_get<SPIRVariable>(id))
  9262. basename = to_name(var->self);
  9263. else if (const auto *expr = maybe_get<SPIRExpression>(id))
  9264. basename = expr->expression;
  9265. else
  9266. basename = to_expression(id);
  9267. return basename;
  9268. }
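// Stores each member of 'rhs_id' into the corresponding flattened output variable,
// recursing into nested structs via the accumulated member indices.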
  9269. void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
  9270. const SmallVector<uint32_t> &indices)
  9271. {
  9272. SmallVector<uint32_t> sub_indices = indices;
  9273. sub_indices.push_back(0);
  9274. auto *member_type = &type;
  9275. for (auto &index : indices)
  9276. member_type = &get<SPIRType>(member_type->member_types[index]);
  9277. for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
  9278. {
  9279. sub_indices.back() = i;
  9280. auto lhs = join(basename, "_", to_member_name(*member_type, i));
  9281. ParsedIR::sanitize_underscores(lhs);
  9282. if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
  9283. {
  9284. store_flattened_struct(lhs, rhs_id, type, sub_indices);
  9285. }
  9286. else
  9287. {
  9288. auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
  9289. statement(lhs, " = ", rhs, ";");
  9290. }
  9291. }
  9292. }
void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
{
	auto &type = expression_type(lhs_id);
	auto basename = to_flattened_access_chain_expression(lhs_id);
	store_flattened_struct(basename, value, type, {});
}
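// Emits a load from a flattened buffer block. Arrays cannot be flattened;
// structs, matrices and vectors each take their own code path below.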
  9299. std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
  9300. const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
  9301. uint32_t /* array_stride */, bool need_transpose)
  9302. {
  9303. if (!target_type.array.empty())
  9304. SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
  9305. else if (target_type.basetype == SPIRType::Struct)
  9306. return flattened_access_chain_struct(base, indices, count, target_type, offset);
  9307. else if (target_type.columns > 1)
  9308. return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
  9309. else
  9310. return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
  9311. }
  9312. std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
  9313. const SPIRType &target_type, uint32_t offset)
  9314. {
  9315. std::string expr;
  9316. if (backend.can_declare_struct_inline)
  9317. {
  9318. expr += type_to_glsl_constructor(target_type);
  9319. expr += "(";
  9320. }
  9321. else
  9322. expr += "{";
  9323. for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
  9324. {
  9325. if (i != 0)
  9326. expr += ", ";
  9327. const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
  9328. uint32_t member_offset = type_struct_member_offset(target_type, i);
  9329. // The access chain terminates at the struct, so we need to find matrix strides and row-major information
  9330. // ahead of time.
  9331. bool need_transpose = false;
  9332. bool relaxed = false;
  9333. uint32_t matrix_stride = 0;
  9334. if (member_type.columns > 1)
  9335. {
  9336. auto decorations = combined_decoration_for_member(target_type, i);
  9337. need_transpose = decorations.get(DecorationRowMajor);
  9338. relaxed = decorations.get(DecorationRelaxedPrecision);
  9339. matrix_stride = type_struct_member_matrix_stride(target_type, i);
  9340. }
  9341. auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
  9342. 0 /* array_stride */, need_transpose);
  9343. // Cannot forward transpositions, so resolve them here.
  9344. if (need_transpose)
  9345. expr += convert_row_major_matrix(tmp, member_type, 0, false, relaxed);
  9346. else
  9347. expr += tmp;
  9348. }
  9349. expr += backend.can_declare_struct_inline ? ")" : "}";
  9350. return expr;
  9351. }
  9352. std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
  9353. const SPIRType &target_type, uint32_t offset,
  9354. uint32_t matrix_stride, bool need_transpose)
  9355. {
  9356. assert(matrix_stride);
  9357. SPIRType tmp_type = target_type;
  9358. if (need_transpose)
  9359. swap(tmp_type.vecsize, tmp_type.columns);
  9360. std::string expr;
  9361. expr += type_to_glsl_constructor(tmp_type);
  9362. expr += "(";
  9363. for (uint32_t i = 0; i < tmp_type.columns; i++)
  9364. {
  9365. if (i != 0)
  9366. expr += ", ";
  9367. expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
  9368. /* need_transpose= */ false);
  9369. }
  9370. expr += ")";
  9371. return expr;
  9372. }
  9373. std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
  9374. const SPIRType &target_type, uint32_t offset,
  9375. uint32_t matrix_stride, bool need_transpose)
  9376. {
  9377. auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
  9378. auto buffer_name = to_name(expression_type(base).self);
  9379. if (need_transpose)
  9380. {
  9381. std::string expr;
  9382. if (target_type.vecsize > 1)
  9383. {
  9384. expr += type_to_glsl_constructor(target_type);
  9385. expr += "(";
  9386. }
  9387. for (uint32_t i = 0; i < target_type.vecsize; ++i)
  9388. {
  9389. if (i != 0)
  9390. expr += ", ";
  9391. uint32_t component_offset = result.second + i * matrix_stride;
  9392. assert(component_offset % (target_type.width / 8) == 0);
  9393. uint32_t index = component_offset / (target_type.width / 8);
  9394. expr += buffer_name;
  9395. expr += "[";
  9396. expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
  9397. expr += convert_to_string(index / 4);
  9398. expr += "]";
  9399. expr += vector_swizzle(1, index % 4);
  9400. }
  9401. if (target_type.vecsize > 1)
  9402. {
  9403. expr += ")";
  9404. }
  9405. return expr;
  9406. }
  9407. else
  9408. {
  9409. assert(result.second % (target_type.width / 8) == 0);
  9410. uint32_t index = result.second / (target_type.width / 8);
  9411. std::string expr;
  9412. expr += buffer_name;
  9413. expr += "[";
  9414. expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
  9415. expr += convert_to_string(index / 4);
  9416. expr += "]";
  9417. expr += vector_swizzle(target_type.vecsize, index % 4);
  9418. return expr;
  9419. }
  9420. }
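// Computes where a flattened access chain lands in the buffer.
// Returns a pair of (dynamic index expression, constant byte offset); the expression part is empty
// when the chain is fully constant, otherwise it ends with " + ".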
  9421. std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
  9422. const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
  9423. bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
  9424. {
  9425. // Start traversing type hierarchy at the proper non-pointer types.
  9426. const auto *type = &get_pointee_type(basetype);
  9427. std::string expr;
  9428. // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
  9429. bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
  9430. uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
  9431. uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
  9432. for (uint32_t i = 0; i < count; i++)
  9433. {
  9434. uint32_t index = indices[i];
  9435. // Pointers
  9436. if (ptr_chain && i == 0)
  9437. {
  9438. // Here, the pointer type will be decorated with an array stride.
  9439. array_stride = get_decoration(basetype.self, DecorationArrayStride);
  9440. if (!array_stride)
  9441. SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
  9442. auto *constant = maybe_get<SPIRConstant>(index);
  9443. if (constant)
  9444. {
  9445. // Constant array access.
  9446. offset += constant->scalar() * array_stride;
  9447. }
  9448. else
  9449. {
  9450. // Dynamic array access.
  9451. if (array_stride % word_stride)
  9452. {
  9453. SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
  9454. "of a 4-component vector. "
  9455. "Likely culprit here is a float or vec2 array inside a push "
  9456. "constant block which is std430. "
  9457. "This cannot be flattened. Try using std140 layout instead.");
  9458. }
  9459. expr += to_enclosed_expression(index);
  9460. expr += " * ";
  9461. expr += convert_to_string(array_stride / word_stride);
  9462. expr += " + ";
  9463. }
  9464. }
  9465. // Arrays
  9466. else if (!type->array.empty())
  9467. {
  9468. auto *constant = maybe_get<SPIRConstant>(index);
  9469. if (constant)
  9470. {
  9471. // Constant array access.
  9472. offset += constant->scalar() * array_stride;
  9473. }
  9474. else
  9475. {
  9476. // Dynamic array access.
  9477. if (array_stride % word_stride)
  9478. {
  9479. SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
  9480. "of a 4-component vector. "
  9481. "Likely culprit here is a float or vec2 array inside a push "
  9482. "constant block which is std430. "
  9483. "This cannot be flattened. Try using std140 layout instead.");
  9484. }
  9485. expr += to_enclosed_expression(index, false);
  9486. expr += " * ";
  9487. expr += convert_to_string(array_stride / word_stride);
  9488. expr += " + ";
  9489. }
  9490. uint32_t parent_type = type->parent_type;
  9491. type = &get<SPIRType>(parent_type);
  9492. if (!type->array.empty())
  9493. array_stride = get_decoration(parent_type, DecorationArrayStride);
  9494. }
  9495. // For structs, the index refers to a constant, which indexes into the members.
  9496. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
  9497. else if (type->basetype == SPIRType::Struct)
  9498. {
  9499. index = evaluate_constant_u32(index);
  9500. if (index >= type->member_types.size())
  9501. SPIRV_CROSS_THROW("Member index is out of bounds!");
  9502. offset += type_struct_member_offset(*type, index);
  9503. auto &struct_type = *type;
  9504. type = &get<SPIRType>(type->member_types[index]);
  9505. if (type->columns > 1)
  9506. {
  9507. matrix_stride = type_struct_member_matrix_stride(struct_type, index);
  9508. row_major_matrix_needs_conversion =
  9509. combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
  9510. }
  9511. else
  9512. row_major_matrix_needs_conversion = false;
  9513. if (!type->array.empty())
  9514. array_stride = type_struct_member_array_stride(struct_type, index);
  9515. }
  9516. // Matrix -> Vector
  9517. else if (type->columns > 1)
  9518. {
  9519. auto *constant = maybe_get<SPIRConstant>(index);
  9520. if (constant)
  9521. {
  9522. index = evaluate_constant_u32(index);
  9523. offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
  9524. }
  9525. else
  9526. {
  9527. uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
  9528. // Dynamic array access.
  9529. if (indexing_stride % word_stride)
  9530. {
  9531. SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
  9532. "4-component vector. "
  9533. "Likely culprit here is a row-major matrix being accessed dynamically. "
  9534. "This cannot be flattened. Try using std140 layout instead.");
  9535. }
  9536. expr += to_enclosed_expression(index, false);
  9537. expr += " * ";
  9538. expr += convert_to_string(indexing_stride / word_stride);
  9539. expr += " + ";
  9540. }
  9541. type = &get<SPIRType>(type->parent_type);
  9542. }
  9543. // Vector -> Scalar
  9544. else if (type->vecsize > 1)
  9545. {
  9546. auto *constant = maybe_get<SPIRConstant>(index);
  9547. if (constant)
  9548. {
  9549. index = evaluate_constant_u32(index);
  9550. offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
  9551. }
  9552. else
  9553. {
  9554. uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
  9555. // Dynamic array access.
  9556. if (indexing_stride % word_stride)
  9557. {
  9558. SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
  9559. "size of a 4-component vector. "
  9560. "This cannot be flattened in legacy targets.");
  9561. }
  9562. expr += to_enclosed_expression(index, false);
  9563. expr += " * ";
  9564. expr += convert_to_string(indexing_stride / word_stride);
  9565. expr += " + ";
  9566. }
  9567. type = &get<SPIRType>(type->parent_type);
  9568. }
  9569. else
  9570. SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
  9571. }
  9572. if (need_transpose)
  9573. *need_transpose = row_major_matrix_needs_conversion;
  9574. if (out_matrix_stride)
  9575. *out_matrix_stride = matrix_stride;
  9576. if (out_array_stride)
  9577. *out_array_stride = array_stride;
  9578. return std::make_pair(expr, offset);
  9579. }
  9580. bool CompilerGLSL::should_dereference(uint32_t id)
  9581. {
  9582. const auto &type = expression_type(id);
  9583. // Non-pointer expressions don't need to be dereferenced.
  9584. if (!type.pointer)
  9585. return false;
  9586. // Handles shouldn't be dereferenced either.
  9587. if (!expression_is_lvalue(id))
  9588. return false;
  9589. // If id is a variable but not a phi variable, we should not dereference it.
  9590. if (auto *var = maybe_get<SPIRVariable>(id))
  9591. return var->phi_variable;
  9592. if (auto *expr = maybe_get<SPIRExpression>(id))
  9593. {
  9594. // If id is an access chain, we should not dereference it.
  9595. if (expr->access_chain)
  9596. return false;
  9597. // If id is a forwarded copy of a variable pointer, we should not dereference it.
  9598. SPIRVariable *var = nullptr;
  9599. while (expr->loaded_from && expression_is_forwarded(expr->self))
  9600. {
  9601. auto &src_type = expression_type(expr->loaded_from);
  9602. // To be a copy, the pointer and its source expression must be the
  9603. // same type. Can't check type.self, because for some reason that's
  9604. // usually the base type with pointers stripped off. This check is
  9605. // complex enough that I've hoisted it out of the while condition.
  9606. if (src_type.pointer != type.pointer || src_type.pointer_depth != type.pointer_depth ||
  9607. src_type.parent_type != type.parent_type)
  9608. break;
  9609. if ((var = maybe_get<SPIRVariable>(expr->loaded_from)))
  9610. break;
  9611. if (!(expr = maybe_get<SPIRExpression>(expr->loaded_from)))
  9612. break;
  9613. }
  9614. return !var || var->phi_variable;
  9615. }
  9616. // Otherwise, we should dereference this pointer expression.
  9617. return true;
  9618. }
  9619. bool CompilerGLSL::should_forward(uint32_t id) const
  9620. {
  9621. // If id is a variable we will try to forward it regardless of force_temporary check below
  9622. // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL
  9623. auto *var = maybe_get<SPIRVariable>(id);
  9624. if (var)
  9625. {
  9626. // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
  9627. return !(has_decoration(id, DecorationBuiltIn) && has_decoration(id, DecorationVolatile));
  9628. }
  9629. // For debugging emit temporary variables for all expressions
  9630. if (options.force_temporary)
  9631. return false;
  9632. // If an expression carries enough dependencies we need to stop forwarding at some point,
  9633. // or we explode compilers. There are usually limits to how much we can nest expressions.
  9634. auto *expr = maybe_get<SPIRExpression>(id);
  9635. const uint32_t max_expression_dependencies = 64;
  9636. if (expr && expr->expression_dependencies.size() >= max_expression_dependencies)
  9637. return false;
  9638. if (expr && expr->loaded_from
  9639. && has_decoration(expr->loaded_from, DecorationBuiltIn)
  9640. && has_decoration(expr->loaded_from, DecorationVolatile))
  9641. {
  9642. // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation.
  9643. return false;
  9644. }
  9645. // Immutable expression can always be forwarded.
  9646. if (is_immutable(id))
  9647. return true;
  9648. return false;
  9649. }
bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
{
	// Used only by opcodes which don't do any real "work", they just swizzle data in some fashion.
	return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
}
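// Counts reads of forwarded expressions (including implied reads). Once an expression is read
// more than once, it is forced to a temporary and a recompile is triggered.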
  9655. void CompilerGLSL::track_expression_read(uint32_t id)
  9656. {
  9657. switch (ir.ids[id].get_type())
  9658. {
  9659. case TypeExpression:
  9660. {
  9661. auto &e = get<SPIRExpression>(id);
  9662. for (auto implied_read : e.implied_read_expressions)
  9663. track_expression_read(implied_read);
  9664. break;
  9665. }
  9666. case TypeAccessChain:
  9667. {
  9668. auto &e = get<SPIRAccessChain>(id);
  9669. for (auto implied_read : e.implied_read_expressions)
  9670. track_expression_read(implied_read);
  9671. break;
  9672. }
  9673. default:
  9674. break;
  9675. }
  9676. // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
  9677. // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
  9678. if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
  9679. {
  9680. auto &v = expression_usage_counts[id];
  9681. v++;
  9682. // If we create an expression outside a loop,
  9683. // but access it inside a loop, we're implicitly reading it multiple times.
  9684. // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
  9685. // working inside the backend compiler.
  9686. if (expression_read_implies_multiple_reads(id))
  9687. v++;
  9688. if (v >= 2)
  9689. {
  9690. //if (v == 2)
  9691. // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
  9692. // Force a recompile after this pass to avoid forwarding this variable.
  9693. force_temporary_and_recompile(id);
  9694. }
  9695. }
  9696. }
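// Returns true if a call with these arguments can be forwarded as a single expression,
// i.e. no argument (and, for impure calls, no global or aliased variable) needs a temporary.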
  9697. bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
  9698. {
  9699. if (forced_temporaries.find(id) != end(forced_temporaries))
  9700. return false;
  9701. for (uint32_t i = 0; i < num_args; i++)
  9702. if (!should_forward(args[i]))
  9703. return false;
  9704. // We need to forward globals as well.
  9705. if (!pure)
  9706. {
  9707. for (auto global : global_variables)
  9708. if (!should_forward(global))
  9709. return false;
  9710. for (auto aliased : aliased_variables)
  9711. if (!should_forward(aliased))
  9712. return false;
  9713. }
  9714. return true;
  9715. }
void CompilerGLSL::register_impure_function_call()
{
	// Impure functions can modify globals and aliased variables, so invalidate them as well.
	for (auto global : global_variables)
		flush_dependees(get<SPIRVariable>(global));
	for (auto aliased : aliased_variables)
		flush_dependees(get<SPIRVariable>(aliased));
}
void CompilerGLSL::register_call_out_argument(uint32_t id)
{
	register_write(id);

	auto *var = maybe_get<SPIRVariable>(id);
	if (var)
		flush_variable_declaration(var->self);
}
  9731. string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
  9732. {
  9733. // These variables are always function local,
  9734. // so make sure we emit the variable without storage qualifiers.
  9735. // Some backends will inject custom variables locally in a function
  9736. // with a storage qualifier which is not function-local.
  9737. auto old_storage = var.storage;
  9738. var.storage = StorageClassFunction;
  9739. auto expr = variable_decl(var);
  9740. var.storage = old_storage;
  9741. return expr;
  9742. }
  9743. void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
  9744. {
  9745. // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
  9746. if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
  9747. {
  9748. auto &type = get<SPIRType>(var.basetype);
  9749. auto &flags = get_decoration_bitset(var.self);
  9750. statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
  9751. flushed_phi_variables.insert(var.self);
  9752. }
  9753. }
  9754. void CompilerGLSL::flush_variable_declaration(uint32_t id)
  9755. {
  9756. // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
  9757. auto *var = maybe_get<SPIRVariable>(id);
  9758. if (var && var->deferred_declaration)
  9759. {
  9760. string initializer;
  9761. if (options.force_zero_initialized_variables &&
  9762. (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
  9763. var->storage == StorageClassPrivate) &&
  9764. !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
  9765. {
  9766. initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
  9767. }
  9768. statement(variable_decl_function_local(*var), initializer, ";");
  9769. var->deferred_declaration = false;
  9770. }
  9771. if (var)
  9772. {
  9773. emit_variable_temporary_copies(*var);
  9774. }
  9775. }
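// Collapses chained swizzles such as foobar.wyx.xy into foobar.wy when the final swizzle
// is an identity prefix (.x, .xy, .xyz, .xyzw).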
  9776. bool CompilerGLSL::remove_duplicate_swizzle(string &op)
  9777. {
  9778. auto pos = op.find_last_of('.');
  9779. if (pos == string::npos || pos == 0)
  9780. return false;
  9781. string final_swiz = op.substr(pos + 1, string::npos);
  9782. if (backend.swizzle_is_function)
  9783. {
  9784. if (final_swiz.size() < 2)
  9785. return false;
  9786. if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
  9787. final_swiz.erase(final_swiz.size() - 2, string::npos);
  9788. else
  9789. return false;
  9790. }
  9791. // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
  9792. // If so, and previous swizzle is of same length,
  9793. // we can drop the final swizzle altogether.
  9794. for (uint32_t i = 0; i < final_swiz.size(); i++)
  9795. {
  9796. static const char expected[] = { 'x', 'y', 'z', 'w' };
  9797. if (i >= 4 || final_swiz[i] != expected[i])
  9798. return false;
  9799. }
  9800. auto prevpos = op.find_last_of('.', pos - 1);
  9801. if (prevpos == string::npos)
  9802. return false;
  9803. prevpos++;
  9804. // Make sure there are only swizzles here ...
  9805. for (auto i = prevpos; i < pos; i++)
  9806. {
  9807. if (op[i] < 'w' || op[i] > 'z')
  9808. {
  9809. // If swizzles are foo.xyz() like in C++ backend for example, check for that.
  9810. if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
  9811. break;
  9812. return false;
  9813. }
  9814. }
  9815. // If original swizzle is large enough, just carve out the components we need.
  9816. // E.g. foobar.wyx.xy will turn into foobar.wy.
  9817. if (pos - prevpos >= final_swiz.size())
  9818. {
  9819. op.erase(prevpos + final_swiz.size(), string::npos);
  9820. // Add back the function call ...
  9821. if (backend.swizzle_is_function)
  9822. op += "()";
  9823. }
  9824. return true;
  9825. }
  9826. // Optimizes away vector swizzles where we have something like
  9827. // vec3 foo;
  9828. // foo.xyz <-- swizzle expression does nothing.
// This is a very common pattern after OpCompositeConstruct.
  9830. bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
  9831. {
  9832. auto pos = op.find_last_of('.');
  9833. if (pos == string::npos || pos == 0)
  9834. return false;
  9835. string final_swiz = op.substr(pos + 1, string::npos);
  9836. if (backend.swizzle_is_function)
  9837. {
  9838. if (final_swiz.size() < 2)
  9839. return false;
  9840. if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
  9841. final_swiz.erase(final_swiz.size() - 2, string::npos);
  9842. else
  9843. return false;
  9844. }
  9845. // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
  9846. // If so, and previous swizzle is of same length,
  9847. // we can drop the final swizzle altogether.
  9848. for (uint32_t i = 0; i < final_swiz.size(); i++)
  9849. {
  9850. static const char expected[] = { 'x', 'y', 'z', 'w' };
  9851. if (i >= 4 || final_swiz[i] != expected[i])
  9852. return false;
  9853. }
  9854. auto &type = expression_type(base);
  9855. // Sanity checking ...
  9856. assert(type.columns == 1 && type.array.empty());
  9857. if (type.vecsize == final_swiz.size())
  9858. op.erase(pos, string::npos);
  9859. return true;
  9860. }
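// Builds the argument list for a composite constructor, merging scalar extracts that come from
// the same base vector back into a single swizzle where possible.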
  9861. string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
  9862. {
  9863. ID base = 0;
  9864. string op;
  9865. string subop;
  9866. // Can only merge swizzles for vectors.
  9867. auto &type = get<SPIRType>(return_type);
  9868. bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
  9869. bool swizzle_optimization = false;
  9870. for (uint32_t i = 0; i < length; i++)
  9871. {
  9872. auto *e = maybe_get<SPIRExpression>(elems[i]);
  9873. // If we're merging another scalar which belongs to the same base
// object, just merge the swizzles so we avoid triggering more than one expression read where possible.
  9875. if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
  9876. {
  9877. // Only supposed to be used for vector swizzle -> scalar.
  9878. assert(!e->expression.empty() && e->expression.front() == '.');
  9879. subop += e->expression.substr(1, string::npos);
  9880. swizzle_optimization = true;
  9881. }
  9882. else
  9883. {
  9884. // We'll likely end up with duplicated swizzles, e.g.
  9885. // foobar.xyz.xyz from patterns like
  9886. // OpVectorShuffle
  9887. // OpCompositeExtract x 3
  9888. // OpCompositeConstruct 3x + other scalar.
  9889. // Just modify op in-place.
  9890. if (swizzle_optimization)
  9891. {
  9892. if (backend.swizzle_is_function)
  9893. subop += "()";
  9894. // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
  9895. // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
  9896. // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
  9897. // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
  9898. // Case 1:
  9899. // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
  9900. // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
  9901. // Case 2:
  9902. // foo.xyz: Duplicate swizzle won't kick in.
  9903. // If foo is vec3, we can remove xyz, giving just foo.
  9904. if (!remove_duplicate_swizzle(subop))
  9905. remove_unity_swizzle(base, subop);
  9906. // Strips away redundant parens if we created them during component extraction.
  9907. strip_enclosed_expression(subop);
  9908. swizzle_optimization = false;
  9909. op += subop;
  9910. }
  9911. else
  9912. op += subop;
  9913. if (i)
  9914. op += ", ";
  9915. bool uses_buffer_offset =
  9916. type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
  9917. subop = to_composite_constructor_expression(type, elems[i], uses_buffer_offset);
  9918. }
  9919. base = e ? e->base_expression : ID(0);
  9920. }
  9921. if (swizzle_optimization)
  9922. {
  9923. if (backend.swizzle_is_function)
  9924. subop += "()";
  9925. if (!remove_duplicate_swizzle(subop))
  9926. remove_unity_swizzle(base, subop);
  9927. // Strips away redundant parens if we created them during component extraction.
  9928. strip_enclosed_expression(subop);
  9929. }
  9930. op += subop;
  9931. return op;
  9932. }
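// When combined image samplers are in use (or we are not targeting Vulkan GLSL), separate sampler
// and sampled-image arguments are skipped at call sites, since they are replaced by combined samplers.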
bool CompilerGLSL::skip_argument(uint32_t id) const
{
	if (!combined_image_samplers.empty() || !options.vulkan_semantics)
	{
		auto &type = expression_type(id);
		if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
			return true;
	}
	return false;
}
  9943. bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
  9944. {
  9945. // Do this with strings because we have a very clear pattern we can check for and it avoids
  9946. // adding lots of special cases to the code emission.
  9947. if (rhs.size() < lhs.size() + 3)
  9948. return false;
  9949. // Do not optimize matrices. They are a bit awkward to reason about in general
  9950. // (in which order does operation happen?), and it does not work on MSL anyways.
  9951. if (type.vecsize > 1 && type.columns > 1)
  9952. return false;
  9953. auto index = rhs.find(lhs);
  9954. if (index != 0)
  9955. return false;
  9956. // TODO: Shift operators, but it's not important for now.
  9957. auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
  9958. if (op != lhs.size() + 1)
  9959. return false;
  9960. // Check that the op is followed by space. This excludes && and ||.
  9961. if (rhs[op + 1] != ' ')
  9962. return false;
  9963. char bop = rhs[op];
  9964. auto expr = rhs.substr(lhs.size() + 3);
  9965. // Avoids false positives where we get a = a * b + c.
  9966. // Normally, these expressions are always enclosed, but unexpected code paths may end up hitting this.
  9967. if (needs_enclose_expression(expr))
  9968. return false;
// Try to find increments and decrements. Emitting ++ or -- looks neater, since += 1 and -= 1 are fairly rare in real code.
// Match some common patterns which are equivalent.
  9971. if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
  9972. statement(lhs, bop, bop, ";");
  9973. else
  9974. statement(lhs, " ", bop, "= ", expr, ";");
  9975. return true;
  9976. }
void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
{
	if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
		return;

	assert(current_emitting_block);
	current_emitting_block->invalidate_expressions.push_back(expr);
}
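// Emits all instructions in a block, inserting mirror-precision copies (via OpCopyObject) for PHI variables
// and temporaries when relaxed-precision analysis requires them.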
  9984. void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
  9985. {
  9986. current_emitting_block = &block;
  9987. if (backend.requires_relaxed_precision_analysis)
  9988. {
  9989. // If PHI variables are consumed in unexpected precision contexts, copy them here.
  9990. for (size_t i = 0, n = block.phi_variables.size(); i < n; i++)
  9991. {
  9992. auto &phi = block.phi_variables[i];
// Ensure we only copy once. We know a priori that this array lays out entries for
// the same function variable next to each other.
  9995. if (i && block.phi_variables[i - 1].function_variable == phi.function_variable)
  9996. continue;
  9997. auto itr = temporary_to_mirror_precision_alias.find(phi.function_variable);
  9998. if (itr != temporary_to_mirror_precision_alias.end())
  9999. {
  10000. // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
  10001. // so it helps to have handle_instruction_precision() on the outside of emit_instruction().
  10002. EmbeddedInstruction inst;
  10003. inst.op = OpCopyObject;
  10004. inst.length = 3;
  10005. inst.ops.push_back(expression_type_id(itr->first));
  10006. inst.ops.push_back(itr->second);
  10007. inst.ops.push_back(itr->first);
  10008. emit_instruction(inst);
  10009. }
  10010. }
  10011. }
  10012. for (auto &op : block.ops)
  10013. {
  10014. auto temporary_copy = handle_instruction_precision(op);
  10015. emit_instruction(op);
  10016. if (temporary_copy.dst_id)
  10017. {
  10018. // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject,
  10019. // so it helps to have handle_instruction_precision() on the outside of emit_instruction().
  10020. EmbeddedInstruction inst;
  10021. inst.op = OpCopyObject;
  10022. inst.length = 3;
  10023. inst.ops.push_back(expression_type_id(temporary_copy.src_id));
  10024. inst.ops.push_back(temporary_copy.dst_id);
  10025. inst.ops.push_back(temporary_copy.src_id);
  10026. // Never attempt to hoist mirrored temporaries.
  10027. // They are hoisted in lock-step with their parents.
  10028. block_temporary_hoisting = true;
  10029. emit_instruction(inst);
  10030. block_temporary_hoisting = false;
  10031. }
  10032. }
  10033. current_emitting_block = nullptr;
  10034. }
  10035. void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
  10036. {
  10037. // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
  10038. // these will be marked as having suppressed usage tracking.
  10039. // Our only concern is to make sure arithmetic operations are done in similar ways.
  10040. if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
  10041. forced_invariant_temporaries.count(expr.self) == 0)
  10042. {
  10043. force_temporary_and_recompile(expr.self);
  10044. forced_invariant_temporaries.insert(expr.self);
  10045. for (auto &dependent : expr.expression_dependencies)
  10046. disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
  10047. }
  10048. }
  10049. void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
  10050. {
  10051. // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
  10052. // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
  10053. // in one translation unit, but not another, e.g. due to multiple use of an expression.
  10054. // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
  10055. // expressions to be temporaries.
  10056. // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
  10057. // for all reasonable uses of invariant.
  10058. if (!has_decoration(store_id, DecorationInvariant))
  10059. return;
  10060. auto *expr = maybe_get<SPIRExpression>(value_id);
  10061. if (!expr)
  10062. return;
  10063. disallow_forwarding_in_expression_chain(*expr);
  10064. }
  10065. void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
  10066. {
  10067. auto rhs = to_pointer_expression(rhs_expression);
  10068. // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
  10069. if (!rhs.empty())
  10070. {
  10071. handle_store_to_invariant_variable(lhs_expression, rhs_expression);
  10072. if (!unroll_array_to_complex_store(lhs_expression, rhs_expression))
  10073. {
  10074. auto lhs = to_dereferenced_expression(lhs_expression);
  10075. if (has_decoration(lhs_expression, DecorationNonUniform))
  10076. convert_non_uniform_expression(lhs, lhs_expression);
  10077. // We might need to cast in order to store to a builtin.
  10078. cast_to_variable_store(lhs_expression, rhs, expression_type(rhs_expression));
  10079. // Tries to optimize assignments like "<lhs> = <lhs> op expr".
  10080. // While this is purely cosmetic, this is important for legacy ESSL where loop
  10081. // variable increments must be in either i++ or i += const-expr.
  10082. // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
  10083. if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
  10084. statement(lhs, " = ", rhs, ";");
  10085. }
  10086. register_write(lhs_expression);
  10087. }
  10088. }
  10089. uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
  10090. {
  10091. if (instr.length < 3)
  10092. return 32;
  10093. auto *ops = stream(instr);
  10094. switch (instr.op)
  10095. {
  10096. case OpSConvert:
  10097. case OpConvertSToF:
  10098. case OpUConvert:
  10099. case OpConvertUToF:
  10100. case OpIEqual:
  10101. case OpINotEqual:
  10102. case OpSLessThan:
  10103. case OpSLessThanEqual:
  10104. case OpSGreaterThan:
  10105. case OpSGreaterThanEqual:
  10106. case OpULessThan:
  10107. case OpULessThanEqual:
  10108. case OpUGreaterThan:
  10109. case OpUGreaterThanEqual:
  10110. return expression_type(ops[2]).width;
  10111. default:
  10112. {
  10113. // We can look at result type which is more robust.
  10114. auto *type = maybe_get<SPIRType>(ops[0]);
  10115. if (type && type_is_integral(*type))
  10116. return type->width;
  10117. else
  10118. return 32;
  10119. }
  10120. }
  10121. }
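// Illustrative sketch: comparison opcodes have a boolean result type, so the integer width must
// come from the first operand instead. For a hypothetical instruction
//   %r = OpSLessThan %bool %a %b        ; %a, %b are 16-bit ints
// this returns 16 (the width of %a), while the default path reads the width off the result type.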
  10122. uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
  10123. {
  10124. if (length < 1)
  10125. return 32;
  10126. switch (op)
  10127. {
  10128. case GLSLstd450SAbs:
  10129. case GLSLstd450SSign:
  10130. case GLSLstd450UMin:
  10131. case GLSLstd450SMin:
  10132. case GLSLstd450UMax:
  10133. case GLSLstd450SMax:
  10134. case GLSLstd450UClamp:
  10135. case GLSLstd450SClamp:
  10136. case GLSLstd450FindSMsb:
  10137. case GLSLstd450FindUMsb:
  10138. return expression_type(ops[0]).width;
  10139. default:
  10140. {
  10141. // We don't need to care about other opcodes, just return 32.
  10142. return 32;
  10143. }
  10144. }
  10145. }
  10146. void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length)
  10147. {
  10148. // Only GLSL supports RelaxedPrecision directly.
  10149. // We cannot implement this in HLSL or MSL because it is tied to the type system.
  10150. // In SPIR-V, everything must masquerade as 32-bit.
  10151. if (!backend.requires_relaxed_precision_analysis)
  10152. return;
  10153. auto input_precision = analyze_expression_precision(args, length);
  10154. // For expressions which are loaded or directly forwarded, we inherit mediump implicitly.
  10155. // For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id.
  10156. if (input_precision == Options::Mediump)
  10157. set_decoration(dst_id, DecorationRelaxedPrecision);
  10158. }
  10159. CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const
  10160. {
  10161. // Now, analyze the precision at which the arguments would run.
  10162. // GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision
  10163. // for the inputs. Constants do not have inherent precision and do not contribute to this decision.
  10164. // If all inputs are constants, they inherit precision from outer expressions, including an l-value.
  10165. // In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with
  10166. // correct precision.
  10167. bool expression_has_highp = false;
  10168. bool expression_has_mediump = false;
  10169. for (uint32_t i = 0; i < length; i++)
  10170. {
  10171. uint32_t arg = args[i];
  10172. auto handle_type = ir.ids[arg].get_type();
  10173. if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef)
  10174. continue;
  10175. if (has_decoration(arg, DecorationRelaxedPrecision))
  10176. expression_has_mediump = true;
  10177. else
  10178. expression_has_highp = true;
  10179. }
  10180. if (expression_has_highp)
  10181. return Options::Highp;
  10182. else if (expression_has_mediump)
  10183. return Options::Mediump;
  10184. else
  10185. return Options::DontCare;
  10186. }
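// Illustrative GLSL sketch of the rule above: with "mediump float a; highp float b;",
// the expression "a * b" is treated as highp (highest precision among non-constant inputs),
// "a * 2.0" as mediump (the literal contributes nothing), and "2.0 * 4.0" as DontCare,
// since only constants are involved.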
  10187. void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length)
  10188. {
  10189. if (!backend.requires_relaxed_precision_analysis)
  10190. return;
  10191. auto &type = get<SPIRType>(type_id);
  10192. // RelaxedPrecision only applies to 32-bit values.
  10193. if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt)
  10194. return;
  10195. bool operation_is_highp = !has_decoration(dst_id, DecorationRelaxedPrecision);
  10196. auto input_precision = analyze_expression_precision(args, length);
  10197. if (input_precision == Options::DontCare)
  10198. {
  10199. consume_temporary_in_precision_context(type_id, dst_id, input_precision);
  10200. return;
  10201. }
  10202. // In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined.
  10203. // In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit.
  10204. // However, if the expression is not, inputs must be expanded to 32-bit first,
  10205. // since the operation must run at high precision.
  10206. // This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump,
  10207. // we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations
  10208. // and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables.
  10209. if ((operation_is_highp && input_precision == Options::Mediump) ||
  10210. (!operation_is_highp && input_precision == Options::Highp))
  10211. {
  10212. auto precision = operation_is_highp ? Options::Highp : Options::Mediump;
  10213. for (uint32_t i = 0; i < length; i++)
  10214. {
  10215. // Rewrites the opcode so that we consume an ID in correct precision context.
// This is pretty hacky, but it's the most straightforward way of implementing this without adding
  10217. // lots of extra passes to rewrite all code blocks.
  10218. args[i] = consume_temporary_in_precision_context(expression_type_id(args[i]), args[i], precision);
  10219. }
  10220. }
  10221. }
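// Illustrative sketch (hypothetical names): if a highp operation consumes a RelaxedPrecision
// input "mp_val", the argument is rebound via consume_temporary_in_precision_context() so the
// emitted GLSL reads roughly:
//   highp float _promoted = mp_val;
//   highp float _result = _promoted + other;
// The mirror case binds highp inputs to mediump temporaries instead.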
  10222. // This is probably not exhaustive ...
  10223. static bool opcode_is_precision_sensitive_operation(Op op)
  10224. {
  10225. switch (op)
  10226. {
  10227. case OpFAdd:
  10228. case OpFSub:
  10229. case OpFMul:
  10230. case OpFNegate:
  10231. case OpIAdd:
  10232. case OpISub:
  10233. case OpIMul:
  10234. case OpSNegate:
  10235. case OpFMod:
  10236. case OpFDiv:
  10237. case OpFRem:
  10238. case OpSMod:
  10239. case OpSDiv:
  10240. case OpSRem:
  10241. case OpUMod:
  10242. case OpUDiv:
  10243. case OpVectorTimesMatrix:
  10244. case OpMatrixTimesVector:
  10245. case OpMatrixTimesMatrix:
  10246. case OpDPdx:
  10247. case OpDPdy:
  10248. case OpDPdxCoarse:
  10249. case OpDPdyCoarse:
  10250. case OpDPdxFine:
  10251. case OpDPdyFine:
  10252. case OpFwidth:
  10253. case OpFwidthCoarse:
  10254. case OpFwidthFine:
  10255. case OpVectorTimesScalar:
  10256. case OpMatrixTimesScalar:
  10257. case OpOuterProduct:
  10258. case OpFConvert:
  10259. case OpSConvert:
  10260. case OpUConvert:
  10261. case OpConvertSToF:
  10262. case OpConvertUToF:
  10263. case OpConvertFToU:
  10264. case OpConvertFToS:
  10265. return true;
  10266. default:
  10267. return false;
  10268. }
  10269. }
  10270. // Instructions which just load data but don't do any arithmetic operation should just inherit the decoration.
// SPIR-V doesn't require this, but it's somewhat implied it has to work this way; relaxed precision is only
// relevant when operating on the IDs, not when shuffling things around.
  10273. static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count)
  10274. {
  10275. switch (op)
  10276. {
  10277. case OpLoad:
  10278. case OpAccessChain:
  10279. case OpInBoundsAccessChain:
  10280. case OpCompositeExtract:
  10281. case OpVectorExtractDynamic:
  10282. case OpSampledImage:
  10283. case OpImage:
  10284. case OpCopyObject:
  10285. case OpImageRead:
  10286. case OpImageFetch:
  10287. case OpImageSampleImplicitLod:
  10288. case OpImageSampleProjImplicitLod:
  10289. case OpImageSampleDrefImplicitLod:
  10290. case OpImageSampleProjDrefImplicitLod:
  10291. case OpImageSampleExplicitLod:
  10292. case OpImageSampleProjExplicitLod:
  10293. case OpImageSampleDrefExplicitLod:
  10294. case OpImageSampleProjDrefExplicitLod:
  10295. case OpImageGather:
  10296. case OpImageDrefGather:
  10297. case OpImageSparseRead:
  10298. case OpImageSparseFetch:
  10299. case OpImageSparseSampleImplicitLod:
  10300. case OpImageSparseSampleProjImplicitLod:
  10301. case OpImageSparseSampleDrefImplicitLod:
  10302. case OpImageSparseSampleProjDrefImplicitLod:
  10303. case OpImageSparseSampleExplicitLod:
  10304. case OpImageSparseSampleProjExplicitLod:
  10305. case OpImageSparseSampleDrefExplicitLod:
  10306. case OpImageSparseSampleProjDrefExplicitLod:
  10307. case OpImageSparseGather:
  10308. case OpImageSparseDrefGather:
  10309. arg_count = 1;
  10310. return true;
  10311. case OpVectorShuffle:
  10312. arg_count = 2;
  10313. return true;
  10314. case OpCompositeConstruct:
  10315. return true;
  10316. default:
  10317. break;
  10318. }
  10319. return false;
  10320. }
  10321. CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction)
  10322. {
  10323. auto ops = stream_mutable(instruction);
  10324. auto opcode = static_cast<Op>(instruction.op);
  10325. uint32_t length = instruction.length;
  10326. if (backend.requires_relaxed_precision_analysis)
  10327. {
  10328. if (length > 2)
  10329. {
  10330. uint32_t forwarding_length = length - 2;
  10331. if (opcode_is_precision_sensitive_operation(opcode))
  10332. analyze_precision_requirements(ops[0], ops[1], &ops[2], forwarding_length);
  10333. else if (opcode == OpExtInst && length >= 5 && get<SPIRExtension>(ops[2]).ext == SPIRExtension::GLSL)
  10334. analyze_precision_requirements(ops[0], ops[1], &ops[4], forwarding_length - 2);
  10335. else if (opcode_is_precision_forwarding_instruction(opcode, forwarding_length))
  10336. forward_relaxed_precision(ops[1], &ops[2], forwarding_length);
  10337. }
  10338. uint32_t result_type = 0, result_id = 0;
  10339. if (instruction_to_result_type(result_type, result_id, opcode, ops, length))
  10340. {
  10341. auto itr = temporary_to_mirror_precision_alias.find(ops[1]);
  10342. if (itr != temporary_to_mirror_precision_alias.end())
  10343. return { itr->second, itr->first };
  10344. }
  10345. }
  10346. return {};
  10347. }
  10348. void CompilerGLSL::emit_instruction(const Instruction &instruction)
  10349. {
  10350. auto ops = stream(instruction);
  10351. auto opcode = static_cast<Op>(instruction.op);
  10352. uint32_t length = instruction.length;
  10353. #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
  10354. #define GLSL_BOP_CAST(op, type) \
  10355. emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \
  10356. opcode_is_sign_invariant(opcode), implicit_integer_promotion)
  10357. #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
  10358. #define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op)
  10359. #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
  10360. #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
  10361. #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
  10362. #define GLSL_BFOP_CAST(op, type) \
  10363. emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
  10365. #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
  10366. // If we need to do implicit bitcasts, make sure we do it with the correct type.
  10367. uint32_t integer_width = get_integer_width_for_instruction(instruction);
  10368. auto int_type = to_signed_basetype(integer_width);
  10369. auto uint_type = to_unsigned_basetype(integer_width);
  10370. // Handle C implicit integer promotion rules.
  10371. // If we get implicit promotion to int, need to make sure we cast by value to intended return type,
  10372. // otherwise, future sign-dependent operations and bitcasts will break.
  10373. bool implicit_integer_promotion = integer_width < 32 && backend.implicit_c_integer_promotion_rules &&
  10374. opcode_can_promote_integer_implicitly(opcode) &&
  10375. get<SPIRType>(ops[0]).vecsize == 1;
  10376. opcode = get_remapped_spirv_op(opcode);
  10377. switch (opcode)
  10378. {
  10379. // Dealing with memory
  10380. case OpLoad:
  10381. {
  10382. uint32_t result_type = ops[0];
  10383. uint32_t id = ops[1];
  10384. uint32_t ptr = ops[2];
  10385. flush_variable_declaration(ptr);
  10386. // If we're loading from memory that cannot be changed by the shader,
  10387. // just forward the expression directly to avoid needless temporaries.
  10388. // If an expression is mutable and forwardable, we speculate that it is immutable.
  10389. bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
  10390. // If loading a non-native row-major matrix, mark the expression as need_transpose.
  10391. bool need_transpose = false;
  10392. bool old_need_transpose = false;
  10393. auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
  10394. if (forward)
  10395. {
  10396. // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
  10397. // taking the expression.
  10398. if (ptr_expression && ptr_expression->need_transpose)
  10399. {
  10400. old_need_transpose = true;
  10401. ptr_expression->need_transpose = false;
  10402. need_transpose = true;
  10403. }
  10404. else if (is_non_native_row_major_matrix(ptr))
  10405. need_transpose = true;
  10406. }
  10407. // If we are forwarding this load,
  10408. // don't register the read to access chain here, defer that to when we actually use the expression,
  10409. // using the add_implied_read_expression mechanism.
  10410. string expr;
  10411. bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
  10412. bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
  10413. if (forward || (!is_packed && !is_remapped))
  10414. {
  10415. // For the simple case, we do not need to deal with repacking.
  10416. expr = to_dereferenced_expression(ptr, false);
  10417. }
  10418. else
  10419. {
  10420. // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
  10421. // storing the expression to a temporary.
  10422. expr = to_unpacked_expression(ptr);
  10423. }
  10424. auto &type = get<SPIRType>(result_type);
  10425. auto &expr_type = expression_type(ptr);
  10426. // If the expression has more vector components than the result type, insert
  10427. // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
  10428. // happen with e.g. the MSL backend replacing the type of an input variable.
  10429. if (expr_type.vecsize > type.vecsize)
  10430. expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
  10431. if (forward && ptr_expression)
  10432. ptr_expression->need_transpose = old_need_transpose;
  10433. // We might need to cast in order to load from a builtin.
  10434. cast_from_variable_load(ptr, expr, type);
  10435. if (forward && ptr_expression)
  10436. ptr_expression->need_transpose = false;
  10437. // We might be trying to load a gl_Position[N], where we should be
  10438. // doing float4[](gl_in[i].gl_Position, ...) instead.
  10439. // Similar workarounds are required for input arrays in tessellation.
  10440. // Also, loading from gl_SampleMask array needs special unroll.
  10441. unroll_array_from_complex_load(id, ptr, expr);
  10442. if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform))
  10443. {
  10444. // If we're loading something non-opaque, we need to handle non-uniform descriptor access.
  10445. convert_non_uniform_expression(expr, ptr);
  10446. }
  10447. if (forward && ptr_expression)
  10448. ptr_expression->need_transpose = old_need_transpose;
  10449. bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
  10450. if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
  10451. rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
// By default, suppress usage tracking since using the same expression multiple times does not imply any extra work.
  10453. // However, if we try to load a complex, composite object from a flattened buffer,
  10454. // we should avoid emitting the same code over and over and lower the result to a temporary.
  10455. bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
  10456. SPIRExpression *e = nullptr;
  10457. if (!forward && expression_is_non_value_type_array(ptr))
  10458. {
  10459. // Complicated load case where we need to make a copy of ptr, but we cannot, because
  10460. // it is an array, and our backend does not support arrays as value types.
  10461. // Emit the temporary, and copy it explicitly.
  10462. e = &emit_uninitialized_temporary_expression(result_type, id);
  10463. emit_array_copy(nullptr, id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
  10464. }
  10465. else
  10466. e = &emit_op(result_type, id, expr, forward, !usage_tracking);
  10467. e->need_transpose = need_transpose;
  10468. register_read(id, ptr, forward);
  10469. if (forward)
  10470. {
  10471. // Pass through whether the result is of a packed type and the physical type ID.
  10472. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
  10473. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  10474. if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
  10475. {
  10476. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
  10477. get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
  10478. }
  10479. }
  10480. else
  10481. {
  10482. // This might have been set on an earlier compilation iteration, force it to be unset.
  10483. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  10484. unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
  10485. }
  10486. inherit_expression_dependencies(id, ptr);
  10487. if (forward)
  10488. add_implied_read_expression(*e, ptr);
  10489. break;
  10490. }
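// Illustrative GLSL sketch for the NonUniform handling above (assuming GL_EXT_nonuniform_qualifier
// style output): a load through a non-uniform index is wrapped as
//   uTextures[nonuniformEXT(index)]
// rather than being emitted as a plain subscript.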
  10491. case OpInBoundsAccessChain:
  10492. case OpAccessChain:
  10493. case OpPtrAccessChain:
  10494. {
  10495. auto *var = maybe_get<SPIRVariable>(ops[2]);
  10496. if (var)
  10497. flush_variable_declaration(var->self);
  10498. // If the base is immutable, the access chain pointer must also be.
  10499. // If an expression is mutable and forwardable, we speculate that it is immutable.
  10500. AccessChainMeta meta;
  10501. bool ptr_chain = opcode == OpPtrAccessChain;
  10502. auto &target_type = get<SPIRType>(ops[0]);
  10503. auto e = access_chain(ops[2], &ops[3], length - 3, target_type, &meta, ptr_chain);
  10504. // If the base is flattened UBO of struct type, the expression has to be a composite.
  10505. // In that case, backends which do not support inline syntax need it to be bound to a temporary.
  10506. // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted.
  10507. bool requires_temporary = false;
  10508. if (flattened_buffer_blocks.count(ops[2]) && target_type.basetype == SPIRType::Struct)
  10509. requires_temporary = !backend.can_declare_struct_inline;
  10510. auto &expr = requires_temporary ?
  10511. emit_op(ops[0], ops[1], std::move(e), false) :
  10512. set<SPIRExpression>(ops[1], std::move(e), ops[0], should_forward(ops[2]));
  10513. auto *backing_variable = maybe_get_backing_variable(ops[2]);
  10514. expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
  10515. expr.need_transpose = meta.need_transpose;
  10516. expr.access_chain = true;
  10517. expr.access_meshlet_position_y = meta.access_meshlet_position_y;
// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
  10519. if (meta.storage_is_packed)
  10520. set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
  10521. if (meta.storage_physical_type != 0)
  10522. set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
  10523. if (meta.storage_is_invariant)
  10524. set_decoration(ops[1], DecorationInvariant);
  10525. if (meta.flattened_struct)
  10526. flattened_structs[ops[1]] = true;
  10527. if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
  10528. set_decoration(ops[1], DecorationRelaxedPrecision);
  10529. // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
  10530. // temporary which could be subject to invalidation.
// Need to assume we're forwarded while calling inherit_expression_dependencies.
  10532. forwarded_temporaries.insert(ops[1]);
  10533. // The access chain itself is never forced to a temporary, but its dependencies might.
  10534. suppressed_usage_tracking.insert(ops[1]);
  10535. for (uint32_t i = 2; i < length; i++)
  10536. {
  10537. inherit_expression_dependencies(ops[1], ops[i]);
  10538. add_implied_read_expression(expr, ops[i]);
  10539. }
  10540. // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries,
  10541. // we're not forwarded after all.
  10542. if (expr.expression_dependencies.empty())
  10543. forwarded_temporaries.erase(ops[1]);
  10544. break;
  10545. }
  10546. case OpStore:
  10547. {
  10548. auto *var = maybe_get<SPIRVariable>(ops[0]);
  10549. if (var && var->statically_assigned)
  10550. var->static_expression = ops[1];
  10551. else if (var && var->loop_variable && !var->loop_variable_enable)
  10552. var->static_expression = ops[1];
  10553. else if (var && var->remapped_variable && var->static_expression)
  10554. {
  10555. // Skip the write.
  10556. }
  10557. else if (flattened_structs.count(ops[0]))
  10558. {
  10559. store_flattened_struct(ops[0], ops[1]);
  10560. register_write(ops[0]);
  10561. }
  10562. else
  10563. {
  10564. emit_store_statement(ops[0], ops[1]);
  10565. }
  10566. // Storing a pointer results in a variable pointer, so we must conservatively assume
  10567. // we can write through it.
  10568. if (expression_type(ops[1]).pointer)
  10569. register_write(ops[1]);
  10570. break;
  10571. }
  10572. case OpArrayLength:
  10573. {
  10574. uint32_t result_type = ops[0];
  10575. uint32_t id = ops[1];
  10576. auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  10577. if (has_decoration(ops[2], DecorationNonUniform))
  10578. convert_non_uniform_expression(e, ops[2]);
  10579. set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
  10580. true);
  10581. break;
  10582. }
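// Illustrative GLSL sketch: for a runtime array "data" inside an SSBO "buf", the expression built
// above looks roughly like "uint(buf.data.length())", with the outer cast coming from
// type_to_glsl() of the result type.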
  10583. // Function calls
  10584. case OpFunctionCall:
  10585. {
  10586. uint32_t result_type = ops[0];
  10587. uint32_t id = ops[1];
  10588. uint32_t func = ops[2];
  10589. const auto *arg = &ops[3];
  10590. length -= 3;
  10591. auto &callee = get<SPIRFunction>(func);
  10592. auto &return_type = get<SPIRType>(callee.return_type);
  10593. bool pure = function_is_pure(callee);
  10594. bool callee_has_out_variables = false;
  10595. bool emit_return_value_as_argument = false;
  10596. // Invalidate out variables passed to functions since they can be OpStore'd to.
  10597. for (uint32_t i = 0; i < length; i++)
  10598. {
  10599. if (callee.arguments[i].write_count)
  10600. {
  10601. register_call_out_argument(arg[i]);
  10602. callee_has_out_variables = true;
  10603. }
  10604. flush_variable_declaration(arg[i]);
  10605. }
  10606. if (!return_type.array.empty() && !backend.can_return_array)
  10607. {
  10608. callee_has_out_variables = true;
  10609. emit_return_value_as_argument = true;
  10610. }
  10611. if (!pure)
  10612. register_impure_function_call();
  10613. string funexpr;
  10614. SmallVector<string> arglist;
  10615. funexpr += to_name(func) + "(";
  10616. if (emit_return_value_as_argument)
  10617. {
  10618. statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
  10619. arglist.push_back(to_name(id));
  10620. }
  10621. for (uint32_t i = 0; i < length; i++)
  10622. {
  10623. // Do not pass in separate images or samplers if we're remapping
  10624. // to combined image samplers.
  10625. if (skip_argument(arg[i]))
  10626. continue;
  10627. arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
  10628. }
  10629. for (auto &combined : callee.combined_parameters)
  10630. {
  10631. auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
  10632. auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
  10633. arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
  10634. }
  10635. append_global_func_args(callee, length, arglist);
  10636. funexpr += merge(arglist);
  10637. funexpr += ")";
  10638. // Check for function call constraints.
  10639. check_function_call_constraints(arg, length);
  10640. if (return_type.basetype != SPIRType::Void)
  10641. {
  10642. // If the function actually writes to an out variable,
  10643. // take the conservative route and do not forward.
  10644. // The problem is that we might not read the function
  10645. // result (and emit the function) before an out variable
// is read (a common case when the return value is ignored).
// In order to avoid starting to track invalid variables,
  10648. // just avoid the forwarding problem altogether.
  10649. bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
  10650. (forced_temporaries.find(id) == end(forced_temporaries));
  10651. if (emit_return_value_as_argument)
  10652. {
  10653. statement(funexpr, ";");
  10654. set<SPIRExpression>(id, to_name(id), result_type, true);
  10655. }
  10656. else
  10657. emit_op(result_type, id, funexpr, forward);
  10658. // Function calls are implicit loads from all variables in question.
  10659. // Set dependencies for them.
  10660. for (uint32_t i = 0; i < length; i++)
  10661. register_read(id, arg[i], forward);
  10662. // If we're going to forward the temporary result,
  10663. // put dependencies on every variable that must not change.
  10664. if (forward)
  10665. register_global_read_dependencies(callee, id);
  10666. }
  10667. else
  10668. statement(funexpr, ";");
  10669. break;
  10670. }
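// Illustrative GLSL sketch for the !backend.can_return_array path (hypothetical names): a callee
// returning float[4] is rewritten to take the result as its first out argument, roughly
//   float _ret[4];
//   compute_values(_ret, arg0, arg1);
// instead of assigning from the call expression.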
  10671. // Composite munging
  10672. case OpCompositeConstruct:
  10673. {
  10674. uint32_t result_type = ops[0];
  10675. uint32_t id = ops[1];
  10676. const auto *const elems = &ops[2];
  10677. length -= 2;
  10678. bool forward = true;
  10679. for (uint32_t i = 0; i < length; i++)
  10680. forward = forward && should_forward(elems[i]);
  10681. auto &out_type = get<SPIRType>(result_type);
  10682. auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
  10683. // Only splat if we have vector constructors.
  10684. // Arrays and structs must be initialized properly in full.
  10685. bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
  10686. bool splat = false;
  10687. bool swizzle_splat = false;
  10688. if (in_type)
  10689. {
  10690. splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
  10691. swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
  10692. if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
  10693. {
  10694. // Cannot swizzle literal integers as a special case.
  10695. swizzle_splat = false;
  10696. }
  10697. }
  10698. if (splat || swizzle_splat)
  10699. {
  10700. uint32_t input = elems[0];
  10701. for (uint32_t i = 0; i < length; i++)
  10702. {
  10703. if (input != elems[i])
  10704. {
  10705. splat = false;
  10706. swizzle_splat = false;
  10707. }
  10708. }
  10709. }
  10710. if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
  10711. forward = false;
  10712. if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
  10713. forward = false;
  10714. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  10715. forward = false;
  10716. string constructor_op;
  10717. if (backend.use_initializer_list && composite)
  10718. {
bool needs_trailing_bracket = false;
  10720. // Only use this path if we are building composites.
  10721. // This path cannot be used for arithmetic.
  10722. if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
  10723. constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
  10724. else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
  10725. {
  10726. // MSL path. Array constructor is baked into type here, do not use _constructor variant.
  10727. constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
needs_trailing_bracket = true;
  10729. }
  10730. constructor_op += "{ ";
  10731. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  10732. constructor_op += "0";
  10733. else if (splat)
  10734. constructor_op += to_unpacked_expression(elems[0]);
  10735. else
  10736. constructor_op += build_composite_combiner(result_type, elems, length);
  10737. constructor_op += " }";
if (needs_trailing_bracket)
  10739. constructor_op += ")";
  10740. }
  10741. else if (swizzle_splat && !composite)
  10742. {
  10743. constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
  10744. }
  10745. else
  10746. {
  10747. constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
  10748. if (type_is_empty(out_type) && !backend.supports_empty_struct)
  10749. constructor_op += "0";
  10750. else if (splat)
  10751. constructor_op += to_unpacked_expression(elems[0]);
  10752. else
  10753. constructor_op += build_composite_combiner(result_type, elems, length);
  10754. constructor_op += ")";
  10755. }
  10756. if (!constructor_op.empty())
  10757. {
  10758. emit_op(result_type, id, constructor_op, forward);
  10759. for (uint32_t i = 0; i < length; i++)
  10760. inherit_expression_dependencies(id, elems[i]);
  10761. }
  10762. break;
  10763. }
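// Illustrative GLSL sketch: four identical scalar elements take the splat path and construct
// "vec4(x)"; mixed elements become "vec4(x, y, z, w)"; and struct/array composites on
// initializer-list backends become "{ ... }", optionally prefixed with the type name when
// use_typed_initializer_list is set.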
  10764. case OpVectorInsertDynamic:
  10765. {
  10766. uint32_t result_type = ops[0];
  10767. uint32_t id = ops[1];
  10768. uint32_t vec = ops[2];
  10769. uint32_t comp = ops[3];
  10770. uint32_t index = ops[4];
  10771. flush_variable_declaration(vec);
  10772. // Make a copy, then use access chain to store the variable.
  10773. statement(declare_temporary(result_type, id), to_expression(vec), ";");
  10774. set<SPIRExpression>(id, to_name(id), result_type, true);
  10775. auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
  10776. statement(chain, " = ", to_unpacked_expression(comp), ";");
  10777. break;
  10778. }
  10779. case OpVectorExtractDynamic:
  10780. {
  10781. uint32_t result_type = ops[0];
  10782. uint32_t id = ops[1];
  10783. auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
  10784. emit_op(result_type, id, expr, should_forward(ops[2]));
  10785. inherit_expression_dependencies(id, ops[2]);
  10786. inherit_expression_dependencies(id, ops[3]);
  10787. break;
  10788. }
  10789. case OpCompositeExtract:
  10790. {
  10791. uint32_t result_type = ops[0];
  10792. uint32_t id = ops[1];
  10793. length -= 3;
  10794. auto &type = get<SPIRType>(result_type);
  10795. // We can only split the expression here if our expression is forwarded as a temporary.
  10796. bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
  10797. // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
  10798. auto &composite_type = expression_type(ops[2]);
  10799. bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
  10800. if (composite_type_is_complex)
  10801. allow_base_expression = false;
  10802. // Packed expressions or physical ID mapped expressions cannot be split up.
  10803. if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
  10804. has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
  10805. allow_base_expression = false;
  10806. // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
  10807. // into the base expression.
  10808. if (is_non_native_row_major_matrix(ops[2]))
  10809. allow_base_expression = false;
  10810. AccessChainMeta meta;
  10811. SPIRExpression *e = nullptr;
  10812. auto *c = maybe_get<SPIRConstant>(ops[2]);
  10813. if (c && !c->specialization && !composite_type_is_complex)
  10814. {
  10815. auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length);
  10816. e = &emit_op(result_type, id, expr, true, true);
  10817. }
  10818. else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
  10819. {
  10820. // Only apply this optimization if result is scalar.
  10821. // We want to split the access chain from the base.
  10822. // This is so we can later combine different CompositeExtract results
  10823. // with CompositeConstruct without emitting code like
  10824. //
  10825. // vec3 temp = texture(...).xyz
  10826. // vec4(temp.x, temp.y, temp.z, 1.0).
  10827. //
  10828. // when we actually wanted to emit this
  10829. // vec4(texture(...).xyz, 1.0).
  10830. //
  10831. // Including the base will prevent this and would trigger multiple reads
  10832. // from expression causing it to be forced to an actual temporary in GLSL.
  10833. auto expr = access_chain_internal(ops[2], &ops[3], length,
  10834. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
  10835. ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
  10836. e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
  10837. inherit_expression_dependencies(id, ops[2]);
  10838. e->base_expression = ops[2];
  10839. if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis)
  10840. set_decoration(ops[1], DecorationRelaxedPrecision);
  10841. }
  10842. else
  10843. {
  10844. auto expr = access_chain_internal(ops[2], &ops[3], length,
  10845. ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
  10846. e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
  10847. inherit_expression_dependencies(id, ops[2]);
  10848. }
  10849. // Pass through some meta information to the loaded expression.
  10850. // We can still end up loading a buffer type to a variable, then CompositeExtract from it
  10851. // instead of loading everything through an access chain.
  10852. e->need_transpose = meta.need_transpose;
  10853. if (meta.storage_is_packed)
  10854. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
  10855. if (meta.storage_physical_type != 0)
  10856. set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
  10857. if (meta.storage_is_invariant)
  10858. set_decoration(id, DecorationInvariant);
  10859. break;
  10860. }
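// Illustrative GLSL sketch of the base-expression optimization above: extracting components
// separately from a forwarded expression lets a later CompositeConstruct fold into
//   vec4(texture(uTex, vUV).xyz, 1.0)
// instead of first flushing "vec3 _tmp = texture(uTex, vUV).xyz;" and rebuilding from _tmp.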
  10861. case OpCompositeInsert:
  10862. {
  10863. uint32_t result_type = ops[0];
  10864. uint32_t id = ops[1];
  10865. uint32_t obj = ops[2];
  10866. uint32_t composite = ops[3];
  10867. const auto *elems = &ops[4];
  10868. length -= 4;
  10869. flush_variable_declaration(composite);
  10870. // CompositeInsert requires a copy + modification, but this is very awkward code in HLL.
  10871. // Speculate that the input composite is no longer used, and we can modify it in-place.
  10872. // There are various scenarios where this is not possible to satisfy.
  10873. bool can_modify_in_place = true;
  10874. forced_temporaries.insert(id);
  10875. // Cannot safely RMW PHI variables since they have no way to be invalidated,
  10876. // forcing temporaries is not going to help.
  10877. // This is similar for Constant and Undef inputs.
  10878. // The only safe thing to RMW is SPIRExpression.
  10879. // If the expression has already been used (i.e. used in a continue block), we have to keep using
  10880. // that loop variable, since we won't be able to override the expression after the fact.
  10881. // If the composite is hoisted, we might never be able to properly invalidate any usage
  10882. // of that composite in a subsequent loop iteration.
  10883. if (invalid_expressions.count(composite) ||
  10884. block_composite_insert_overwrite.count(composite) ||
  10885. hoisted_temporaries.count(id) || hoisted_temporaries.count(composite) ||
  10886. maybe_get<SPIRExpression>(composite) == nullptr)
  10887. {
  10888. can_modify_in_place = false;
  10889. }
  10890. else if (backend.requires_relaxed_precision_analysis &&
  10891. has_decoration(composite, DecorationRelaxedPrecision) !=
  10892. has_decoration(id, DecorationRelaxedPrecision) &&
  10893. get<SPIRType>(result_type).basetype != SPIRType::Struct)
  10894. {
  10895. // Similarly, if precision does not match for input and output,
  10896. // we cannot alias them. If we write a composite into a relaxed precision
  10897. // ID, we might get a false truncation.
  10898. can_modify_in_place = false;
  10899. }
  10900. if (can_modify_in_place)
  10901. {
  10902. // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place.
  10903. if (!forced_temporaries.count(composite))
  10904. force_temporary_and_recompile(composite);
  10905. auto chain = access_chain_internal(composite, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  10906. statement(chain, " = ", to_unpacked_expression(obj), ";");
  10907. set<SPIRExpression>(id, to_expression(composite), result_type, true);
  10908. invalid_expressions.insert(composite);
  10909. composite_insert_overwritten.insert(composite);
  10910. }
  10911. else
  10912. {
  10913. if (maybe_get<SPIRUndef>(composite) != nullptr)
  10914. {
  10915. emit_uninitialized_temporary_expression(result_type, id);
  10916. }
  10917. else
  10918. {
  10919. // Make a copy, then use access chain to store the variable.
  10920. statement(declare_temporary(result_type, id), to_expression(composite), ";");
  10921. set<SPIRExpression>(id, to_name(id), result_type, true);
  10922. }
  10923. auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
  10924. statement(chain, " = ", to_unpacked_expression(obj), ";");
  10925. }
  10926. break;
  10927. }
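// Illustrative GLSL sketch: when in-place modification is allowed, the insert collapses to
//   _composite.member = value;
// otherwise a copy is declared first ("T _new = _composite;") and the member store targets _new.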
  10928. case OpCopyMemory:
  10929. {
  10930. uint32_t lhs = ops[0];
  10931. uint32_t rhs = ops[1];
  10932. if (lhs != rhs)
  10933. {
  10934. uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET];
  10935. if (!tmp_id)
  10936. tmp_id = ir.increase_bound_by(1);
  10937. uint32_t tmp_type_id = expression_type(rhs).parent_type;
  10938. EmbeddedInstruction fake_load, fake_store;
  10939. fake_load.op = OpLoad;
  10940. fake_load.length = 3;
  10941. fake_load.ops.push_back(tmp_type_id);
  10942. fake_load.ops.push_back(tmp_id);
  10943. fake_load.ops.push_back(rhs);
  10944. fake_store.op = OpStore;
  10945. fake_store.length = 2;
  10946. fake_store.ops.push_back(lhs);
  10947. fake_store.ops.push_back(tmp_id);
  10948. // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible.
  10949. // Synthesize a fake Load and Store pair for CopyMemory.
  10950. emit_instruction(fake_load);
  10951. emit_instruction(fake_store);
  10952. }
  10953. break;
  10954. }
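// Illustrative sketch (hypothetical IDs): "OpCopyMemory %dst %src" is lowered by synthesizing
//   %tmp = OpLoad %type %src
//   OpStore %dst %tmp
// so every Load/Store workaround implemented above applies to plain memory copies as well.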
  10955. case OpCopyLogical:
  10956. {
  10957. // This is used for copying object of different types, arrays and structs.
  10958. // We need to unroll the copy, element-by-element.
  10959. uint32_t result_type = ops[0];
  10960. uint32_t id = ops[1];
  10961. uint32_t rhs = ops[2];
  10962. emit_uninitialized_temporary_expression(result_type, id);
  10963. emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
  10964. break;
  10965. }
  10966. case OpCopyObject:
  10967. {
  10968. uint32_t result_type = ops[0];
  10969. uint32_t id = ops[1];
  10970. uint32_t rhs = ops[2];
  10971. bool pointer = get<SPIRType>(result_type).pointer;
  10972. auto *chain = maybe_get<SPIRAccessChain>(rhs);
  10973. auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
  10974. if (chain)
  10975. {
  10976. // Cannot lower to a SPIRExpression, just copy the object.
  10977. auto &e = set<SPIRAccessChain>(id, *chain);
  10978. e.self = id;
  10979. }
  10980. else if (imgsamp)
  10981. {
  10982. // Cannot lower to a SPIRExpression, just copy the object.
  10983. // GLSL does not currently use this type and will never get here, but MSL does.
  10984. // Handled here instead of CompilerMSL for better integration and general handling,
  10985. // and in case GLSL or other subclasses require it in the future.
  10986. auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
  10987. e.self = id;
  10988. }
  10989. else if (expression_is_lvalue(rhs) && !pointer)
  10990. {
  10991. // Need a copy.
  10992. // For pointer types, we copy the pointer itself.
  10993. emit_op(result_type, id, to_unpacked_expression(rhs), false);
  10994. }
  10995. else
  10996. {
  10997. // RHS expression is immutable, so just forward it.
// Copying these things really makes no sense, but
// it seems to be allowed anyway.
  11000. auto &e = emit_op(result_type, id, to_expression(rhs), true, true);
  11001. if (pointer)
  11002. {
  11003. auto *var = maybe_get_backing_variable(rhs);
  11004. e.loaded_from = var ? var->self : ID(0);
  11005. }
  11006. // If we're copying an access chain, need to inherit the read expressions.
  11007. auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
  11008. if (rhs_expr)
  11009. {
  11010. e.implied_read_expressions = rhs_expr->implied_read_expressions;
  11011. e.expression_dependencies = rhs_expr->expression_dependencies;
  11012. }
  11013. }
  11014. break;
  11015. }
  11016. case OpVectorShuffle:
  11017. {
  11018. uint32_t result_type = ops[0];
  11019. uint32_t id = ops[1];
  11020. uint32_t vec0 = ops[2];
  11021. uint32_t vec1 = ops[3];
  11022. const auto *elems = &ops[4];
  11023. length -= 4;
  11024. auto &type0 = expression_type(vec0);
  11025. // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
  11026. // or in our case, T(0).
  11027. bool shuffle = false;
  11028. for (uint32_t i = 0; i < length; i++)
  11029. if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
  11030. shuffle = true;
  11031. // Cannot use swizzles with packed expressions, force shuffle path.
  11032. if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
  11033. shuffle = true;
  11034. string expr;
  11035. bool should_fwd, trivial_forward;
  11036. if (shuffle)
  11037. {
  11038. should_fwd = should_forward(vec0) && should_forward(vec1);
  11039. trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
  11040. // Constructor style and shuffling from two different vectors.
  11041. SmallVector<string> args;
  11042. for (uint32_t i = 0; i < length; i++)
  11043. {
  11044. if (elems[i] == 0xffffffffu)
  11045. {
  11046. // Use a constant 0 here.
  11047. // We could use the first component or similar, but then we risk propagating
// a value we might not need, and bogging down codegen.
  11049. SPIRConstant c;
  11050. c.constant_type = type0.parent_type;
  11051. assert(type0.parent_type != ID(0));
  11052. args.push_back(constant_expression(c));
  11053. }
  11054. else if (elems[i] >= type0.vecsize)
  11055. args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
  11056. else
  11057. args.push_back(to_extract_component_expression(vec0, elems[i]));
  11058. }
  11059. expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
  11060. }
  11061. else
  11062. {
  11063. should_fwd = should_forward(vec0);
  11064. trivial_forward = should_suppress_usage_tracking(vec0);
  11065. // We only source from first vector, so can use swizzle.
  11066. // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
  11067. expr += to_enclosed_unpacked_expression(vec0);
  11068. expr += ".";
  11069. for (uint32_t i = 0; i < length; i++)
  11070. {
  11071. assert(elems[i] != 0xffffffffu);
  11072. expr += index_to_swizzle(elems[i]);
  11073. }
  11074. if (backend.swizzle_is_function && length > 1)
  11075. expr += "()";
  11076. }
  11077. // A shuffle is trivial in that it doesn't actually *do* anything.
  11078. // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
  11079. emit_op(result_type, id, expr, should_fwd, trivial_forward);
  11080. inherit_expression_dependencies(id, vec0);
  11081. if (vec0 != vec1)
  11082. inherit_expression_dependencies(id, vec1);
  11083. break;
  11084. }
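// Illustrative GLSL sketch: a shuffle reading only from the first input becomes a swizzle such as
// "v0.xzy", while one mixing both inputs (or using the undefined index 0xffffffff) becomes a
// constructor, e.g. "vec3(v0.x, v1.y, 0.0)".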
  11085. // ALU
  11086. case OpIsNan:
  11087. if (!is_legacy())
  11088. GLSL_UFOP(isnan);
  11089. else
  11090. {
  11091. // Check if the number doesn't equal itself
  11092. auto &type = get<SPIRType>(ops[0]);
  11093. if (type.vecsize > 1)
  11094. emit_binary_func_op(ops[0], ops[1], ops[2], ops[2], "notEqual");
  11095. else
  11096. emit_binary_op(ops[0], ops[1], ops[2], ops[2], "!=");
  11097. }
  11098. break;
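// Illustrative GLSL sketch for the legacy path above: isnan(x) is emulated as "x != x"
// (or "notEqual(v, v)" for vectors), since NaN is the only value that compares unequal to itself.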
  11099. case OpIsInf:
  11100. if (!is_legacy())
  11101. GLSL_UFOP(isinf);
  11102. else
  11103. {
// inf * 2 == inf by IEEE 754 rules; note that 0.0 * 2 == 0.0 as well, hence the explicit != 0.0 check.
  11105. // This is more reliable than checking if product with zero is NaN
  11106. uint32_t result_type = ops[0];
  11107. uint32_t result_id = ops[1];
  11108. uint32_t operand = ops[2];
  11109. auto &type = get<SPIRType>(result_type);
  11110. std::string expr;
  11111. if (type.vecsize > 1)
  11112. {
  11113. expr = type_to_glsl_constructor(type);
  11114. expr += '(';
  11115. for (uint32_t i = 0; i < type.vecsize; i++)
  11116. {
  11117. auto comp = to_extract_component_expression(operand, i);
  11118. expr += join(comp, " != 0.0 && 2.0 * ", comp, " == ", comp);
  11119. if (i + 1 < type.vecsize)
  11120. expr += ", ";
  11121. }
  11122. expr += ')';
  11123. }
  11124. else
  11125. {
  11126. // Register an extra read to force writing out a temporary
  11127. auto oper = to_enclosed_expression(operand);
  11128. track_expression_read(operand);
  11129. expr += join(oper, " != 0.0 && 2.0 * ", oper, " == ", oper);
  11130. }
  11131. emit_op(result_type, result_id, expr, should_forward(operand));
  11132. inherit_expression_dependencies(result_id, operand);
  11133. }
  11134. break;
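// Illustrative GLSL sketch for the legacy path above: isinf(x) is emulated as
// "x != 0.0 && 2.0 * x == x", which only holds for +/- infinity under IEEE 754.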
  11135. case OpSNegate:
  11136. if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0])
  11137. GLSL_UOP_CAST(-);
  11138. else
  11139. GLSL_UOP(-);
  11140. break;
  11141. case OpFNegate:
  11142. GLSL_UOP(-);
  11143. break;
  11144. case OpIAdd:
  11145. {
  11146. // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
  11147. auto type = get<SPIRType>(ops[0]).basetype;
  11148. GLSL_BOP_CAST(+, type);
  11149. break;
  11150. }
  11151. case OpFAdd:
  11152. GLSL_BOP(+);
  11153. break;
  11154. case OpISub:
  11155. {
  11156. auto type = get<SPIRType>(ops[0]).basetype;
  11157. GLSL_BOP_CAST(-, type);
  11158. break;
  11159. }
  11160. case OpFSub:
  11161. GLSL_BOP(-);
  11162. break;
  11163. case OpIMul:
  11164. {
  11165. auto type = get<SPIRType>(ops[0]).basetype;
  11166. GLSL_BOP_CAST(*, type);
  11167. break;
  11168. }
  11169. case OpVectorTimesMatrix:
  11170. case OpMatrixTimesVector:
  11171. {
  11172. // If the matrix needs transpose, just flip the multiply order.
  11173. auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
  11174. if (e && e->need_transpose)
  11175. {
  11176. e->need_transpose = false;
  11177. string expr;
  11178. if (opcode == OpMatrixTimesVector)
  11179. expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
  11180. enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
  11181. else
  11182. expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
  11183. to_enclosed_unpacked_expression(ops[2]));
  11184. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  11185. emit_op(ops[0], ops[1], expr, forward);
  11186. e->need_transpose = true;
  11187. inherit_expression_dependencies(ops[1], ops[2]);
  11188. inherit_expression_dependencies(ops[1], ops[3]);
  11189. }
  11190. else
  11191. GLSL_BOP(*);
  11192. break;
  11193. }
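// Illustrative GLSL sketch: for a matrix flagged need_transpose (stored row-major), "M * v" is
// emitted as "v * M" (and "v * M" as "M * v"), which evaluates the multiply against the
// transposed storage without an explicit transpose() call.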
  11194. case OpMatrixTimesMatrix:
  11195. {
  11196. auto *a = maybe_get<SPIRExpression>(ops[2]);
  11197. auto *b = maybe_get<SPIRExpression>(ops[3]);
  11198. // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
  11199. // a^T * b^T = (b * a)^T.
  11200. if (a && b && a->need_transpose && b->need_transpose)
  11201. {
  11202. a->need_transpose = false;
  11203. b->need_transpose = false;
  11204. auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
  11205. enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
  11206. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  11207. auto &e = emit_op(ops[0], ops[1], expr, forward);
  11208. e.need_transpose = true;
  11209. a->need_transpose = true;
  11210. b->need_transpose = true;
  11211. inherit_expression_dependencies(ops[1], ops[2]);
  11212. inherit_expression_dependencies(ops[1], ops[3]);
  11213. }
  11214. else
  11215. GLSL_BOP(*);
  11216. break;
  11217. }
  11218. case OpMatrixTimesScalar:
  11219. {
  11220. auto *a = maybe_get<SPIRExpression>(ops[2]);
  11221. // If the matrix need transpose, just mark the result as needing so.
  11222. if (a && a->need_transpose)
  11223. {
  11224. a->need_transpose = false;
  11225. auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), " * ",
  11226. to_enclosed_unpacked_expression(ops[3]));
  11227. bool forward = should_forward(ops[2]) && should_forward(ops[3]);
  11228. auto &e = emit_op(ops[0], ops[1], expr, forward);
  11229. e.need_transpose = true;
  11230. a->need_transpose = true;
  11231. inherit_expression_dependencies(ops[1], ops[2]);
  11232. inherit_expression_dependencies(ops[1], ops[3]);
  11233. }
  11234. else
  11235. GLSL_BOP(*);
  11236. break;
  11237. }
  11238. case OpFMul:
  11239. case OpVectorTimesScalar:
  11240. GLSL_BOP(*);
  11241. break;
  11242. case OpOuterProduct:
  11243. if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
  11244. {
			uint32_t result_type = ops[0];
			uint32_t id = ops[1];
			uint32_t a = ops[2];
			uint32_t b = ops[3];
			auto &type = get<SPIRType>(result_type);
			string expr = type_to_glsl_constructor(type);
			expr += "(";
			for (uint32_t col = 0; col < type.columns; col++)
			{
				expr += to_enclosed_expression(a);
				expr += " * ";
				expr += to_extract_component_expression(b, col);
				if (col + 1 < type.columns)
					expr += ", ";
			}
			expr += ")";
			emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
			inherit_expression_dependencies(id, a);
			inherit_expression_dependencies(id, b);
		}
		else
			GLSL_BFOP(outerProduct);
		break;
	case OpDot:
		GLSL_BFOP(dot);
		break;
	case OpTranspose:
		if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
		{
			// transpose() is not available, so instead, flip need_transpose,
			// which can later be turned into an emulated transpose op by
			// convert_row_major_matrix(), if necessary.
			uint32_t result_type = ops[0];
			uint32_t result_id = ops[1];
			uint32_t input = ops[2];
			// Force need_transpose to false temporarily to prevent
			// to_expression() from doing the transpose.
			bool need_transpose = false;
			auto *input_e = maybe_get<SPIRExpression>(input);
			if (input_e)
				swap(need_transpose, input_e->need_transpose);
			bool forward = should_forward(input);
			auto &e = emit_op(result_type, result_id, to_expression(input), forward);
			e.need_transpose = !need_transpose;
			// Restore the old need_transpose flag.
			if (input_e)
				input_e->need_transpose = need_transpose;
		}
		else
			GLSL_UFOP(transpose);
		break;
	case OpSRem:
	{
		uint32_t result_type = ops[0];
		uint32_t result_id = ops[1];
		uint32_t op0 = ops[2];
		uint32_t op1 = ops[3];
		// Needs special handling.
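		// SRem takes its sign from the dividend, so emulate it as op0 - op1 * (op0 / op1)
		// (assuming truncating integer division) rather than relying on %.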
		bool forward = should_forward(op0) && should_forward(op1);
		auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
		    to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
		if (implicit_integer_promotion)
			expr = join(type_to_glsl(get<SPIRType>(result_type)), '(', expr, ')');
		emit_op(result_type, result_id, expr, forward);
		inherit_expression_dependencies(result_id, op0);
		inherit_expression_dependencies(result_id, op1);
		break;
	}
	case OpSDiv:
		GLSL_BOP_CAST(/, int_type);
		break;
	case OpUDiv:
		GLSL_BOP_CAST(/, uint_type);
		break;
	case OpIAddCarry:
	case OpISubBorrow:
	{
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
		else if (!options.es && options.version < 400)
			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
		uint32_t result_type = ops[0];
		uint32_t result_id = ops[1];
		uint32_t op0 = ops[2];
		uint32_t op1 = ops[3];
		auto &type = get<SPIRType>(result_type);
		emit_uninitialized_temporary_expression(result_type, result_id);
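		// uaddCarry()/usubBorrow() return the low result and write the carry/borrow through an
		// out parameter, which maps onto the two members of the SPIR-V result struct.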
		const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
		statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
		    to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
		break;
	}
	case OpUMulExtended:
	case OpSMulExtended:
	{
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
		else if (!options.es && options.version < 400)
			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
		uint32_t result_type = ops[0];
		uint32_t result_id = ops[1];
		uint32_t op0 = ops[2];
		uint32_t op1 = ops[3];
		auto &type = get<SPIRType>(result_type);
		emit_uninitialized_temporary_expression(result_type, result_id);
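		// umulExtended()/imulExtended() write the high and low halves to out parameters;
		// the SPIR-V result struct keeps the low half in member 0 and the high half in member 1.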
		const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
		statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
		    to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
		break;
	}
	case OpFDiv:
		GLSL_BOP(/);
		break;
	case OpShiftRightLogical:
		GLSL_BOP_CAST(>>, uint_type);
		break;
	case OpShiftRightArithmetic:
		GLSL_BOP_CAST(>>, int_type);
		break;
	case OpShiftLeftLogical:
	{
		auto type = get<SPIRType>(ops[0]).basetype;
		GLSL_BOP_CAST(<<, type);
		break;
	}
	case OpBitwiseOr:
	{
		auto type = get<SPIRType>(ops[0]).basetype;
		GLSL_BOP_CAST(|, type);
		break;
	}
	case OpBitwiseXor:
	{
		auto type = get<SPIRType>(ops[0]).basetype;
		GLSL_BOP_CAST(^, type);
		break;
	}
	case OpBitwiseAnd:
	{
		auto type = get<SPIRType>(ops[0]).basetype;
		GLSL_BOP_CAST(&, type);
		break;
	}
	case OpNot:
		if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0])
			GLSL_UOP_CAST(~);
		else
			GLSL_UOP(~);
		break;
	case OpUMod:
		GLSL_BOP_CAST(%, uint_type);
		break;
	case OpSMod:
		GLSL_BOP_CAST(%, int_type);
		break;
	case OpFMod:
		GLSL_BFOP(mod);
		break;
	case OpFRem:
	{
		uint32_t result_type = ops[0];
		uint32_t result_id = ops[1];
		uint32_t op0 = ops[2];
		uint32_t op1 = ops[3];
		// Needs special handling.
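		// FRem takes its sign from op0 (the dividend), so emulate it as op0 - op1 * trunc(op0 / op1).
		// GLSL's mod() follows the sign of op1 instead and is what OpFMod maps to above.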
		bool forward = should_forward(op0) && should_forward(op1);
		std::string expr;
		if (!is_legacy())
		{
			expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
			    to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
		}
		else
		{
			// Legacy GLSL has no trunc, emulate by casting to int and back
			auto &op0_type = expression_type(op0);
			auto via_type = op0_type;
			via_type.basetype = SPIRType::Int;
			expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ",
			    type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(",
			    to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), "))");
		}
		emit_op(result_type, result_id, expr, forward);
		inherit_expression_dependencies(result_id, op0);
		inherit_expression_dependencies(result_id, op1);
		break;
	}
	// Relational
	case OpAny:
		GLSL_UFOP(any);
		break;
	case OpAll:
		GLSL_UFOP(all);
		break;
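	// OpSelect operands are (condition, true-value, false-value); emit_mix_op expects
	// (false-value, true-value, condition), hence the reversed argument order below.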
	case OpSelect:
		emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
		break;
	case OpLogicalOr:
	{
		// No vector variant in GLSL for logical OR.
		auto result_type = ops[0];
		auto id = ops[1];
		auto &type = get<SPIRType>(result_type);
		if (type.vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
		else
			GLSL_BOP(||);
		break;
	}
	case OpLogicalAnd:
	{
		// No vector variant in GLSL for logical AND.
		auto result_type = ops[0];
		auto id = ops[1];
		auto &type = get<SPIRType>(result_type);
		if (type.vecsize > 1)
			emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
		else
			GLSL_BOP(&&);
		break;
	}
	case OpLogicalNot:
	{
		auto &type = get<SPIRType>(ops[0]);
		if (type.vecsize > 1)
			GLSL_UFOP(not );
		else
			GLSL_UOP(!);
		break;
	}
	case OpIEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP_CAST(equal, int_type);
		else
			GLSL_BOP_CAST(==, int_type);
		break;
	}
	case OpLogicalEqual:
	case OpFOrdEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP(equal);
		else
			GLSL_BOP(==);
		break;
	}
	case OpINotEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP_CAST(notEqual, int_type);
		else
			GLSL_BOP_CAST(!=, int_type);
		break;
	}
	case OpLogicalNotEqual:
	case OpFOrdNotEqual:
	case OpFUnordNotEqual:
	{
		// GLSL is fuzzy on what to do with ordered vs unordered not equal.
		// glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE,
		// but this means we have no easy way of implementing ordered not equal.
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP(notEqual);
		else
			GLSL_BOP(!=);
		break;
	}
	case OpUGreaterThan:
	case OpSGreaterThan:
	{
		auto type = opcode == OpUGreaterThan ? uint_type : int_type;
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP_CAST(greaterThan, type);
		else
			GLSL_BOP_CAST(>, type);
		break;
	}
	case OpFOrdGreaterThan:
	{
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP(greaterThan);
		else
			GLSL_BOP(>);
		break;
	}
	case OpUGreaterThanEqual:
	case OpSGreaterThanEqual:
	{
		auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP_CAST(greaterThanEqual, type);
		else
			GLSL_BOP_CAST(>=, type);
		break;
	}
	case OpFOrdGreaterThanEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP(greaterThanEqual);
		else
			GLSL_BOP(>=);
		break;
	}
	case OpULessThan:
	case OpSLessThan:
	{
		auto type = opcode == OpULessThan ? uint_type : int_type;
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP_CAST(lessThan, type);
		else
			GLSL_BOP_CAST(<, type);
		break;
	}
	case OpFOrdLessThan:
	{
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP(lessThan);
		else
			GLSL_BOP(<);
		break;
	}
	case OpULessThanEqual:
	case OpSLessThanEqual:
	{
		auto type = opcode == OpULessThanEqual ? uint_type : int_type;
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP_CAST(lessThanEqual, type);
		else
			GLSL_BOP_CAST(<=, type);
		break;
	}
	case OpFOrdLessThanEqual:
	{
		if (expression_type(ops[2]).vecsize > 1)
			GLSL_BFOP(lessThanEqual);
		else
			GLSL_BOP(<=);
		break;
	}
	// Conversion
	case OpSConvert:
	case OpConvertSToF:
	case OpUConvert:
	case OpConvertUToF:
	{
		auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto &type = get<SPIRType>(result_type);
		auto &arg_type = expression_type(ops[2]);
		auto func = type_to_glsl_constructor(type);
		if (arg_type.width < type.width || type_is_floating_point(type))
			emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
		else
			emit_unary_func_op(result_type, id, ops[2], func.c_str());
		break;
	}
	case OpConvertFToU:
	case OpConvertFToS:
	{
		// Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto &type = get<SPIRType>(result_type);
		auto expected_type = type;
		auto &float_type = expression_type(ops[2]);
		expected_type.basetype =
		    opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
		auto func = type_to_glsl_constructor(expected_type);
		emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
		break;
	}
	case OpFConvert:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
		emit_unary_func_op(result_type, id, ops[2], func.c_str());
		break;
	}
	case OpBitcast:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t arg = ops[2];
		if (!emit_complex_bitcast(result_type, id, arg))
		{
			auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
			emit_unary_func_op(result_type, id, arg, op.c_str());
		}
		break;
	}
	case OpQuantizeToF16:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t arg = ops[2];
		string op;
		auto &type = get<SPIRType>(result_type);
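		// Round-trip through packHalf2x16()/unpackHalf2x16() to force 16-bit precision;
		// vec3/vec4 inputs are split into two 2-component packs.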
		switch (type.vecsize)
		{
		case 1:
			op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
			break;
		case 2:
			op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
			break;
		case 3:
		{
			auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
			auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
			op = join("vec3(", op0, ", ", op1, ")");
			break;
		}
		case 4:
		{
			auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
			auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
			op = join("vec4(", op0, ", ", op1, ")");
			break;
		}
		default:
			SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
		}
		emit_op(result_type, id, op, should_forward(arg));
		inherit_expression_dependencies(id, arg);
		break;
	}
	// Derivatives
	case OpDPdx:
		GLSL_UFOP(dFdx);
		if (is_legacy_es())
			require_extension_internal("GL_OES_standard_derivatives");
		register_control_dependent_expression(ops[1]);
		break;
	case OpDPdy:
		GLSL_UFOP(dFdy);
		if (is_legacy_es())
			require_extension_internal("GL_OES_standard_derivatives");
		register_control_dependent_expression(ops[1]);
		break;
	case OpDPdxFine:
		GLSL_UFOP(dFdxFine);
		if (options.es)
		{
			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
		}
		if (options.version < 450)
			require_extension_internal("GL_ARB_derivative_control");
		register_control_dependent_expression(ops[1]);
		break;
	case OpDPdyFine:
		GLSL_UFOP(dFdyFine);
		if (options.es)
		{
			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
		}
		if (options.version < 450)
			require_extension_internal("GL_ARB_derivative_control");
		register_control_dependent_expression(ops[1]);
		break;
	case OpDPdxCoarse:
		if (options.es)
		{
			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
		}
		GLSL_UFOP(dFdxCoarse);
		if (options.version < 450)
			require_extension_internal("GL_ARB_derivative_control");
		register_control_dependent_expression(ops[1]);
		break;
	case OpDPdyCoarse:
		GLSL_UFOP(dFdyCoarse);
		if (options.es)
		{
			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
		}
		if (options.version < 450)
			require_extension_internal("GL_ARB_derivative_control");
		register_control_dependent_expression(ops[1]);
		break;
	case OpFwidth:
		GLSL_UFOP(fwidth);
		if (is_legacy_es())
			require_extension_internal("GL_OES_standard_derivatives");
		register_control_dependent_expression(ops[1]);
		break;
	case OpFwidthCoarse:
		GLSL_UFOP(fwidthCoarse);
		if (options.es)
		{
			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
		}
		if (options.version < 450)
			require_extension_internal("GL_ARB_derivative_control");
		register_control_dependent_expression(ops[1]);
		break;
	case OpFwidthFine:
		GLSL_UFOP(fwidthFine);
		if (options.es)
		{
			SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
		}
		if (options.version < 450)
			require_extension_internal("GL_ARB_derivative_control");
		register_control_dependent_expression(ops[1]);
		break;
	// Bitfield
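	// GLSL's bitfieldInsert()/bitfieldExtract() take offset and bits as int, so those
	// operands are always cast to int regardless of the value operand's signedness.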
	case OpBitFieldInsert:
	{
		emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
		break;
	}
	case OpBitFieldSExtract:
	{
		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
		    SPIRType::Int, SPIRType::Int);
		break;
	}
	case OpBitFieldUExtract:
	{
		emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
		    SPIRType::Int, SPIRType::Int);
		break;
	}
	case OpBitReverse:
		// BitReverse does not have issues with sign since result type must match input type.
		GLSL_UFOP(bitfieldReverse);
		break;
	case OpBitCount:
	{
		auto basetype = expression_type(ops[2]).basetype;
		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
		break;
	}
	// Atomics
	case OpAtomicExchange:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		// Ignore semantics for now, probably only relevant to CL.
		uint32_t val = ops[5];
		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
		emit_atomic_func_op(result_type, id, ptr, val, op);
		break;
	}
	case OpAtomicCompareExchange:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t ptr = ops[2];
		uint32_t val = ops[6];
		uint32_t comp = ops[7];
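		// atomicCompSwap()/imageAtomicCompSwap() take (mem, compare, data), so the comparator
		// is passed before the new value.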
		const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
		emit_atomic_func_op(result_type, id, ptr, comp, val, op);
		break;
	}
	case OpAtomicLoad:
	{
		// In plain GLSL, we have no atomic loads, so emulate this by fetch adding by 0 and hope compiler figures it out.
		// Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
		auto &type = expression_type(ops[2]);
		forced_temporaries.insert(ops[1]);
		bool atomic_image = check_atomic_image(ops[2]);
		bool unsigned_type = (type.basetype == SPIRType::UInt) ||
		    (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
		const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
		const char *increment = unsigned_type ? "0u" : "0";
		emit_op(ops[0], ops[1],
		    join(op, "(",
		    to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
		flush_all_atomic_capable_variables();
		break;
	}
	case OpAtomicStore:
	{
		// In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
		// Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
		uint32_t ptr = ops[0];
		// Ignore semantics for now, probably only relevant to CL.
		uint32_t val = ops[3];
		const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
		statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");");
		flush_all_atomic_capable_variables();
		break;
	}
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	{
		forced_temporaries.insert(ops[1]);
		auto &type = expression_type(ops[2]);
		if (type.storage == StorageClassAtomicCounter)
		{
			// Legacy GLSL stuff, not sure if this is relevant to support.
			if (opcode == OpAtomicIIncrement)
				GLSL_UFOP(atomicCounterIncrement);
			else
				GLSL_UFOP(atomicCounterDecrement);
		}
		else
		{
			bool atomic_image = check_atomic_image(ops[2]);
			bool unsigned_type = (type.basetype == SPIRType::UInt) ||
			    (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
			const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
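			// GLSL has no atomic increment/decrement, so add +1 or -1 via atomicAdd;
			// unsigned decrement uses uint(-1), relying on wrap-around.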
			const char *increment = nullptr;
			if (opcode == OpAtomicIIncrement && unsigned_type)
				increment = "1u";
			else if (opcode == OpAtomicIIncrement)
				increment = "1";
			else if (unsigned_type)
				increment = "uint(-1)";
			else
				increment = "-1";
			emit_op(ops[0], ops[1],
			    join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
		}
		flush_all_atomic_capable_variables();
		break;
	}
	case OpAtomicIAdd:
	case OpAtomicFAddEXT:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
		break;
	}
	case OpAtomicISub:
	{
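		// GLSL has no atomicSub(), so negate the operand and reuse atomicAdd()/imageAtomicAdd().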
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
		forced_temporaries.insert(ops[1]);
		auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
		emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
		flush_all_atomic_capable_variables();
		break;
	}
	case OpAtomicSMin:
	case OpAtomicUMin:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
		break;
	}
	case OpAtomicSMax:
	case OpAtomicUMax:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
		break;
	}
	case OpAtomicAnd:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
		break;
	}
	case OpAtomicOr:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
		break;
	}
	case OpAtomicXor:
	{
		const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
		break;
	}
	// Geometry shaders
	case OpEmitVertex:
		statement("EmitVertex();");
		break;
	case OpEndPrimitive:
		statement("EndPrimitive();");
		break;
	case OpEmitStreamVertex:
	{
		if (options.es)
			SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
		else if (!options.es && options.version < 400)
			SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
		auto stream_expr = to_expression(ops[0]);
		if (expression_type(ops[0]).basetype != SPIRType::Int)
			stream_expr = join("int(", stream_expr, ")");
		statement("EmitStreamVertex(", stream_expr, ");");
		break;
	}
	case OpEndStreamPrimitive:
	{
		if (options.es)
			SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
		else if (!options.es && options.version < 400)
			SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
		auto stream_expr = to_expression(ops[0]);
		if (expression_type(ops[0]).basetype != SPIRType::Int)
			stream_expr = join("int(", stream_expr, ")");
		statement("EndStreamPrimitive(", stream_expr, ");");
		break;
	}
	// Textures
	case OpImageSampleExplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSampleDrefExplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageSampleImplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleDrefImplicitLod:
	case OpImageSampleProjDrefImplicitLod:
	case OpImageFetch:
	case OpImageGather:
	case OpImageDrefGather:
		// Gets a bit hairy, so move this to a separate instruction.
		emit_texture_op(instruction, false);
		break;
	case OpImageSparseSampleExplicitLod:
	case OpImageSparseSampleProjExplicitLod:
	case OpImageSparseSampleDrefExplicitLod:
	case OpImageSparseSampleProjDrefExplicitLod:
	case OpImageSparseSampleImplicitLod:
	case OpImageSparseSampleProjImplicitLod:
	case OpImageSparseSampleDrefImplicitLod:
	case OpImageSparseSampleProjDrefImplicitLod:
	case OpImageSparseFetch:
	case OpImageSparseGather:
	case OpImageSparseDrefGather:
		// Gets a bit hairy, so move this to a separate instruction.
		emit_texture_op(instruction, true);
		break;
	case OpImageSparseTexelsResident:
		if (options.es)
			SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL.");
		require_extension_internal("GL_ARB_sparse_texture2");
		emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
		break;
	case OpImage:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		// Suppress usage tracking.
		auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
		// When using the image, we need to know which variable it is actually loaded from.
		auto *var = maybe_get_backing_variable(ops[2]);
		e.loaded_from = var ? var->self : ID(0);
		break;
	}
	case OpImageQueryLod:
	{
		const char *op = nullptr;
		if (!options.es && options.version < 400)
		{
			require_extension_internal("GL_ARB_texture_query_lod");
			// For some reason, the ARB spec is all-caps.
			op = "textureQueryLOD";
		}
		else if (options.es)
		{
			if (options.version < 300)
				SPIRV_CROSS_THROW("textureQueryLod not supported in legacy ES");
			require_extension_internal("GL_EXT_texture_query_lod");
			op = "textureQueryLOD";
		}
		else
			op = "textureQueryLod";
		auto sampler_expr = to_expression(ops[2]);
		if (has_decoration(ops[2], DecorationNonUniform))
		{
			if (maybe_get_backing_variable(ops[2]))
				convert_non_uniform_expression(sampler_expr, ops[2]);
			else if (*backend.nonuniform_qualifier != '\0')
				sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")");
		}
		bool forward = should_forward(ops[3]);
		emit_op(ops[0], ops[1],
		    join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"),
		    forward);
		inherit_expression_dependencies(ops[1], ops[2]);
		inherit_expression_dependencies(ops[1], ops[3]);
		register_control_dependent_expression(ops[1]);
		break;
	}
	case OpImageQueryLevels:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_texture_query_levels");
		if (options.es)
			SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
		auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::Int, expr);
		emit_op(result_type, id, expr, true);
		break;
	}
	case OpImageQuerySamples:
	{
		auto &type = expression_type(ops[2]);
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		if (options.es)
			SPIRV_CROSS_THROW("textureSamples and imageSamples not supported in ES profile.");
		else if (options.version < 450)
			require_extension_internal("GL_ARB_texture_query_samples");
		string expr;
		if (type.image.sampled == 2)
			expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")");
		else
			expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::Int, expr);
		emit_op(result_type, id, expr, true);
		break;
	}
	case OpSampledImage:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_sampled_image_op(result_type, id, ops[2], ops[3]);
		inherit_expression_dependencies(id, ops[2]);
		inherit_expression_dependencies(id, ops[3]);
		break;
	}
	case OpImageQuerySizeLod:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		uint32_t img = ops[2];
		auto &type = expression_type(img);
		auto &imgtype = get<SPIRType>(type.self);
		std::string fname = "textureSize";
		if (is_legacy_desktop())
		{
			fname = legacy_tex_op(fname, imgtype, img);
		}
		else if (is_legacy_es())
			SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
		auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
		    bitcast_expression(SPIRType::Int, ops[3]), ")");
		// ES needs to emulate 1D images as 2D.
		if (type.image.dim == Dim1D && options.es)
			expr = join(expr, ".x");
		auto &restype = get<SPIRType>(ops[0]);
		expr = bitcast_expression(restype, SPIRType::Int, expr);
		emit_op(result_type, id, expr, true);
		break;
	}
	// Image load/store
	case OpImageRead:
	case OpImageSparseRead:
	{
		// We added Nonreadable speculatively to the OpImage variable due to glslangValidator
		// not adding the proper qualifiers.
		// If it turns out we need to read the image after all, remove the qualifier and recompile.
		auto *var = maybe_get_backing_variable(ops[2]);
		if (var)
		{
			auto &flags = get_decoration_bitset(var->self);
			if (flags.get(DecorationNonReadable))
			{
				unset_decoration(var->self, DecorationNonReadable);
				force_recompile();
			}
		}
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		bool pure;
		string imgexpr;
		auto &type = expression_type(ops[2]);
		if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
		{
			if (type.image.ms)
				SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
			auto itr =
			    find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
			if (itr == end(pls_inputs))
			{
				// For non-PLS inputs, we rely on subpass type remapping information to get it right
				// since ImageRead always returns 4-component vectors and the backing type is opaque.
				if (!var->remapped_components)
					SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
				imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
			}
			else
			{
				// PLS input could have different number of components than what the SPIR expects, swizzle to
				// the appropriate vector size.
				uint32_t components = pls_format_to_components(itr->format);
				imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
			}
			pure = true;
		}
		else if (type.image.dim == DimSubpassData)
		{
			if (var && subpass_input_is_framebuffer_fetch(var->self))
			{
				imgexpr = to_expression(var->self);
			}
			else if (options.vulkan_semantics)
			{
				// With Vulkan semantics, use the proper Vulkan GLSL construct.
				if (type.image.ms)
				{
					uint32_t operands = ops[4];
					if (operands != ImageOperandsSampleMask || length != 6)
						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
						    "operand mask was used.");
					uint32_t samples = ops[5];
					imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")");
				}
				else
					imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")");
			}
			else
			{
				if (type.image.ms)
				{
					uint32_t operands = ops[4];
					if (operands != ImageOperandsSampleMask || length != 6)
						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
						    "operand mask was used.");
					uint32_t samples = ops[5];
					imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
					    to_expression(samples), ")");
				}
				else
				{
					// Implement subpass loads via texture barrier style sampling.
					imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
				}
			}
			imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
			pure = true;
		}
		else
		{
			bool sparse = opcode == OpImageSparseRead;
			uint32_t sparse_code_id = 0;
			uint32_t sparse_texel_id = 0;
			if (sparse)
				emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
			// imageLoad only accepts int coords, not uint.
			auto coord_expr = to_expression(ops[3]);
			auto target_coord_type = expression_type(ops[3]);
			target_coord_type.basetype = SPIRType::Int;
			coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
			// ES needs to emulate 1D images as 2D.
			if (type.image.dim == Dim1D && options.es)
				coord_expr = join("ivec2(", coord_expr, ", 0)");
			// Plain image load/store.
			if (sparse)
			{
				if (type.image.ms)
				{
					uint32_t operands = ops[4];
					if (operands != ImageOperandsSampleMask || length != 6)
						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
						    "operand mask was used.");
					uint32_t samples = ops[5];
					statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
					    coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
				}
				else
				{
					statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
					    coord_expr, ", ", to_expression(sparse_texel_id), ");");
				}
				imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
				    to_expression(sparse_texel_id), ")");
			}
			else
			{
				if (type.image.ms)
				{
					uint32_t operands = ops[4];
					if (operands != ImageOperandsSampleMask || length != 6)
						SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
						    "operand mask was used.");
					uint32_t samples = ops[5];
					imgexpr =
					    join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
				}
				else
					imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")");
			}
			if (!sparse)
				imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
			pure = false;
		}
		if (var)
		{
			bool forward = forced_temporaries.find(id) == end(forced_temporaries);
			auto &e = emit_op(result_type, id, imgexpr, forward);
			// We only need to track dependencies if we're reading from image load/store.
			if (!pure)
			{
				e.loaded_from = var->self;
				if (forward)
					var->dependees.push_back(id);
			}
		}
		else
			emit_op(result_type, id, imgexpr, false);
		inherit_expression_dependencies(id, ops[2]);
		if (type.image.ms)
			inherit_expression_dependencies(id, ops[5]);
		break;
	}
	case OpImageTexelPointer:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		auto coord_expr = to_expression(ops[3]);
		auto target_coord_type = expression_type(ops[3]);
		target_coord_type.basetype = SPIRType::Int;
		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
		auto expr = join(to_expression(ops[2]), ", ", coord_expr);
		auto &e = set<SPIRExpression>(id, expr, result_type, true);
		// When using the pointer, we need to know which variable it is actually loaded from.
		auto *var = maybe_get_backing_variable(ops[2]);
		e.loaded_from = var ? var->self : ID(0);
		inherit_expression_dependencies(id, ops[3]);
		break;
	}
	case OpImageWrite:
	{
		// We added Nonwritable speculatively to the OpImage variable due to glslangValidator
		// not adding the proper qualifiers.
		// If it turns out we need to write to the image after all, remove the qualifier and recompile.
		auto *var = maybe_get_backing_variable(ops[0]);
		if (var)
		{
			if (has_decoration(var->self, DecorationNonWritable))
			{
				unset_decoration(var->self, DecorationNonWritable);
				force_recompile();
			}
		}
		auto &type = expression_type(ops[0]);
		auto &value_type = expression_type(ops[2]);
		auto store_type = value_type;
		store_type.vecsize = 4;
		// imageStore only accepts int coords, not uint.
		auto coord_expr = to_expression(ops[1]);
		auto target_coord_type = expression_type(ops[1]);
		target_coord_type.basetype = SPIRType::Int;
		coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
		// ES needs to emulate 1D images as 2D.
		if (type.image.dim == Dim1D && options.es)
			coord_expr = join("ivec2(", coord_expr, ", 0)");
		if (type.image.ms)
		{
			uint32_t operands = ops[3];
			if (operands != ImageOperandsSampleMask || length != 5)
				SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
			uint32_t samples = ops[4];
			statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
			    remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
		}
		else
			statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ",
			    remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
		if (var && variable_storage_is_aliased(*var))
			flush_all_aliased_variables();
		break;
	}
	case OpImageQuerySize:
	{
		auto &type = expression_type(ops[2]);
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		if (type.basetype == SPIRType::Image)
		{
			string expr;
			if (type.image.sampled == 2)
			{
				if (!options.es && options.version < 430)
					require_extension_internal("GL_ARB_shader_image_size");
				else if (options.es && options.version < 310)
					SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
				// The size of an image is always constant.
				expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")");
			}
			else
			{
				// This path is hit for samplerBuffers and multisampled images which do not have LOD.
				std::string fname = "textureSize";
				if (is_legacy())
				{
					auto &imgtype = get<SPIRType>(type.self);
					fname = legacy_tex_op(fname, imgtype, ops[2]);
				}
				expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
			}
			auto &restype = get<SPIRType>(ops[0]);
			expr = bitcast_expression(restype, SPIRType::Int, expr);
			emit_op(result_type, id, expr, true);
		}
		else
			SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
		break;
	}
	// Compute
	case OpControlBarrier:
	case OpMemoryBarrier:
	{
		uint32_t execution_scope = 0;
		uint32_t memory;
		uint32_t semantics;
		if (opcode == OpMemoryBarrier)
		{
			memory = evaluate_constant_u32(ops[0]);
			semantics = evaluate_constant_u32(ops[1]);
		}
		else
		{
			execution_scope = evaluate_constant_u32(ops[0]);
			memory = evaluate_constant_u32(ops[1]);
			semantics = evaluate_constant_u32(ops[2]);
		}
		if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
		{
			// OpControlBarrier with ScopeSubgroup is subgroupBarrier()
			if (opcode != OpControlBarrier)
			{
				request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
			}
			else
			{
				request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
			}
		}
		if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
		{
			// Control shaders only have barriers, and it implies memory barriers.
			if (opcode == OpControlBarrier)
				statement("barrier();");
			break;
		}
		// We only care about these flags, acquire/release and friends are not relevant to GLSL.
		semantics = mask_relevant_memory_semantics(semantics);
		if (opcode == OpMemoryBarrier)
		{
			// If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
			// does what we need, so we avoid redundant barriers.
			const Instruction *next = get_next_instruction_in_block(instruction);
			if (next && next->op == OpControlBarrier)
			{
				auto *next_ops = stream(*next);
				uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
				uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
				next_semantics = mask_relevant_memory_semantics(next_semantics);
				bool memory_scope_covered = false;
				if (next_memory == memory)
					memory_scope_covered = true;
				else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
				{
					// If we only care about workgroup memory, either Device or Workgroup scope is fine,
					// scope does not have to match.
					if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
					    (memory == ScopeDevice || memory == ScopeWorkgroup))
					{
						memory_scope_covered = true;
					}
				}
				else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
				{
					// The control barrier has device scope, but the memory barrier just has workgroup scope.
					memory_scope_covered = true;
				}
				// If we have the same memory scope, and all memory types are covered, we're good.
				if (memory_scope_covered && (semantics & next_semantics) == semantics)
					break;
			}
		}
		// We are synchronizing some memory or syncing execution,
		// so we cannot forward any loads beyond the memory barrier.
		if (semantics || opcode == OpControlBarrier)
		{
			assert(current_emitting_block);
			flush_control_dependent_expressions(current_emitting_block->self);
			flush_all_active_variables();
		}
		if (memory == ScopeWorkgroup) // Only need to consider memory within a group
		{
			if (semantics == MemorySemanticsWorkgroupMemoryMask)
			{
				// OpControlBarrier implies a memory barrier for shared memory as well.
				bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
				if (!implies_shared_barrier)
					statement("memoryBarrierShared();");
			}
			else if (semantics != 0)
				statement("groupMemoryBarrier();");
		}
		else if (memory == ScopeSubgroup)
		{
			const uint32_t all_barriers =
			    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
			if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
			{
				// These are not relevant for GLSL, but assume it means memoryBarrier().
				// memoryBarrier() does everything, so no need to test anything else.
				statement("subgroupMemoryBarrier();");
			}
			else if ((semantics & all_barriers) == all_barriers)
			{
				// Short-hand instead of emitting 3 barriers.
				statement("subgroupMemoryBarrier();");
			}
			else
			{
				// Pick out individual barriers.
				if (semantics & MemorySemanticsWorkgroupMemoryMask)
					statement("subgroupMemoryBarrierShared();");
				if (semantics & MemorySemanticsUniformMemoryMask)
					statement("subgroupMemoryBarrierBuffer();");
				if (semantics & MemorySemanticsImageMemoryMask)
					statement("subgroupMemoryBarrierImage();");
			}
		}
		else
		{
			const uint32_t all_barriers =
			    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
			if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
			{
				// These are not relevant for GLSL, but assume it means memoryBarrier().
				// memoryBarrier() does everything, so no need to test anything else.
				statement("memoryBarrier();");
			}
			else if ((semantics & all_barriers) == all_barriers)
			{
				// Short-hand instead of emitting 4 barriers.
				statement("memoryBarrier();");
			}
			else
			{
				// Pick out individual barriers.
				if (semantics & MemorySemanticsWorkgroupMemoryMask)
					statement("memoryBarrierShared();");
				if (semantics & MemorySemanticsUniformMemoryMask)
					statement("memoryBarrierBuffer();");
				if (semantics & MemorySemanticsImageMemoryMask)
					statement("memoryBarrierImage();");
			}
		}
		if (opcode == OpControlBarrier)
		{
			if (execution_scope == ScopeSubgroup)
				statement("subgroupBarrier();");
			else
				statement("barrier();");
		}
		break;
	}
	case OpExtInst:
	{
		uint32_t extension_set = ops[2];
		auto ext = get<SPIRExtension>(extension_set).ext;
		if (ext == SPIRExtension::GLSL)
		{
			emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
		}
		else if (ext == SPIRExtension::SPV_AMD_shader_ballot)
		{
			emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
		}
		else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
		{
			emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
		}
		else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
		{
			emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
		}
		else if (ext == SPIRExtension::SPV_AMD_gcn_shader)
		{
			emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
		}
		else if (ext == SPIRExtension::SPV_debug_info ||
		    ext == SPIRExtension::NonSemanticShaderDebugInfo ||
		    ext == SPIRExtension::NonSemanticGeneric)
		{
			break; // Ignore SPIR-V debug information extended instructions.
		}
		else if (ext == SPIRExtension::NonSemanticDebugPrintf)
		{
			// Operation 1 is printf.
			if (ops[3] == 1)
			{
				if (!options.vulkan_semantics)
					SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n");
				require_extension_internal("GL_EXT_debug_printf");
				auto &format_string = get<SPIRString>(ops[4]).str;
				string expr = join("debugPrintfEXT(\"", format_string, "\"");
				for (uint32_t i = 5; i < length; i++)
				{
					expr += ", ";
					expr += to_expression(ops[i]);
				}
				statement(expr, ");");
			}
		}
		else
		{
			statement("// unimplemented ext op ", instruction.op);
			break;
		}
		break;
	}
	// Legacy sub-group stuff ...
	case OpSubgroupBallotKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		string expr;
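		// ballotARB() returns a 64-bit mask; unpack it into two 32-bit words and pad with zeros
		// to form the uvec4 ballot result SPIR-V expects.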
		expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
		emit_op(result_type, id, expr, should_forward(ops[2]));
		require_extension_internal("GL_ARB_shader_ballot");
		inherit_expression_dependencies(id, ops[2]);
		register_control_dependent_expression(ops[1]);
		break;
	}
	case OpSubgroupFirstInvocationKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
		require_extension_internal("GL_ARB_shader_ballot");
		register_control_dependent_expression(ops[1]);
		break;
	}
	case OpSubgroupReadInvocationKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
		require_extension_internal("GL_ARB_shader_ballot");
		register_control_dependent_expression(ops[1]);
		break;
	}
	case OpSubgroupAllKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
		require_extension_internal("GL_ARB_shader_group_vote");
		register_control_dependent_expression(ops[1]);
		break;
	}
	case OpSubgroupAnyKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
		require_extension_internal("GL_ARB_shader_group_vote");
		register_control_dependent_expression(ops[1]);
		break;
	}
	case OpSubgroupAllEqualKHR:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
		require_extension_internal("GL_ARB_shader_group_vote");
		register_control_dependent_expression(ops[1]);
		break;
	}
	case OpGroupIAddNonUniformAMD:
	case OpGroupFAddNonUniformAMD:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
		require_extension_internal("GL_AMD_shader_ballot");
		register_control_dependent_expression(ops[1]);
		break;
	}
	case OpGroupFMinNonUniformAMD:
	case OpGroupUMinNonUniformAMD:
	case OpGroupSMinNonUniformAMD:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
		require_extension_internal("GL_AMD_shader_ballot");
		register_control_dependent_expression(ops[1]);
		break;
	}
	case OpGroupFMaxNonUniformAMD:
	case OpGroupUMaxNonUniformAMD:
	case OpGroupSMaxNonUniformAMD:
	{
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
		require_extension_internal("GL_AMD_shader_ballot");
		register_control_dependent_expression(ops[1]);
		break;
	}
	case OpFragmentMaskFetchAMD:
	{
		auto &type = expression_type(ops[2]);
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		if (type.image.dim == spv::DimSubpassData)
		{
			emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
		}
		else
		{
			emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
		}
		require_extension_internal("GL_AMD_shader_fragment_mask");
		break;
	}
	case OpFragmentFetchAMD:
	{
		auto &type = expression_type(ops[2]);
		uint32_t result_type = ops[0];
		uint32_t id = ops[1];
		if (type.image.dim == spv::DimSubpassData)
		{
			emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
		}
		else
		{
			emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
		}
		require_extension_internal("GL_AMD_shader_fragment_mask");
		break;
	}
	// Vulkan 1.1 sub-group stuff ...
	case OpGroupNonUniformElect:
	case OpGroupNonUniformBroadcast:
	case OpGroupNonUniformBroadcastFirst:
	case OpGroupNonUniformBallot:
	case OpGroupNonUniformInverseBallot:
	case OpGroupNonUniformBallotBitExtract:
	case OpGroupNonUniformBallotBitCount:
	case OpGroupNonUniformBallotFindLSB:
	case OpGroupNonUniformBallotFindMSB:
	case OpGroupNonUniformShuffle:
	case OpGroupNonUniformShuffleXor:
	case OpGroupNonUniformShuffleUp:
	case OpGroupNonUniformShuffleDown:
	case OpGroupNonUniformAll:
	case OpGroupNonUniformAny:
	case OpGroupNonUniformAllEqual:
	case OpGroupNonUniformFAdd:
	case OpGroupNonUniformIAdd:
	case OpGroupNonUniformFMul:
	case OpGroupNonUniformIMul:
	case OpGroupNonUniformFMin:
	case OpGroupNonUniformFMax:
	case OpGroupNonUniformSMin:
	case OpGroupNonUniformSMax:
	case OpGroupNonUniformUMin:
	case OpGroupNonUniformUMax:
	case OpGroupNonUniformBitwiseAnd:
	case OpGroupNonUniformBitwiseOr:
	case OpGroupNonUniformBitwiseXor:
	case OpGroupNonUniformLogicalAnd:
	case OpGroupNonUniformLogicalOr:
	case OpGroupNonUniformLogicalXor:
	case OpGroupNonUniformQuadSwap:
	case OpGroupNonUniformQuadBroadcast:
		emit_subgroup_op(instruction);
		break;
	case OpFUnordEqual:
	case OpFUnordLessThan:
	case OpFUnordGreaterThan:
	case OpFUnordLessThanEqual:
	case OpFUnordGreaterThanEqual:
	{
		// GLSL doesn't specify if floating point comparisons are ordered or unordered,
		// but glslang always emits ordered floating point compares for GLSL.
		// To get unordered compares, we can test the opposite thing and invert the result.
		// This way, we force true when there is any NaN present.
		uint32_t op0 = ops[2];
		uint32_t op1 = ops[3];
		string expr;
		if (expression_type(op0).vecsize > 1)
		{
			const char *comp_op = nullptr;
			switch (opcode)
			{
			case OpFUnordEqual:
				comp_op = "notEqual";
				break;
			case OpFUnordLessThan:
				comp_op = "greaterThanEqual";
				break;
			case OpFUnordLessThanEqual:
				comp_op = "greaterThan";
				break;
			case OpFUnordGreaterThan:
				comp_op = "lessThanEqual";
				break;
			case OpFUnordGreaterThanEqual:
				comp_op = "lessThan";
				break;
			default:
				assert(0);
				break;
			}
			expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
		}
		else
		{
			const char *comp_op = nullptr;
			switch (opcode)
			{
			case OpFUnordEqual:
				comp_op = " != ";
				break;
			case OpFUnordLessThan:
				comp_op = " >= ";
				break;
			case OpFUnordLessThanEqual:
				comp_op = " > ";
				break;
			case OpFUnordGreaterThan:
				comp_op = " <= ";
				break;
			case OpFUnordGreaterThanEqual:
				comp_op = " < ";
				break;
			default:
				assert(0);
				break;
			}
			expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
		}
		emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
		inherit_expression_dependencies(ops[1], op0);
		inherit_expression_dependencies(ops[1], op1);
		break;
	}
	case OpReportIntersectionKHR:
		// NV is same opcode.
		forced_temporaries.insert(ops[1]);
		if (ray_tracing_is_khr)
			GLSL_BFOP(reportIntersectionEXT);
		else
			GLSL_BFOP(reportIntersectionNV);
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpIgnoreIntersectionNV:
		// KHR variant is a terminator.
		statement("ignoreIntersectionNV();");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpTerminateRayNV:
		// KHR variant is a terminator.
		statement("terminateRayNV();");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpTraceNV:
		statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
		    to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
		    to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
		    to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpTraceRayKHR:
		if (!has_decoration(ops[10], DecorationLocation))
			SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
		statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
		    to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
		    to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
		    to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpExecuteCallableNV:
		statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
		flush_control_dependent_expressions(current_emitting_block->self);
		break;
	case OpExecuteCallableKHR:
		if (!has_decoration(ops[1], DecorationLocation))
			SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
		statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
  12829. flush_control_dependent_expressions(current_emitting_block->self);
  12830. break;
  12831. // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
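// Illustrative sketch (assumed temporary/variable names): OpRayQueryProceedKHR below is emitted
// as an explicit, non-forwarded temporary, e.g. `bool _42 = rayQueryProceedEXT(rq);`, so later
// uses are not affected by subsequent rayQuery* calls.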
  12832. case OpRayQueryInitializeKHR:
  12833. flush_variable_declaration(ops[0]);
  12834. statement("rayQueryInitializeEXT(",
  12835. to_expression(ops[0]), ", ", to_expression(ops[1]), ", ",
  12836. to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
  12837. to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
  12838. to_expression(ops[6]), ", ", to_expression(ops[7]), ");");
  12839. break;
  12840. case OpRayQueryProceedKHR:
  12841. flush_variable_declaration(ops[0]);
  12842. emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false);
  12843. break;
  12844. case OpRayQueryTerminateKHR:
  12845. flush_variable_declaration(ops[0]);
  12846. statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");");
  12847. break;
  12848. case OpRayQueryGenerateIntersectionKHR:
  12849. flush_variable_declaration(ops[0]);
  12850. statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
  12851. break;
  12852. case OpRayQueryConfirmIntersectionKHR:
  12853. flush_variable_declaration(ops[0]);
  12854. statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");");
  12855. break;
  12856. #define GLSL_RAY_QUERY_GET_OP(op) \
  12857. case OpRayQueryGet##op##KHR: \
  12858. flush_variable_declaration(ops[2]); \
  12859. emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \
  12860. break
  12861. #define GLSL_RAY_QUERY_GET_OP2(op) \
  12862. case OpRayQueryGet##op##KHR: \
  12863. flush_variable_declaration(ops[2]); \
  12864. emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \
  12865. break
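// For example (illustrative only, hypothetical operand names): GLSL_RAY_QUERY_GET_OP2(IntersectionT)
// expands to a case for OpRayQueryGetIntersectionTKHR that emits
// `rayQueryGetIntersectionTEXT(rq, bool(committed))` as a non-forwarded temporary; the
// single-argument macro emits the same pattern without the committed flag.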
  12866. GLSL_RAY_QUERY_GET_OP(RayTMin);
  12867. GLSL_RAY_QUERY_GET_OP(RayFlags);
  12868. GLSL_RAY_QUERY_GET_OP(WorldRayOrigin);
  12869. GLSL_RAY_QUERY_GET_OP(WorldRayDirection);
  12870. GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque);
  12871. GLSL_RAY_QUERY_GET_OP2(IntersectionType);
  12872. GLSL_RAY_QUERY_GET_OP2(IntersectionT);
  12873. GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex);
  12874. GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId);
  12875. GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset);
  12876. GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex);
  12877. GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex);
  12878. GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics);
  12879. GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace);
  12880. GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection);
  12881. GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin);
  12882. GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld);
  12883. GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject);
  12884. #undef GLSL_RAY_QUERY_GET_OP
  12885. #undef GLSL_RAY_QUERY_GET_OP2
  12886. case OpConvertUToAccelerationStructureKHR:
  12887. {
  12888. require_extension_internal("GL_EXT_ray_tracing");
  12889. bool elide_temporary = should_forward(ops[2]) && forced_temporaries.count(ops[1]) == 0 &&
  12890. !hoisted_temporaries.count(ops[1]);
  12891. if (elide_temporary)
  12892. {
  12893. GLSL_UFOP(accelerationStructureEXT);
  12894. }
  12895. else
  12896. {
  12897. // Force this path in subsequent iterations.
  12898. forced_temporaries.insert(ops[1]);
  12899. // We cannot declare a temporary acceleration structure in GLSL.
  12900. // If we get to this point, we'll have to emit a temporary uvec2,
  12901. // and cast to RTAS on demand.
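// Illustrative result (assumed temporary name): for a uvec2 address this declares
// `uvec2 _57 = addr;` here, and every later use of the result id is emitted as
// `accelerationStructureEXT(_57)`.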
  12902. statement(declare_temporary(expression_type_id(ops[2]), ops[1]), to_unpacked_expression(ops[2]), ";");
  12903. // Use raw SPIRExpression interface to block all usage tracking.
  12904. set<SPIRExpression>(ops[1], join("accelerationStructureEXT(", to_name(ops[1]), ")"), ops[0], true);
  12905. }
  12906. break;
  12907. }
  12908. case OpConvertUToPtr:
  12909. {
  12910. auto &type = get<SPIRType>(ops[0]);
  12911. if (type.storage != StorageClassPhysicalStorageBufferEXT)
  12912. SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
  12913. auto &in_type = expression_type(ops[2]);
  12914. if (in_type.vecsize == 2)
  12915. require_extension_internal("GL_EXT_buffer_reference_uvec2");
  12916. auto op = type_to_glsl(type);
  12917. emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
  12918. break;
  12919. }
  12920. case OpConvertPtrToU:
  12921. {
  12922. auto &type = get<SPIRType>(ops[0]);
  12923. auto &ptr_type = expression_type(ops[2]);
  12924. if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
  12925. SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
  12926. if (type.vecsize == 2)
  12927. require_extension_internal("GL_EXT_buffer_reference_uvec2");
  12928. auto op = type_to_glsl(type);
  12929. emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
  12930. break;
  12931. }
  12932. case OpUndef:
  12933. // Undefined value has been declared.
  12934. break;
  12935. case OpLine:
  12936. {
  12937. emit_line_directive(ops[0], ops[1]);
  12938. break;
  12939. }
  12940. case OpNoLine:
  12941. break;
  12942. case OpDemoteToHelperInvocationEXT:
  12943. if (!options.vulkan_semantics)
  12944. SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
  12945. require_extension_internal("GL_EXT_demote_to_helper_invocation");
  12946. statement(backend.demote_literal, ";");
  12947. break;
  12948. case OpIsHelperInvocationEXT:
  12949. if (!options.vulkan_semantics)
  12950. SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
  12951. require_extension_internal("GL_EXT_demote_to_helper_invocation");
  12952. // Helper lane state with demote is volatile by nature.
  12953. // Do not forward this.
  12954. emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
  12955. break;
  12956. case OpBeginInvocationInterlockEXT:
  12957. // If the interlock is complex, we emit this elsewhere.
  12958. if (!interlocked_is_complex)
  12959. {
  12960. statement("SPIRV_Cross_beginInvocationInterlock();");
  12961. flush_all_active_variables();
  12962. // Make sure forwarding doesn't propagate outside interlock region.
  12963. }
  12964. break;
  12965. case OpEndInvocationInterlockEXT:
  12966. // If the interlock is complex, we emit this elsewhere.
  12967. if (!interlocked_is_complex)
  12968. {
  12969. statement("SPIRV_Cross_endInvocationInterlock();");
  12970. flush_all_active_variables();
  12971. // Make sure forwarding doesn't propagate outside interlock region.
  12972. }
  12973. break;
  12974. case OpSetMeshOutputsEXT:
  12975. statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");");
  12976. break;
  12977. case OpReadClockKHR:
  12978. {
  12979. auto &type = get<SPIRType>(ops[0]);
  12980. auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
  12981. const char *op = nullptr;
  12982. // Forwarding clock statements leads to a scenario where an SSA value can take on different
  12983. // values every time it's evaluated. Block any forwarding attempt.
  12984. // We also might want to invalidate all expressions to function as a sort of optimization
12985. // barrier, but that might be overkill for now.
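// Summary of the mapping below: ScopeDevice with a uint64 result maps to clockRealtimeEXT()
// (clockRealtime2x32EXT() for a uvec2 result), while ScopeSubgroup maps to clockARB() / clock2x32ARB().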
  12986. if (scope == ScopeDevice)
  12987. {
  12988. require_extension_internal("GL_EXT_shader_realtime_clock");
  12989. if (type.basetype == SPIRType::BaseType::UInt64)
  12990. op = "clockRealtimeEXT()";
  12991. else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2)
  12992. op = "clockRealtime2x32EXT()";
  12993. else
  12994. SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode.");
  12995. }
  12996. else if (scope == ScopeSubgroup)
  12997. {
  12998. require_extension_internal("GL_ARB_shader_clock");
  12999. if (type.basetype == SPIRType::BaseType::UInt64)
  13000. op = "clockARB()";
  13001. else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2)
  13002. op = "clock2x32ARB()";
  13003. else
  13004. SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode.");
  13005. }
  13006. else
  13007. SPIRV_CROSS_THROW("Unsupported scope for OpReadClockKHR opcode.");
  13008. emit_op(ops[0], ops[1], op, false);
  13009. break;
  13010. }
  13011. default:
  13012. statement("// unimplemented op ", instruction.op);
  13013. break;
  13014. }
  13015. }
  13016. // Appends function arguments, mapped from global variables, beyond the specified arg index.
  13017. // This is used when a function call uses fewer arguments than the function defines.
  13018. // This situation may occur if the function signature has been dynamically modified to
  13019. // extract global variables referenced from within the function, and convert them to
  13020. // function arguments. This is necessary for shader languages that do not support global
13021. // access to shader input content from within a function (e.g. Metal). Each additional
13022. // function arg uses the name of the global variable. Function nesting will modify the
  13023. // functions and function calls all the way up the nesting chain.
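// Illustrative example (hypothetical names): if a callee reads the global `gl_FragCoord` and a
// backend such as the MSL one has rewritten it as an extra parameter, this helper appends
// `gl_FragCoord` to the call's argument list so the call site matches the rewritten signature.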
  13024. void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
  13025. {
  13026. auto &args = func.arguments;
  13027. uint32_t arg_cnt = uint32_t(args.size());
  13028. for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
  13029. {
  13030. auto &arg = args[arg_idx];
  13031. assert(arg.alias_global_variable);
  13032. // If the underlying variable needs to be declared
13033. // (i.e. a local variable with deferred declaration), do so now.
  13034. uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
  13035. if (var_id)
  13036. flush_variable_declaration(var_id);
  13037. arglist.push_back(to_func_call_arg(arg, arg.id));
  13038. }
  13039. }
  13040. string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
  13041. {
  13042. if (type.type_alias != TypeID(0) &&
  13043. !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
  13044. {
  13045. return to_member_name(get<SPIRType>(type.type_alias), index);
  13046. }
  13047. auto &memb = ir.meta[type.self].members;
  13048. if (index < memb.size() && !memb[index].alias.empty())
  13049. return memb[index].alias;
  13050. else
  13051. return join("_m", index);
  13052. }
  13053. string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
  13054. {
  13055. return join(".", to_member_name(type, index));
  13056. }
  13057. string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
  13058. {
  13059. string ret;
  13060. auto *member_type = &type;
  13061. for (auto &index : indices)
  13062. {
  13063. ret += join(".", to_member_name(*member_type, index));
  13064. member_type = &get<SPIRType>(member_type->member_types[index]);
  13065. }
  13066. return ret;
  13067. }
  13068. void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
  13069. {
  13070. auto &memb = ir.meta[type.self].members;
  13071. if (index < memb.size() && !memb[index].alias.empty())
  13072. {
  13073. auto &name = memb[index].alias;
  13074. if (name.empty())
  13075. return;
  13076. ParsedIR::sanitize_identifier(name, true, true);
  13077. update_name_cache(type.member_name_cache, name);
  13078. }
  13079. }
  13080. // Checks whether the ID is a row_major matrix that requires conversion before use
  13081. bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
  13082. {
  13083. // Natively supported row-major matrices do not need to be converted.
  13084. // Legacy targets do not support row major.
  13085. if (backend.native_row_major_matrix && !is_legacy())
  13086. return false;
  13087. auto *e = maybe_get<SPIRExpression>(id);
  13088. if (e)
  13089. return e->need_transpose;
  13090. else
  13091. return has_decoration(id, DecorationRowMajor);
  13092. }
  13093. // Checks whether the member is a row_major matrix that requires conversion before use
  13094. bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
  13095. {
  13096. // Natively supported row-major matrices do not need to be converted.
  13097. if (backend.native_row_major_matrix && !is_legacy())
  13098. return false;
  13099. // Non-matrix or column-major matrix types do not need to be converted.
  13100. if (!has_member_decoration(type.self, index, DecorationRowMajor))
  13101. return false;
  13102. // Only square row-major matrices can be converted at this time.
13103. // Converting non-square matrices will require defining a custom GLSL function that
  13104. // swaps matrix elements while retaining the original dimensional form of the matrix.
  13105. const auto mbr_type = get<SPIRType>(type.member_types[index]);
  13106. if (mbr_type.columns != mbr_type.vecsize)
  13107. SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
  13108. return true;
  13109. }
  13110. // Checks if we need to remap physical type IDs when declaring the type in a buffer.
  13111. bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
  13112. {
  13113. return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
  13114. }
  13115. // Checks whether the member is in packed data type, that might need to be unpacked.
  13116. bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
  13117. {
  13118. return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
  13119. }
  13120. // Wraps the expression string in a function call that converts the
  13121. // row_major matrix result of the expression to a column_major matrix.
  13122. // Base implementation uses the standard library transpose() function.
  13123. // Subclasses may override to use a different function.
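// Illustrative examples (hypothetical expression names): a full row-major matrix load `ubo.m`
// becomes `transpose(ubo.m)`, while a single column access such as `ubo.m[1]` of a mat4 member
// is unrolled to `vec4(ubo.m[0][1], ubo.m[1][1], ubo.m[2][1], ubo.m[3][1])`.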
  13124. string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
  13125. bool /*is_packed*/, bool relaxed)
  13126. {
  13127. strip_enclosed_expression(exp_str);
  13128. if (!is_matrix(exp_type))
  13129. {
  13130. auto column_index = exp_str.find_last_of('[');
  13131. if (column_index == string::npos)
  13132. return exp_str;
  13133. auto column_expr = exp_str.substr(column_index);
  13134. exp_str.resize(column_index);
  13135. auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
  13136. // Loading a column from a row-major matrix. Unroll the load.
  13137. for (uint32_t c = 0; c < exp_type.vecsize; c++)
  13138. {
  13139. transposed_expr += join(exp_str, '[', c, ']', column_expr);
  13140. if (c + 1 < exp_type.vecsize)
  13141. transposed_expr += ", ";
  13142. }
  13143. transposed_expr += ")";
  13144. return transposed_expr;
  13145. }
  13146. else if (options.version < 120)
  13147. {
  13148. // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
  13149. // these GLSL versions do not support non-square matrices.
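// Illustrative output for this legacy path: the transpose is emitted as a call to an spvTranspose
// helper (spvTransposeMP for the relaxed-precision ES variant), with the helper body generated by
// the polyfill requested via require_polyfill() below.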
  13150. if (exp_type.vecsize == 2 && exp_type.columns == 2)
  13151. require_polyfill(PolyfillTranspose2x2, relaxed);
  13152. else if (exp_type.vecsize == 3 && exp_type.columns == 3)
  13153. require_polyfill(PolyfillTranspose3x3, relaxed);
  13154. else if (exp_type.vecsize == 4 && exp_type.columns == 4)
  13155. require_polyfill(PolyfillTranspose4x4, relaxed);
  13156. else
  13157. SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
  13158. return join("spvTranspose", (options.es && relaxed) ? "MP" : "", "(", exp_str, ")");
  13159. }
  13160. else
  13161. return join("transpose(", exp_str, ")");
  13162. }
  13163. string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
  13164. {
  13165. string type_name = type_to_glsl(type, id);
  13166. remap_variable_type_name(type, name, type_name);
  13167. return join(type_name, " ", name, type_to_array_glsl(type));
  13168. }
  13169. bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
  13170. {
  13171. return var.storage == storage;
  13172. }
  13173. // Emit a structure member. Subclasses may override to modify output,
  13174. // or to dynamically add a padding member if needed.
  13175. void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
  13176. const string &qualifier, uint32_t)
  13177. {
  13178. auto &membertype = get<SPIRType>(member_type_id);
  13179. Bitset memberflags;
  13180. auto &memb = ir.meta[type.self].members;
  13181. if (index < memb.size())
  13182. memberflags = memb[index].decoration_flags;
  13183. string qualifiers;
  13184. bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
  13185. ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
  13186. if (is_block)
  13187. qualifiers = to_interpolation_qualifiers(memberflags);
  13188. statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
  13189. variable_decl(membertype, to_member_name(type, index)), ";");
  13190. }
  13191. void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
  13192. {
  13193. }
  13194. string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
  13195. {
  13196. // GL_EXT_buffer_reference variables can be marked as restrict.
  13197. if (flags.get(DecorationRestrictPointerEXT))
  13198. return "restrict ";
  13199. string qual;
  13200. if (type_is_floating_point(type) && flags.get(DecorationNoContraction) && backend.support_precise_qualifier)
  13201. qual = "precise ";
13202. // Structs do not have precision qualifiers, and neither do doubles (desktop only anyway, so no mediump/highp).
  13203. bool type_supports_precision =
  13204. type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
  13205. type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
  13206. type.basetype == SPIRType::Sampler;
  13207. if (!type_supports_precision)
  13208. return qual;
  13209. if (options.es)
  13210. {
  13211. auto &execution = get_entry_point();
  13212. if (flags.get(DecorationRelaxedPrecision))
  13213. {
  13214. bool implied_fmediump = type.basetype == SPIRType::Float &&
  13215. options.fragment.default_float_precision == Options::Mediump &&
  13216. execution.model == ExecutionModelFragment;
  13217. bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
  13218. options.fragment.default_int_precision == Options::Mediump &&
  13219. execution.model == ExecutionModelFragment;
  13220. qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
  13221. }
  13222. else
  13223. {
  13224. bool implied_fhighp =
  13225. type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
  13226. execution.model == ExecutionModelFragment) ||
  13227. (execution.model != ExecutionModelFragment));
  13228. bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
  13229. ((options.fragment.default_int_precision == Options::Highp &&
  13230. execution.model == ExecutionModelFragment) ||
  13231. (execution.model != ExecutionModelFragment));
  13232. qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
  13233. }
  13234. }
  13235. else if (backend.allow_precision_qualifiers)
  13236. {
  13237. // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
  13238. // The default is highp however, so only emit mediump in the rare case that a shader has these.
  13239. if (flags.get(DecorationRelaxedPrecision))
  13240. qual += "mediump ";
  13241. }
  13242. return qual;
  13243. }
  13244. string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
  13245. {
  13246. auto &type = expression_type(id);
  13247. bool use_precision_qualifiers = backend.allow_precision_qualifiers;
  13248. if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
  13249. {
  13250. // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
  13251. auto &result_type = get<SPIRType>(type.image.type);
  13252. if (result_type.width < 32)
  13253. return "mediump ";
  13254. }
  13255. return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
  13256. }
  13257. void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var)
  13258. {
  13259. // Works around weird behavior in glslangValidator where
  13260. // a patch out block is translated to just block members getting the decoration.
  13261. // To make glslang not complain when we compile again, we have to transform this back to a case where
  13262. // the variable itself has Patch decoration, and not members.
  13263. // Same for perprimitiveEXT.
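// Illustrative effect (hypothetical block/variable names): if only the member `v` of
// `out Block { vec4 v; } blk;` carries the Patch (or PerPrimitiveEXT) decoration, it is promoted
// to `blk` itself and stripped from every member, so the re-emitted GLSL declares `patch out`
// (or `perprimitiveEXT`) on the block variable.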
  13264. auto &type = get<SPIRType>(var.basetype);
  13265. if (has_decoration(type.self, DecorationBlock))
  13266. {
  13267. uint32_t member_count = uint32_t(type.member_types.size());
  13268. Decoration promoted_decoration = {};
  13269. bool do_promote_decoration = false;
  13270. for (uint32_t i = 0; i < member_count; i++)
  13271. {
  13272. if (has_member_decoration(type.self, i, DecorationPatch))
  13273. {
  13274. promoted_decoration = DecorationPatch;
  13275. do_promote_decoration = true;
  13276. break;
  13277. }
  13278. else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT))
  13279. {
  13280. promoted_decoration = DecorationPerPrimitiveEXT;
  13281. do_promote_decoration = true;
  13282. break;
  13283. }
  13284. }
  13285. if (do_promote_decoration)
  13286. {
  13287. set_decoration(var.self, promoted_decoration);
  13288. for (uint32_t i = 0; i < member_count; i++)
  13289. unset_member_decoration(type.self, i, promoted_decoration);
  13290. }
  13291. }
  13292. }
  13293. string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
  13294. {
  13295. auto &flags = get_decoration_bitset(id);
  13296. string res;
  13297. auto *var = maybe_get<SPIRVariable>(id);
  13298. if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
  13299. res += "shared ";
  13300. else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied)
  13301. res += "taskPayloadSharedEXT ";
  13302. res += to_interpolation_qualifiers(flags);
  13303. if (var)
  13304. res += to_storage_qualifiers_glsl(*var);
  13305. auto &type = expression_type(id);
  13306. if (type.image.dim != DimSubpassData && type.image.sampled == 2)
  13307. {
  13308. if (flags.get(DecorationCoherent))
  13309. res += "coherent ";
  13310. if (flags.get(DecorationRestrict))
  13311. res += "restrict ";
  13312. if (flags.get(DecorationNonWritable))
  13313. res += "readonly ";
  13314. bool formatted_load = type.image.format == ImageFormatUnknown;
  13315. if (flags.get(DecorationNonReadable))
  13316. {
  13317. res += "writeonly ";
  13318. formatted_load = false;
  13319. }
  13320. if (formatted_load)
  13321. {
  13322. if (!options.es)
  13323. require_extension_internal("GL_EXT_shader_image_load_formatted");
  13324. else
  13325. SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
  13326. }
  13327. }
  13328. res += to_precision_qualifiers_glsl(id);
  13329. return res;
  13330. }
  13331. string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
  13332. {
13333. // glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
  13334. auto &type = expression_type(arg.id);
  13335. const char *direction = "";
  13336. if (type.pointer)
  13337. {
  13338. if (arg.write_count && arg.read_count)
  13339. direction = "inout ";
  13340. else if (arg.write_count)
  13341. direction = "out ";
  13342. }
  13343. return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
  13344. }
  13345. string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
  13346. {
  13347. return to_unpacked_expression(var.initializer);
  13348. }
  13349. string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
  13350. {
  13351. #ifndef NDEBUG
  13352. auto &type = get<SPIRType>(type_id);
  13353. assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
  13354. type.storage == StorageClassGeneric);
  13355. #endif
  13356. uint32_t id = ir.increase_bound_by(1);
  13357. ir.make_constant_null(id, type_id, false);
  13358. return constant_expression(get<SPIRConstant>(id));
  13359. }
  13360. bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
  13361. {
  13362. if (type.pointer)
  13363. return false;
  13364. if (!type.array.empty() && options.flatten_multidimensional_arrays)
  13365. return false;
  13366. for (auto &literal : type.array_size_literal)
  13367. if (!literal)
  13368. return false;
  13369. for (auto &memb : type.member_types)
  13370. if (!type_can_zero_initialize(get<SPIRType>(memb)))
  13371. return false;
  13372. return true;
  13373. }
  13374. string CompilerGLSL::variable_decl(const SPIRVariable &variable)
  13375. {
  13376. // Ignore the pointer type since GLSL doesn't have pointers.
  13377. auto &type = get_variable_data_type(variable);
  13378. if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer)
  13379. SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
  13380. auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
  13381. if (variable.loop_variable && variable.static_expression)
  13382. {
  13383. uint32_t expr = variable.static_expression;
  13384. if (ir.ids[expr].get_type() != TypeUndef)
  13385. res += join(" = ", to_unpacked_expression(variable.static_expression));
  13386. else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  13387. res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
  13388. }
  13389. else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
  13390. {
  13391. uint32_t expr = variable.initializer;
  13392. if (ir.ids[expr].get_type() != TypeUndef)
  13393. res += join(" = ", to_initializer_expression(variable));
  13394. else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
  13395. res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
  13396. }
  13397. return res;
  13398. }
  13399. const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
  13400. {
  13401. auto &flags = get_decoration_bitset(variable.self);
  13402. if (flags.get(DecorationRelaxedPrecision))
  13403. return "mediump ";
  13404. else
  13405. return "highp ";
  13406. }
  13407. string CompilerGLSL::pls_decl(const PlsRemap &var)
  13408. {
  13409. auto &variable = get<SPIRVariable>(var.id);
  13410. SPIRType type;
  13411. type.vecsize = pls_format_to_components(var.format);
  13412. type.basetype = pls_format_to_basetype(var.format);
  13413. return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
  13414. to_name(variable.self));
  13415. }
  13416. uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
  13417. {
  13418. return to_array_size_literal(type, uint32_t(type.array.size() - 1));
  13419. }
  13420. uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
  13421. {
  13422. assert(type.array.size() == type.array_size_literal.size());
  13423. if (type.array_size_literal[index])
  13424. {
  13425. return type.array[index];
  13426. }
  13427. else
  13428. {
  13429. // Use the default spec constant value.
  13430. // This is the best we can do.
  13431. return evaluate_constant_u32(type.array[index]);
  13432. }
  13433. }
  13434. string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
  13435. {
  13436. assert(type.array.size() == type.array_size_literal.size());
  13437. auto &size = type.array[index];
  13438. if (!type.array_size_literal[index])
  13439. return to_expression(size);
  13440. else if (size)
  13441. return convert_to_string(size);
  13442. else if (!backend.unsized_array_supported)
  13443. {
  13444. // For runtime-sized arrays, we can work around
13445. // the lack of standard support for this by simply having
13446. // a single-element array.
  13447. //
  13448. // Runtime length arrays must always be the last element
  13449. // in an interface block.
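// Illustrative example: an SSBO member declared as `float data[];` in SPIR-V is emitted as
// `float data[1];` on targets where backend.unsized_array_supported is false.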
  13450. return "1";
  13451. }
  13452. else
  13453. return "";
  13454. }
  13455. string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
  13456. {
  13457. if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
  13458. {
  13459. // We are using a wrapped pointer type, and we should not emit any array declarations here.
  13460. return "";
  13461. }
  13462. if (type.array.empty())
  13463. return "";
  13464. if (options.flatten_multidimensional_arrays)
  13465. {
  13466. string res;
  13467. res += "[";
  13468. for (auto i = uint32_t(type.array.size()); i; i--)
  13469. {
  13470. res += enclose_expression(to_array_size(type, i - 1));
  13471. if (i > 1)
  13472. res += " * ";
  13473. }
  13474. res += "]";
  13475. return res;
  13476. }
  13477. else
  13478. {
  13479. if (type.array.size() > 1)
  13480. {
  13481. if (!options.es && options.version < 430)
  13482. require_extension_internal("GL_ARB_arrays_of_arrays");
  13483. else if (options.es && options.version < 310)
  13484. SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
  13485. "Try using --flatten-multidimensional-arrays or set "
  13486. "options.flatten_multidimensional_arrays to true.");
  13487. }
  13488. string res;
  13489. for (auto i = uint32_t(type.array.size()); i; i--)
  13490. {
  13491. res += "[";
  13492. res += to_array_size(type, i - 1);
  13493. res += "]";
  13494. }
  13495. return res;
  13496. }
  13497. }
  13498. string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
  13499. {
  13500. auto &imagetype = get<SPIRType>(type.image.type);
  13501. string res;
  13502. switch (imagetype.basetype)
  13503. {
  13504. case SPIRType::Int64:
  13505. res = "i64";
  13506. require_extension_internal("GL_EXT_shader_image_int64");
  13507. break;
  13508. case SPIRType::UInt64:
  13509. res = "u64";
  13510. require_extension_internal("GL_EXT_shader_image_int64");
  13511. break;
  13512. case SPIRType::Int:
  13513. case SPIRType::Short:
  13514. case SPIRType::SByte:
  13515. res = "i";
  13516. break;
  13517. case SPIRType::UInt:
  13518. case SPIRType::UShort:
  13519. case SPIRType::UByte:
  13520. res = "u";
  13521. break;
  13522. default:
  13523. break;
  13524. }
  13525. // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
13526. // We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
  13527. if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
  13528. return res + "subpassInput" + (type.image.ms ? "MS" : "");
  13529. else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
  13530. subpass_input_is_framebuffer_fetch(id))
  13531. {
  13532. SPIRType sampled_type = get<SPIRType>(type.image.type);
  13533. sampled_type.vecsize = 4;
  13534. return type_to_glsl(sampled_type);
  13535. }
  13536. // If we're emulating subpassInput with samplers, force sampler2D
  13537. // so we don't have to specify format.
  13538. if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
  13539. {
  13540. // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
  13541. if (type.image.dim == DimBuffer && type.image.sampled == 1)
  13542. res += "sampler";
  13543. else
  13544. res += type.image.sampled == 2 ? "image" : "texture";
  13545. }
  13546. else
  13547. res += "sampler";
  13548. switch (type.image.dim)
  13549. {
  13550. case Dim1D:
  13551. // ES doesn't support 1D. Fake it with 2D.
  13552. res += options.es ? "2D" : "1D";
  13553. break;
  13554. case Dim2D:
  13555. res += "2D";
  13556. break;
  13557. case Dim3D:
  13558. res += "3D";
  13559. break;
  13560. case DimCube:
  13561. res += "Cube";
  13562. break;
  13563. case DimRect:
  13564. if (options.es)
  13565. SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
  13566. if (is_legacy_desktop())
  13567. require_extension_internal("GL_ARB_texture_rectangle");
  13568. res += "2DRect";
  13569. break;
  13570. case DimBuffer:
  13571. if (options.es && options.version < 320)
  13572. require_extension_internal("GL_EXT_texture_buffer");
  13573. else if (!options.es && options.version < 300)
  13574. require_extension_internal("GL_EXT_texture_buffer_object");
  13575. res += "Buffer";
  13576. break;
  13577. case DimSubpassData:
  13578. res += "2D";
  13579. break;
  13580. default:
  13581. SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
  13582. }
  13583. if (type.image.ms)
  13584. res += "MS";
  13585. if (type.image.arrayed)
  13586. {
  13587. if (is_legacy_desktop())
  13588. require_extension_internal("GL_EXT_texture_array");
  13589. res += "Array";
  13590. }
  13591. // "Shadow" state in GLSL only exists for samplers and combined image samplers.
  13592. if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
  13593. is_depth_image(type, id))
  13594. {
  13595. res += "Shadow";
  13596. if (type.image.dim == DimCube && is_legacy())
  13597. {
  13598. if (!options.es)
  13599. require_extension_internal("GL_EXT_gpu_shader4");
  13600. else
  13601. {
  13602. require_extension_internal("GL_NV_shadow_samplers_cube");
  13603. res += "NV";
  13604. }
  13605. }
  13606. }
  13607. return res;
  13608. }
  13609. string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
  13610. {
  13611. if (backend.use_array_constructor && type.array.size() > 1)
  13612. {
  13613. if (options.flatten_multidimensional_arrays)
  13614. SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
  13615. "e.g. float[][]().");
  13616. else if (!options.es && options.version < 430)
  13617. require_extension_internal("GL_ARB_arrays_of_arrays");
  13618. else if (options.es && options.version < 310)
  13619. SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
  13620. }
  13621. auto e = type_to_glsl(type);
  13622. if (backend.use_array_constructor)
  13623. {
  13624. for (uint32_t i = 0; i < type.array.size(); i++)
  13625. e += "[]";
  13626. }
  13627. return e;
  13628. }
  13629. // The optional id parameter indicates the object whose type we are trying
  13630. // to find the description for. It is optional. Most type descriptions do not
  13631. // depend on a specific object's use of that type.
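// A few illustrative results: a 4-component float vector maps to "vec4", a 3-column/4-row float
// matrix maps to "mat3x4", and a physical storage buffer pointer gets a mangled name built from
// the pointee type name plus any literal array sizes and a "Pointer" suffix.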
  13632. string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
  13633. {
  13634. if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
  13635. {
  13636. // Need to create a magic type name which compacts the entire type information.
  13637. string name = type_to_glsl(get_pointee_type(type));
  13638. for (size_t i = 0; i < type.array.size(); i++)
  13639. {
  13640. if (type.array_size_literal[i])
  13641. name += join(type.array[i], "_");
  13642. else
  13643. name += join("id", type.array[i], "_");
  13644. }
  13645. name += "Pointer";
  13646. return name;
  13647. }
  13648. switch (type.basetype)
  13649. {
  13650. case SPIRType::Struct:
  13651. // Need OpName lookup here to get a "sensible" name for a struct.
  13652. if (backend.explicit_struct_type)
  13653. return join("struct ", to_name(type.self));
  13654. else
  13655. return to_name(type.self);
  13656. case SPIRType::Image:
  13657. case SPIRType::SampledImage:
  13658. return image_type_glsl(type, id);
  13659. case SPIRType::Sampler:
  13660. // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
  13661. // this distinction into the type system.
  13662. return comparison_ids.count(id) ? "samplerShadow" : "sampler";
  13663. case SPIRType::AccelerationStructure:
  13664. return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
  13665. case SPIRType::RayQuery:
  13666. return "rayQueryEXT";
  13667. case SPIRType::Void:
  13668. return "void";
  13669. default:
  13670. break;
  13671. }
  13672. if (type.basetype == SPIRType::UInt && is_legacy())
  13673. {
  13674. if (options.es)
  13675. SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy ESSL.");
  13676. else
  13677. require_extension_internal("GL_EXT_gpu_shader4");
  13678. }
  13679. if (type.basetype == SPIRType::AtomicCounter)
  13680. {
  13681. if (options.es && options.version < 310)
  13682. SPIRV_CROSS_THROW("At least ESSL 3.10 required for atomic counters.");
  13683. else if (!options.es && options.version < 420)
  13684. require_extension_internal("GL_ARB_shader_atomic_counters");
  13685. }
  13686. if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
  13687. {
  13688. switch (type.basetype)
  13689. {
  13690. case SPIRType::Boolean:
  13691. return "bool";
  13692. case SPIRType::SByte:
  13693. return backend.basic_int8_type;
  13694. case SPIRType::UByte:
  13695. return backend.basic_uint8_type;
  13696. case SPIRType::Short:
  13697. return backend.basic_int16_type;
  13698. case SPIRType::UShort:
  13699. return backend.basic_uint16_type;
  13700. case SPIRType::Int:
  13701. return backend.basic_int_type;
  13702. case SPIRType::UInt:
  13703. return backend.basic_uint_type;
  13704. case SPIRType::AtomicCounter:
  13705. return "atomic_uint";
  13706. case SPIRType::Half:
  13707. return "float16_t";
  13708. case SPIRType::Float:
  13709. return "float";
  13710. case SPIRType::Double:
  13711. return "double";
  13712. case SPIRType::Int64:
  13713. return "int64_t";
  13714. case SPIRType::UInt64:
  13715. return "uint64_t";
  13716. default:
  13717. return "???";
  13718. }
  13719. }
  13720. else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
  13721. {
  13722. switch (type.basetype)
  13723. {
  13724. case SPIRType::Boolean:
  13725. return join("bvec", type.vecsize);
  13726. case SPIRType::SByte:
  13727. return join("i8vec", type.vecsize);
  13728. case SPIRType::UByte:
  13729. return join("u8vec", type.vecsize);
  13730. case SPIRType::Short:
  13731. return join("i16vec", type.vecsize);
  13732. case SPIRType::UShort:
  13733. return join("u16vec", type.vecsize);
  13734. case SPIRType::Int:
  13735. return join("ivec", type.vecsize);
  13736. case SPIRType::UInt:
  13737. return join("uvec", type.vecsize);
  13738. case SPIRType::Half:
  13739. return join("f16vec", type.vecsize);
  13740. case SPIRType::Float:
  13741. return join("vec", type.vecsize);
  13742. case SPIRType::Double:
  13743. return join("dvec", type.vecsize);
  13744. case SPIRType::Int64:
  13745. return join("i64vec", type.vecsize);
  13746. case SPIRType::UInt64:
  13747. return join("u64vec", type.vecsize);
  13748. default:
  13749. return "???";
  13750. }
  13751. }
  13752. else if (type.vecsize == type.columns) // Simple Matrix builtin
  13753. {
  13754. switch (type.basetype)
  13755. {
  13756. case SPIRType::Boolean:
  13757. return join("bmat", type.vecsize);
  13758. case SPIRType::Int:
  13759. return join("imat", type.vecsize);
  13760. case SPIRType::UInt:
  13761. return join("umat", type.vecsize);
  13762. case SPIRType::Half:
  13763. return join("f16mat", type.vecsize);
  13764. case SPIRType::Float:
  13765. return join("mat", type.vecsize);
  13766. case SPIRType::Double:
  13767. return join("dmat", type.vecsize);
  13768. // Matrix types not supported for int64/uint64.
  13769. default:
  13770. return "???";
  13771. }
  13772. }
  13773. else
  13774. {
  13775. switch (type.basetype)
  13776. {
  13777. case SPIRType::Boolean:
  13778. return join("bmat", type.columns, "x", type.vecsize);
  13779. case SPIRType::Int:
  13780. return join("imat", type.columns, "x", type.vecsize);
  13781. case SPIRType::UInt:
  13782. return join("umat", type.columns, "x", type.vecsize);
  13783. case SPIRType::Half:
  13784. return join("f16mat", type.columns, "x", type.vecsize);
  13785. case SPIRType::Float:
  13786. return join("mat", type.columns, "x", type.vecsize);
  13787. case SPIRType::Double:
  13788. return join("dmat", type.columns, "x", type.vecsize);
  13789. // Matrix types not supported for int64/uint64.
  13790. default:
  13791. return "???";
  13792. }
  13793. }
  13794. }
  13795. void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
  13796. const unordered_set<string> &variables_secondary, string &name)
  13797. {
  13798. if (name.empty())
  13799. return;
  13800. ParsedIR::sanitize_underscores(name);
  13801. if (ParsedIR::is_globally_reserved_identifier(name, true))
  13802. {
  13803. name.clear();
  13804. return;
  13805. }
  13806. update_name_cache(variables_primary, variables_secondary, name);
  13807. }
  13808. void CompilerGLSL::add_local_variable_name(uint32_t id)
  13809. {
  13810. add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
  13811. }
  13812. void CompilerGLSL::add_resource_name(uint32_t id)
  13813. {
  13814. add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
  13815. }
  13816. void CompilerGLSL::add_header_line(const std::string &line)
  13817. {
  13818. header_lines.push_back(line);
  13819. }
  13820. bool CompilerGLSL::has_extension(const std::string &ext) const
  13821. {
  13822. auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
  13823. return itr != end(forced_extensions);
  13824. }
  13825. void CompilerGLSL::require_extension(const std::string &ext)
  13826. {
  13827. if (!has_extension(ext))
  13828. forced_extensions.push_back(ext);
  13829. }
  13830. const SmallVector<std::string> &CompilerGLSL::get_required_extensions() const
  13831. {
  13832. return forced_extensions;
  13833. }
  13834. void CompilerGLSL::require_extension_internal(const string &ext)
  13835. {
  13836. if (backend.supports_extensions && !has_extension(ext))
  13837. {
  13838. forced_extensions.push_back(ext);
  13839. force_recompile();
  13840. }
  13841. }
  13842. void CompilerGLSL::flatten_buffer_block(VariableID id)
  13843. {
  13844. auto &var = get<SPIRVariable>(id);
  13845. auto &type = get<SPIRType>(var.basetype);
  13846. auto name = to_name(type.self, false);
  13847. auto &flags = get_decoration_bitset(type.self);
  13848. if (!type.array.empty())
  13849. SPIRV_CROSS_THROW(name + " is an array of UBOs.");
  13850. if (type.basetype != SPIRType::Struct)
  13851. SPIRV_CROSS_THROW(name + " is not a struct.");
  13852. if (!flags.get(DecorationBlock))
  13853. SPIRV_CROSS_THROW(name + " is not a block.");
  13854. if (type.member_types.empty())
  13855. SPIRV_CROSS_THROW(name + " is an empty struct.");
  13856. flattened_buffer_blocks.insert(id);
  13857. }
  13858. bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
  13859. {
  13860. return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
  13861. }
  13862. bool CompilerGLSL::is_user_type_structured(uint32_t /*id*/) const
  13863. {
13864. return false; // GLSL itself does not have structured user types, but HLSL does with StructuredBuffer and RWStructuredBuffer resources.
  13865. }
  13866. bool CompilerGLSL::check_atomic_image(uint32_t id)
  13867. {
  13868. auto &type = expression_type(id);
  13869. if (type.storage == StorageClassImage)
  13870. {
  13871. if (options.es && options.version < 320)
  13872. require_extension_internal("GL_OES_shader_image_atomic");
  13873. auto *var = maybe_get_backing_variable(id);
  13874. if (var)
  13875. {
  13876. if (has_decoration(var->self, DecorationNonWritable) || has_decoration(var->self, DecorationNonReadable))
  13877. {
  13878. unset_decoration(var->self, DecorationNonWritable);
  13879. unset_decoration(var->self, DecorationNonReadable);
  13880. force_recompile();
  13881. }
  13882. }
  13883. return true;
  13884. }
  13885. else
  13886. return false;
  13887. }
  13888. void CompilerGLSL::add_function_overload(const SPIRFunction &func)
  13889. {
  13890. Hasher hasher;
  13891. for (auto &arg : func.arguments)
  13892. {
  13893. // Parameters can vary with pointer type or not,
  13894. // but that will not change the signature in GLSL/HLSL,
  13895. // so strip the pointer type before hashing.
  13896. uint32_t type_id = get_pointee_type_id(arg.type);
  13897. auto &type = get<SPIRType>(type_id);
  13898. if (!combined_image_samplers.empty())
  13899. {
  13900. // If we have combined image samplers, we cannot really trust the image and sampler arguments
  13901. // we pass down to callees, because they may be shuffled around.
  13902. // Ignore these arguments, to make sure that functions need to differ in some other way
  13903. // to be considered different overloads.
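// Illustrative consequence (hypothetical names): with combined image samplers active,
// foo(texture2D, sampler) and foo(sampler2D) can hash identically; the later definition is then
// treated as an overload conflict and receives a uniquified name through add_resource_name().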
  13904. if (type.basetype == SPIRType::SampledImage ||
  13905. (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
  13906. {
  13907. continue;
  13908. }
  13909. }
  13910. hasher.u32(type_id);
  13911. }
  13912. uint64_t types_hash = hasher.get();
  13913. auto function_name = to_name(func.self);
  13914. auto itr = function_overloads.find(function_name);
  13915. if (itr != end(function_overloads))
  13916. {
  13917. // There exists a function with this name already.
  13918. auto &overloads = itr->second;
  13919. if (overloads.count(types_hash) != 0)
  13920. {
  13921. // Overload conflict, assign a new name.
  13922. add_resource_name(func.self);
  13923. function_overloads[to_name(func.self)].insert(types_hash);
  13924. }
  13925. else
  13926. {
  13927. // Can reuse the name.
  13928. overloads.insert(types_hash);
  13929. }
  13930. }
  13931. else
  13932. {
  13933. // First time we see this function name.
  13934. add_resource_name(func.self);
  13935. function_overloads[to_name(func.self)].insert(types_hash);
  13936. }
  13937. }
  13938. void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
  13939. {
  13940. if (func.self != ir.default_entry_point)
  13941. add_function_overload(func);
  13942. // Avoid shadow declarations.
  13943. local_variable_names = resource_names;
  13944. string decl;
  13945. auto &type = get<SPIRType>(func.return_type);
  13946. decl += flags_to_qualifiers_glsl(type, return_flags);
  13947. decl += type_to_glsl(type);
  13948. decl += type_to_array_glsl(type);
  13949. decl += " ";
  13950. if (func.self == ir.default_entry_point)
  13951. {
  13952. // If we need complex fallback in GLSL, we just wrap main() in a function
  13953. // and interlock the entire shader ...
  13954. if (interlocked_is_complex)
  13955. decl += "spvMainInterlockedBody";
  13956. else
  13957. decl += "main";
  13958. processing_entry_point = true;
  13959. }
  13960. else
  13961. decl += to_name(func.self);
  13962. decl += "(";
  13963. SmallVector<string> arglist;
  13964. for (auto &arg : func.arguments)
  13965. {
  13966. // Do not pass in separate images or samplers if we're remapping
  13967. // to combined image samplers.
  13968. if (skip_argument(arg.id))
  13969. continue;
  13970. // Might change the variable name if it already exists in this function.
13971. // SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
13972. // to use the same name for variables.
  13973. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
  13974. add_local_variable_name(arg.id);
  13975. arglist.push_back(argument_decl(arg));
  13976. // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
  13977. auto *var = maybe_get<SPIRVariable>(arg.id);
  13978. if (var)
  13979. var->parameter = &arg;
  13980. }
  13981. for (auto &arg : func.shadow_arguments)
  13982. {
  13983. // Might change the variable name if it already exists in this function.
13984. // SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
13985. // to use the same name for variables.
  13986. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
  13987. add_local_variable_name(arg.id);
  13988. arglist.push_back(argument_decl(arg));
  13989. // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
  13990. auto *var = maybe_get<SPIRVariable>(arg.id);
  13991. if (var)
  13992. var->parameter = &arg;
  13993. }
  13994. decl += merge(arglist);
  13995. decl += ")";
  13996. statement(decl);
  13997. }
void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
{
	// Avoid potential cycles.
	if (func.active)
		return;
	func.active = true;

	// If we depend on a function, emit that function before we emit our own function.
	for (auto block : func.blocks)
	{
		auto &b = get<SPIRBlock>(block);
		for (auto &i : b.ops)
		{
			auto ops = stream(i);
			auto op = static_cast<Op>(i.op);

			if (op == OpFunctionCall)
			{
				// Recursively emit functions which are called.
				uint32_t id = ops[2];
				emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
			}
		}
	}

	if (func.entry_line.file_id != 0)
		emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
	emit_function_prototype(func, return_flags);
	begin_scope();

	if (func.self == ir.default_entry_point)
		emit_entry_point_declarations();

	current_function = &func;
	auto &entry_block = get<SPIRBlock>(func.entry_block);

	sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
	for (auto &array : func.constant_arrays_needed_on_stack)
	{
		auto &c = get<SPIRConstant>(array);
		auto &type = get<SPIRType>(c.constant_type);
		statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
	}

	for (auto &v : func.local_variables)
	{
		auto &var = get<SPIRVariable>(v);
		var.deferred_declaration = false;

		if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup))
		{
			// Special variable type which cannot have initializer,
			// need to be declared as standalone variables.
			// Comes from MSL which can push global variables as local variables in main function.
			add_local_variable_name(var.self);
			statement(variable_decl(var), ";");
			var.deferred_declaration = false;
		}
		else if (var.storage == StorageClassPrivate)
		{
			// These variables will not have had their CFG usage analyzed, so move it to the entry block.
			// Comes from MSL which can push global variables as local variables in main function.
			// We could just declare them right now, but we would miss out on an important initialization case which is
			// LUT declaration in MSL.
			// If we don't declare the variable when it is assigned we're forced to go through a helper function
			// which copies elements one by one.
			add_local_variable_name(var.self);

			if (var.initializer)
			{
				statement(variable_decl(var), ";");
				var.deferred_declaration = false;
			}
			else
			{
				auto &dominated = entry_block.dominated_variables;
				if (find(begin(dominated), end(dominated), var.self) == end(dominated))
					entry_block.dominated_variables.push_back(var.self);
				var.deferred_declaration = true;
			}
		}
		else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
		{
			// No need to declare this variable, it has a static expression.
			var.deferred_declaration = false;
		}
		else if (expression_is_lvalue(v))
		{
			add_local_variable_name(var.self);

			// Loop variables should never be declared early, they are explicitly emitted in a loop.
			if (var.initializer && !var.loop_variable)
				statement(variable_decl_function_local(var), ";");
			else
			{
				// Don't declare variable until first use to declutter the GLSL output quite a lot.
				// If we don't touch the variable before first branch,
				// declare it then since we need variable declaration to be in top scope.
				var.deferred_declaration = true;
			}
		}
		else
		{
			// HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
			// For these types (non-lvalue), we enforce forwarding through a shadowed variable.
			// This means that when we OpStore to these variables, we just write in the expression ID directly.
			// This breaks any kind of branching, since the variable must be statically assigned.
			// Branching on samplers and images would be pretty much impossible to fake in GLSL.
			var.statically_assigned = true;
		}

		var.loop_variable_enable = false;

		// Loop variables are never declared outside their for-loop, so block any implicit declaration.
		if (var.loop_variable)
		{
			var.deferred_declaration = false;
			// Need to reset the static expression so we can fall back to the initializer if need be.
			var.static_expression = 0;
		}
	}

	// Enforce declaration order for regression testing purposes.
	for (auto &block_id : func.blocks)
	{
		auto &block = get<SPIRBlock>(block_id);
		sort(begin(block.dominated_variables), end(block.dominated_variables));
	}

	for (auto &line : current_function->fixup_hooks_in)
		line();

	emit_block_chain(entry_block);

	end_scope();
	processing_entry_point = false;
	statement("");

	// Make sure deferred declaration state for local variables is cleared when we are done with function.
	// We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
	for (auto &v : func.local_variables)
	{
		auto &var = get<SPIRVariable>(v);
		var.deferred_declaration = false;
	}
}

void CompilerGLSL::emit_fixup()
{
	if (is_vertex_like_shader())
	{
		if (options.vertex.fixup_clipspace)
		{
			const char *suffix = backend.float_literal_suffix ? "f" : "";
			statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
		}

		if (options.vertex.flip_vert_y)
			statement("gl_Position.y = -gl_Position.y;");
	}
}

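// Illustrative example (hypothetical IDs): for a phi node "%x = OpPhi %int %a %A %b %B",
// flushing the A -> target edge in flush_phi below emits roughly "x = a;". If another phi in the
// target block still reads the old value of x, it is first saved with "_<id>_copy = x;" and the
// later phi reads from that copy instead.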
void CompilerGLSL::flush_phi(BlockID from, BlockID to)
{
	auto &child = get<SPIRBlock>(to);
	if (child.ignore_phi_from_block == from)
		return;

	unordered_set<uint32_t> temporary_phi_variables;

	for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
	{
		auto &phi = *itr;

		if (phi.parent == from)
		{
			auto &var = get<SPIRVariable>(phi.function_variable);

			// A Phi variable might be a loop variable, so flush to static expression.
			if (var.loop_variable && !var.loop_variable_enable)
				var.static_expression = phi.local_variable;
			else
			{
				flush_variable_declaration(phi.function_variable);

				// Check if we are going to write to a Phi variable that another statement will read from
				// as part of another Phi node in our target block.
				// For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
				// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
				bool need_saved_temporary =
				    find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
					    return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
				    }) != end(child.phi_variables);

				if (need_saved_temporary)
				{
					// Need to make sure we declare the phi variable with a copy at the right scope.
					// We cannot safely declare a temporary here since we might be inside a continue block.
					if (!var.allocate_temporary_copy)
					{
						var.allocate_temporary_copy = true;
						force_recompile();
					}
					statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
					temporary_phi_variables.insert(phi.function_variable);
				}

				// This might be called in a continue block, so make sure we
				// use this to emit ESSL 1.0 compliant increments/decrements.
				auto lhs = to_expression(phi.function_variable);

				string rhs;
				if (temporary_phi_variables.count(phi.local_variable))
					rhs = join("_", phi.local_variable, "_copy");
				else
					rhs = to_pointer_expression(phi.local_variable);

				if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
					statement(lhs, " = ", rhs, ";");
			}

			register_write(phi.function_variable);
		}
	}
}

void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
{
	auto &to_block = get<SPIRBlock>(to);
	if (from == to)
		return;

	assert(is_continue(to));

	if (to_block.complex_continue)
	{
		// Just emit the whole block chain as is.
		auto usage_counts = expression_usage_counts;

		emit_block_chain(to_block);

		// Expression usage counts are moot after returning from the continue block.
		expression_usage_counts = usage_counts;
	}
	else
	{
		auto &from_block = get<SPIRBlock>(from);
		bool outside_control_flow = false;
		uint32_t loop_dominator = 0;

		// FIXME: Refactor this to not use the old loop_dominator tracking.
		if (from_block.merge_block)
		{
			// If we are a loop header, we don't set the loop dominator,
			// so just use "self" here.
			loop_dominator = from;
		}
		else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
		{
			loop_dominator = from_block.loop_dominator;
		}

		if (loop_dominator != 0)
		{
			auto &cfg = get_cfg_for_current_function();

			// For non-complex continue blocks, we implicitly branch to the continue block
			// by having the continue block be part of the loop header in for (; ; continue-block).
			outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
		}

		// Some simplification for for-loops. We always end up with a useless continue;
		// statement since we branch to a loop block.
		// Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block,
		// we can avoid writing out an explicit continue statement.
		// Similar optimization to return statements if we know we're outside flow control.
		if (!outside_control_flow)
			statement("continue;");
	}
}

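// Illustrative example (hypothetical block ID 42): when a break has to leave both a switch and
// its surrounding loop, branch() below writes to a ladder variable instead of breaking directly:
//   bool _42_ladder_break = false;   // declared at the top of the switch (see MultiSelect handling)
//   _42_ladder_break = true; break;  // inside the case label
//   if (_42_ladder_break) { break; } // emitted after the switch has ended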
void CompilerGLSL::branch(BlockID from, BlockID to)
{
	flush_phi(from, to);
	flush_control_dependent_expressions(from);

	bool to_is_continue = is_continue(to);

	// This is only a continue if we branch to our loop dominator.
	if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
	{
		// This can happen if we had a complex continue block which was emitted.
		// Once the continue block tries to branch to the loop header, just emit continue;
		// and end the chain here.
		statement("continue;");
	}
	else if (from != to && is_break(to))
	{
		// We cannot break to ourselves, so check explicitly for from != to.
		// This case can trigger if a loop header is all three of these things:
		// - Continue block
		// - Loop header
		// - Break merge target all at once ...
		// Very dirty workaround.
		// Switch constructs are able to break, but they cannot break out of a loop at the same time,
		// yet SPIR-V allows it.
		// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
		// write to the ladder here, and defer the break.
		// The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
		if (is_loop_break(to))
		{
			for (size_t n = current_emitting_switch_stack.size(); n; n--)
			{
				auto *current_emitting_switch = current_emitting_switch_stack[n - 1];
				if (current_emitting_switch &&
				    current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
				    get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
				{
					if (!current_emitting_switch->need_ladder_break)
					{
						force_recompile();
						current_emitting_switch->need_ladder_break = true;
					}
					statement("_", current_emitting_switch->self, "_ladder_break = true;");
				}
				else
					break;
			}
		}
		statement("break;");
	}
	else if (to_is_continue || from == to)
	{
		// The from == to case can happen for a do-while loop which branches into itself.
		// We don't mark these cases as continue blocks, but the only possible way to branch into
		// ourselves is through means of continue blocks.

		// If we are merging to a continue block, there is no need to emit the block chain for continue here.
		// We can branch to the continue block after we merge execution.

		// Here we make use of structured control flow rules from spec:
		// 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
		//       - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
		// If we are branching to a merge block, we must be inside a construct which dominates the merge block.
		auto &block_meta = ir.block_meta[to];
		bool branching_to_merge =
		    (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
		                   ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
		if (!to_is_continue || !branching_to_merge)
			branch_to_continue(from, to);
	}
	else if (!is_conditional(to))
		emit_block_chain(get<SPIRBlock>(to));

	// It is important that we check for break before continue.
	// A block might serve two purposes, a break block for the inner scope, and
	// a continue block in the outer scope.
	// Inner scope always takes precedence.
}

void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
{
	auto &from_block = get<SPIRBlock>(from);
	BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);

	// If we branch directly to our selection merge target, we don't need a code path.
	bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
	bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);

	if (!true_block_needs_code && !false_block_needs_code)
		return;

	// We might have a loop merge here. Only consider selection flattening constructs.
	// Loop hints are handled explicitly elsewhere.
	if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
		emit_block_hints(from_block);

	if (true_block_needs_code)
	{
		statement("if (", to_expression(cond), ")");
		begin_scope();
		branch(from, true_block);
		end_scope();

		if (false_block_needs_code)
		{
			statement("else");
			begin_scope();
			branch(from, false_block);
			end_scope();
		}
	}
	else if (false_block_needs_code)
	{
		// Only need false path, use negative conditional.
		statement("if (!", to_enclosed_expression(cond), ")");
		begin_scope();
		branch(from, false_block);
		end_scope();
	}
}

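// Illustrative output shape for the conditional branch above when both paths need code:
//   if (cond) { <true block chain> } else { <false block chain> }
// When only the false path needs code, the inverted form "if (!cond) { <false block chain> }" is used.
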
// FIXME: This currently cannot handle complex continue blocks
// as in do-while.
// This should be seen as a "trivial" continue block.
string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
{
	auto *block = &get<SPIRBlock>(continue_block);

	// While emitting the continue block, declare_temporary will check this
	// if we have to emit temporaries.
	current_continue_block = block;

	SmallVector<string> statements;

	// Capture all statements into our list.
	auto *old = redirect_statement;
	redirect_statement = &statements;

	// Stamp out all blocks one after each other.
	while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
	{
		// Write out all instructions we have in this block.
		emit_block_instructions(*block);

		// For plain branchless for/while continue blocks.
		if (block->next_block)
		{
			flush_phi(continue_block, block->next_block);
			block = &get<SPIRBlock>(block->next_block);
		}
		// For do while blocks. The last block will be a select block.
		else if (block->true_block && follow_true_block)
		{
			flush_phi(continue_block, block->true_block);
			block = &get<SPIRBlock>(block->true_block);
		}
		else if (block->false_block && follow_false_block)
		{
			flush_phi(continue_block, block->false_block);
			block = &get<SPIRBlock>(block->false_block);
		}
		else
		{
			SPIRV_CROSS_THROW("Invalid continue block detected!");
		}
	}

	// Restore old pointer.
	redirect_statement = old;

	// Somewhat ugly, strip off the last ';' since we use ',' instead.
	// Ideally, we should select this behavior in statement().
	for (auto &s : statements)
	{
		if (!s.empty() && s.back() == ';')
			s.erase(s.size() - 1, 1);
	}

	current_continue_block = nullptr;
	return merge(statements);
}

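// Illustrative example: a continue block which increments i and adds 2 to j is captured as the
// statements "i++;" and "j += 2;"; after stripping the trailing ';' and merging they become the
// string "i++, j += 2", ready to be used as the third clause of a for header.
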
void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
{
	// While loops do not take initializers, so declare all of them outside.
	for (auto &loop_var : block.loop_variables)
	{
		auto &var = get<SPIRVariable>(loop_var);
		statement(variable_decl(var), ";");
	}
}

string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
{
	if (block.loop_variables.empty())
		return "";

	bool same_types = for_loop_initializers_are_same_type(block);
	// We can only declare for loop initializers if all variables are of same type.
	// If we cannot do this, declare individual variables before the loop header.

	// We might have a loop variable candidate which was not assigned to for some reason.
	uint32_t missing_initializers = 0;
	for (auto &variable : block.loop_variables)
	{
		uint32_t expr = get<SPIRVariable>(variable).static_expression;

		// Sometimes loop variables are initialized with OpUndef, but we can just declare
		// a plain variable without initializer in this case.
		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
			missing_initializers++;
	}

	if (block.loop_variables.size() == 1 && missing_initializers == 0)
	{
		return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
	}
	else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
	{
		for (auto &loop_var : block.loop_variables)
			statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
		return "";
	}
	else
	{
		// We have a mix of loop variables, either ones with a clear initializer, or ones without.
		// Separate the two streams.
		string expr;

		for (auto &loop_var : block.loop_variables)
		{
			uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
			if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
			{
				statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
			}
			else
			{
				auto &var = get<SPIRVariable>(loop_var);
				auto &type = get_variable_data_type(var);
				if (expr.empty())
				{
					// For loop initializers are of the form <type id = value, id = value, id = value, etc ...
					expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
				}
				else
				{
					expr += ", ";
					// In MSL, being based on C++, the asterisk marking a pointer
					// binds to the identifier, not the type.
					if (type.pointer)
						expr += "* ";
				}

				expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
			}
		}
		return expr;
	}
}

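// Illustrative examples of the initializer strings built above: a single int loop variable yields
// "int i = 0"; multiple variables of the same type yield "int i = 0, j = 10". Variables without a
// usable static expression are instead declared on their own lines before the loop header.
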
bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
{
	if (block.loop_variables.size() <= 1)
		return true;

	uint32_t expected = 0;
	Bitset expected_flags;
	for (auto &var : block.loop_variables)
	{
		// Don't care about uninitialized variables as they will not be part of the initializers.
		uint32_t expr = get<SPIRVariable>(var).static_expression;
		if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
			continue;

		if (expected == 0)
		{
			expected = get<SPIRVariable>(var).basetype;
			expected_flags = get_decoration_bitset(var);
		}
		else if (expected != get<SPIRVariable>(var).basetype)
			return false;

		// Precision flags and things like that must also match.
		if (expected_flags != get_decoration_bitset(var))
			return false;
	}

	return true;
}

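// Illustrative example: "int i = 0" next to "uint j = 0u", or two ints where only one carries a
// precision decoration such as RelaxedPrecision (mediump), do not count as the same type here,
// so they cannot share a single for-loop initializer and are declared separately instead.
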
void CompilerGLSL::emit_block_instructions_with_masked_debug(SPIRBlock &block)
{
	// Have to block debug instructions such as OpLine here, since they will be treated as statements otherwise,
	// which breaks loop optimizations.
	// Any line directive would be declared outside the loop body, which would just be confusing either way.
	bool old_block_debug_directives = block_debug_directives;
	block_debug_directives = true;
	emit_block_instructions(block);
	block_debug_directives = old_block_debug_directives;
}

bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
{
	SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));

	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
	{
		uint32_t current_count = statement_count;
		// If we're trying to create a true for loop,
		// we need to make sure that all opcodes before branch statement do not actually emit any code.
		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
		emit_block_instructions_with_masked_debug(block);

		bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);

		// This can work! We only did trivial things which could be forwarded in block body!
		if (current_count == statement_count && condition_is_temporary)
		{
			switch (continue_type)
			{
			case SPIRBlock::ForLoop:
			{
				// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
				flush_undeclared_variables(block);

				// Important that we do this in this order because
				// emitting the continue block can invalidate the condition expression.
				auto initializer = emit_for_loop_initializers(block);
				auto condition = to_expression(block.condition);

				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
					condition = join("!", enclose_expression(condition));

				emit_block_hints(block);
				if (method != SPIRBlock::MergeToSelectContinueForLoop)
				{
					auto continue_block = emit_continue_block(block.continue_block, false, false);
					statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
				}
				else
					statement("for (", initializer, "; ", condition, "; )");
				break;
			}

			case SPIRBlock::WhileLoop:
			{
				// This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
				flush_undeclared_variables(block);
				emit_while_loop_initializers(block);
				emit_block_hints(block);

				auto condition = to_expression(block.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
					condition = join("!", enclose_expression(condition));

				statement("while (", condition, ")");
				break;
			}

			default:
				block.disable_block_optimization = true;
				force_recompile();
				begin_scope(); // We'll see an end_scope() later.
				return false;
			}

			begin_scope();
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile();
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else if (method == SPIRBlock::MergeToDirectForLoop)
	{
		auto &child = get<SPIRBlock>(block.next_block);

		// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
		flush_undeclared_variables(child);

		uint32_t current_count = statement_count;

		// If we're trying to create a true for loop,
		// we need to make sure that all opcodes before branch statement do not actually emit any code.
		// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
		emit_block_instructions_with_masked_debug(child);

		bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);

		if (current_count == statement_count && condition_is_temporary)
		{
			uint32_t target_block = child.true_block;

			switch (continue_type)
			{
			case SPIRBlock::ForLoop:
			{
				// Important that we do this in this order because
				// emitting the continue block can invalidate the condition expression.
				auto initializer = emit_for_loop_initializers(block);
				auto condition = to_expression(child.condition);

				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
				{
					condition = join("!", enclose_expression(condition));
					target_block = child.false_block;
				}

				auto continue_block = emit_continue_block(block.continue_block, false, false);
				emit_block_hints(block);
				statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
				break;
			}

			case SPIRBlock::WhileLoop:
			{
				emit_while_loop_initializers(block);
				emit_block_hints(block);

				auto condition = to_expression(child.condition);
				// Condition might have to be inverted.
				if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
				{
					condition = join("!", enclose_expression(condition));
					target_block = child.false_block;
				}

				statement("while (", condition, ")");
				break;
			}

			default:
				block.disable_block_optimization = true;
				force_recompile();
				begin_scope(); // We'll see an end_scope() later.
				return false;
			}

			begin_scope();
			branch(child.self, target_block);
			return true;
		}
		else
		{
			block.disable_block_optimization = true;
			force_recompile();
			begin_scope(); // We'll see an end_scope() later.
			return false;
		}
	}
	else
		return false;
}

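// Illustrative examples of headers attempt_emit_loop_header above can produce:
//   for (int i = 0; i < 10; i++)    (ForLoop continue type)
//   while (x < 10)                  (WhileLoop continue type)
// If the block body emitted real statements, or the continue block is too complex, it falls back
// to a plain scope, sets disable_block_optimization and forces a recompile.
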
void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
{
	for (auto &v : block.dominated_variables)
		flush_variable_declaration(v);
}

void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
{
	// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
	// Need to sort these to ensure that reference output is stable.
	sort(begin(temporaries), end(temporaries),
	     [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });

	for (auto &tmp : temporaries)
	{
		auto &type = get<SPIRType>(tmp.first);

		// There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries.
		// This should be ignored unless we're doing actual variable pointers and backend supports it.
		// Access chains cannot normally be lowered to temporaries in GLSL and HLSL.
		if (type.pointer && !backend.native_pointers)
			continue;

		add_local_variable_name(tmp.second);
		auto &flags = get_decoration_bitset(tmp.second);

		// Not all targets support pointer literals, so don't bother with that case.
		string initializer;
		if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
			initializer = join(" = ", to_zero_initialized_expression(tmp.first));

		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");

		hoisted_temporaries.insert(tmp.second);
		forced_temporaries.insert(tmp.second);

		// The temporary might be read from before it's assigned, set up the expression now.
		set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);

		// If we have hoisted temporaries in multi-precision contexts, emit that here too ...
		// We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here.
		auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(tmp.second);
		if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end())
		{
			uint32_t mirror_id = mirrored_precision_itr->second;
			auto &mirror_flags = get_decoration_bitset(mirror_id);
			statement(flags_to_qualifiers_glsl(type, mirror_flags),
			          variable_decl(type, to_name(mirror_id)),
			          initializer, ";");
			// The temporary might be read from before it's assigned, set up the expression now.
			set<SPIRExpression>(mirror_id, to_name(mirror_id), tmp.first, true);
			hoisted_temporaries.insert(mirror_id);
		}
	}
}

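// Illustrative example (hypothetical ID 57): a hoisted float temporary is declared above as
// "float _57;" (or "float _57 = 0.0;" when force_zero_initialized_variables is set) ahead of the
// loop header, so code both inside and after the loop can refer to it.
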
void CompilerGLSL::emit_block_chain(SPIRBlock &block)
{
	bool select_branch_to_true_block = false;
	bool select_branch_to_false_block = false;
	bool skip_direct_branch = false;
	bool emitted_loop_header_variables = false;
	bool force_complex_continue_block = false;
	ValueSaver<uint32_t> loop_level_saver(current_loop_level);

	if (block.merge == SPIRBlock::MergeLoop)
		add_loop_level();

	// If we're emitting PHI variables with precision aliases, we have to emit them as hoisted temporaries.
	for (auto var_id : block.dominated_variables)
	{
		auto &var = get<SPIRVariable>(var_id);
		if (var.phi_variable)
		{
			auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(var_id);
			if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end() &&
			    find_if(block.declare_temporary.begin(), block.declare_temporary.end(),
			            [mirrored_precision_itr](const std::pair<TypeID, VariableID> &p) {
				            return p.second == mirrored_precision_itr->second;
			            }) == block.declare_temporary.end())
			{
				block.declare_temporary.push_back({ var.basetype, mirrored_precision_itr->second });
			}
		}
	}

	emit_hoisted_temporaries(block.declare_temporary);

	SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
	if (block.continue_block)
	{
		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
		// If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
		if (continue_type == SPIRBlock::ComplexLoop)
			block.complex_continue = true;
	}

	// If we have loop variables, stop masking out access to the variable now.
	for (auto var_id : block.loop_variables)
	{
		auto &var = get<SPIRVariable>(var_id);
		var.loop_variable_enable = true;
		// We're not going to declare the variable directly, so emit a copy here.
		emit_variable_temporary_copies(var);
	}

	// Remember deferred declaration state. We will restore it before returning.
	SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
	for (size_t i = 0; i < block.dominated_variables.size(); i++)
	{
		uint32_t var_id = block.dominated_variables[i];
		auto &var = get<SPIRVariable>(var_id);
		rearm_dominated_variables[i] = var.deferred_declaration;
	}

	// This is the method often used by spirv-opt to implement loops.
	// The loop header goes straight into the continue block.
	// However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
	// it *MUST* be used in the continue block. This loop method will not work.
	if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
		{
			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
				select_branch_to_false_block = true;
			else
				select_branch_to_true_block = true;

			emitted_loop_header_variables = true;
			force_complex_continue_block = true;
		}
	}
	// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
	else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
		{
			// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
			if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
				select_branch_to_false_block = true;
			else
				select_branch_to_true_block = true;

			emitted_loop_header_variables = true;
		}
	}
	// This is the newer loop behavior in glslang which branches from Loop header directly to
	// a new block, which in turn has a OpBranchSelection without a selection merge.
	else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
	{
		flush_undeclared_variables(block);
		if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
		{
			skip_direct_branch = true;
			emitted_loop_header_variables = true;
		}
	}
	else if (continue_type == SPIRBlock::DoWhileLoop)
	{
		flush_undeclared_variables(block);
		emit_while_loop_initializers(block);
		emitted_loop_header_variables = true;
		// We have some temporaries where the loop header is the dominator.
		// We risk a case where we have code like:
		// for (;;) { create-temporary; break; } consume-temporary;
		// so force-declare temporaries here.
		emit_hoisted_temporaries(block.potential_declare_temporary);
		statement("do");
		begin_scope();

		emit_block_instructions(block);
	}
	else if (block.merge == SPIRBlock::MergeLoop)
	{
		flush_undeclared_variables(block);
		emit_while_loop_initializers(block);
		emitted_loop_header_variables = true;

		// We have a generic loop without any distinguishable pattern like for, while or do while.
		get<SPIRBlock>(block.continue_block).complex_continue = true;
		continue_type = SPIRBlock::ComplexLoop;

		// We have some temporaries where the loop header is the dominator.
		// We risk a case where we have code like:
		// for (;;) { create-temporary; break; } consume-temporary;
		// so force-declare temporaries here.
		emit_hoisted_temporaries(block.potential_declare_temporary);
		emit_block_hints(block);
		statement("for (;;)");
		begin_scope();

		emit_block_instructions(block);
	}
	else
	{
		emit_block_instructions(block);
	}

	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
	// as writes to said loop variables might have been masked out, we need a recompile.
	if (!emitted_loop_header_variables && !block.loop_variables.empty())
	{
		force_recompile_guarantee_forward_progress();
		for (auto var : block.loop_variables)
			get<SPIRVariable>(var).loop_variable = false;
		block.loop_variables.clear();
	}

	flush_undeclared_variables(block);
	bool emit_next_block = true;

	// Handle end of block.
	switch (block.terminator)
	{
	case SPIRBlock::Direct:
		// True when emitting complex continue block.
		if (block.loop_dominator == block.next_block)
		{
			branch(block.self, block.next_block);
			emit_next_block = false;
		}
		// True if MergeToDirectForLoop succeeded.
		else if (skip_direct_branch)
			emit_next_block = false;
		else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
		{
			branch(block.self, block.next_block);
			emit_next_block = false;
		}
		break;

	case SPIRBlock::Select:
		// True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
		if (select_branch_to_true_block)
		{
			if (force_complex_continue_block)
			{
				assert(block.true_block == block.continue_block);

				// We're going to emit a continue block directly here, so make sure it's marked as complex.
				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
				bool old_complex = complex_continue;
				complex_continue = true;
				branch(block.self, block.true_block);
				complex_continue = old_complex;
			}
			else
				branch(block.self, block.true_block);
		}
		else if (select_branch_to_false_block)
		{
			if (force_complex_continue_block)
			{
				assert(block.false_block == block.continue_block);

				// We're going to emit a continue block directly here, so make sure it's marked as complex.
				auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
				bool old_complex = complex_continue;
				complex_continue = true;
				branch(block.self, block.false_block);
				complex_continue = old_complex;
			}
			else
				branch(block.self, block.false_block);
		}
		else
			branch(block.self, block.condition, block.true_block, block.false_block);
		break;

	case SPIRBlock::MultiSelect:
	{
		auto &type = expression_type(block.condition);
		bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort ||
		                     type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64;

		if (block.merge == SPIRBlock::MergeNone)
			SPIRV_CROSS_THROW("Switch statement is not structured");

		if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64))
		{
			// SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
			SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
		}

		const char *label_suffix = "";
		if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
			label_suffix = "u";
		else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch)
			label_suffix = "l";
		else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch)
			label_suffix = "ul";
		else if (type.basetype == SPIRType::UShort)
			label_suffix = backend.uint16_t_literal_suffix;
		else if (type.basetype == SPIRType::Short)
			label_suffix = backend.int16_t_literal_suffix;

		current_emitting_switch_stack.push_back(&block);

		if (block.need_ladder_break)
			statement("bool _", block.self, "_ladder_break = false;");

		// Find all unique case constructs.
		unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs;
		SmallVector<uint32_t> block_declaration_order;
		SmallVector<uint64_t> literals_to_merge;

		// If a switch case branches to the default block for some reason, we can just remove that literal from consideration
		// and let the default: block handle it.
		// 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here.
		// We only need to consider possible fallthrough if order[i] branches to order[i + 1].
		auto &cases = get_case_list(block);
		for (auto &c : cases)
		{
			if (c.block != block.next_block && c.block != block.default_block)
			{
				if (!case_constructs.count(c.block))
					block_declaration_order.push_back(c.block);
				case_constructs[c.block].push_back(c.value);
			}
			else if (c.block == block.next_block && block.default_block != block.next_block)
			{
				// We might have to flush phi inside specific case labels.
				// If we can piggyback on default:, do so instead.
				literals_to_merge.push_back(c.value);
			}
		}

		// Empty literal array -> default.
		if (block.default_block != block.next_block)
		{
			auto &default_block = get<SPIRBlock>(block.default_block);

			// We need to slide in the default block somewhere in this chain
			// if there are fall-through scenarios since the default is declared separately in OpSwitch.
			// Only consider trivial fall-through cases here.
			size_t num_blocks = block_declaration_order.size();
			bool injected_block = false;

			for (size_t i = 0; i < num_blocks; i++)
			{
				auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
				if (execution_is_direct_branch(case_block, default_block))
				{
					// Fallthrough to default block, we must inject the default block here.
					block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
					injected_block = true;
					break;
				}
				else if (execution_is_direct_branch(default_block, case_block))
				{
					// Default case is falling through to another case label, we must inject the default block here.
					block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
					injected_block = true;
					break;
				}
			}

			// Order does not matter.
			if (!injected_block)
				block_declaration_order.push_back(block.default_block);
			else if (is_legacy_es())
				SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");

			case_constructs[block.default_block] = {};
		}

		size_t num_blocks = block_declaration_order.size();

		const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string
		{
			if (is_unsigned_case)
				return convert_to_string(literal);

			// For smaller cases, the literals are compiled as 32 bit wide
			// literals so we don't need to care for all sizes specifically.
			if (width <= 32)
			{
				return convert_to_string(int64_t(int32_t(literal)));
			}

			return convert_to_string(int64_t(literal));
		};

		const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels,
		                                      const char *suffix) -> string {
			string ret;
			size_t count = labels.size();
			for (size_t i = 0; i < count; i++)
			{
				if (i)
					ret += " || ";
				ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
				            count > 1 ? ")" : "");
			}
			return ret;
		};
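
		// Illustrative examples for the helpers above: with a 32-bit signed selector, the case
		// literal 0xFFFFFFFF becomes the label "-1", while unsigned selectors print the raw value.
		// On legacy ESSL targets, which lack switch, to_legacy_case_label turns the labels {1, 2}
		// into a condition like "(x == 1) || (x == 2)" for use in an if/else chain.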

		// We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
		// we need to flush phi nodes outside the switch block in a branch,
		// and skip any Phi handling inside the case label to make fall-through work as expected.
		// This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
		// inside the case label if at all possible.
		for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
		{
			if (flush_phi_required(block.self, block_declaration_order[i]) &&
			    flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
			{
				uint32_t target_block = block_declaration_order[i];

				// Make sure we flush Phi, it might have been marked to be ignored earlier.
				get<SPIRBlock>(target_block).ignore_phi_from_block = 0;

				auto &literals = case_constructs[target_block];

				if (literals.empty())
				{
					// Oh boy, gotta make a complete negative test instead! o.o
					// Find all possible literals that would *not* make us enter the default block.
					// If none of those literals match, we flush Phi ...
					SmallVector<string> conditions;
					for (size_t j = 0; j < num_blocks; j++)
					{
						auto &negative_literals = case_constructs[block_declaration_order[j]];
						for (auto &case_label : negative_literals)
							conditions.push_back(join(to_enclosed_expression(block.condition),
							                          " != ", to_case_label(case_label, type.width, unsigned_case)));
					}

					statement("if (", merge(conditions, " && "), ")");
					begin_scope();
					flush_phi(block.self, target_block);
					end_scope();
				}
				else
				{
					SmallVector<string> conditions;
					conditions.reserve(literals.size());
					for (auto &case_label : literals)
						conditions.push_back(join(to_enclosed_expression(block.condition),
						                          " == ", to_case_label(case_label, type.width, unsigned_case)));
					statement("if (", merge(conditions, " || "), ")");
					begin_scope();
					flush_phi(block.self, target_block);
					end_scope();
				}

				// Mark the block so that we don't flush Phi from header to case label.
				get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
			}
		}

		// If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
		// non-structured exits with the help of a switch block.
		// This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
		bool block_like_switch = cases.empty();

		// If this is true, the switch is completely meaningless, and we should just avoid it.
		bool collapsed_switch = block_like_switch && block.default_block == block.next_block;

		if (!collapsed_switch)
		{
			if (block_like_switch || is_legacy_es())
			{
				// ESSL 1.0 is not guaranteed to support do/while.
				if (is_legacy_es())
				{
					uint32_t counter = statement_count;
					statement("for (int spvDummy", counter, " = 0; spvDummy", counter, " < 1; spvDummy", counter,
					          "++)");
				}
				else
					statement("do");
			}
			else
			{
				emit_block_hints(block);
				statement("switch (", to_unpacked_expression(block.condition), ")");
			}
			begin_scope();
		}

		for (size_t i = 0; i < num_blocks; i++)
		{
			uint32_t target_block = block_declaration_order[i];
			auto &literals = case_constructs[target_block];

			if (literals.empty())
			{
				// Default case.
				if (!block_like_switch)
				{
					if (is_legacy_es())
						statement("else");
					else
						statement("default:");
				}
			}
			else
			{
				if (is_legacy_es())
				{
					statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
					          ")");
				}
				else
				{
					for (auto &case_literal : literals)
					{
						// The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
						statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");
					}
				}
			}

			auto &case_block = get<SPIRBlock>(target_block);
			if (backend.support_case_fallthrough && i + 1 < num_blocks &&
			    execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
			{
				// We will fall through here, so just terminate the block chain early.
				// We still need to deal with Phi potentially.
				// No need for a stack-like thing here since we only do fall-through when there is a
				// single trivial branch to the fall-through target.
				current_emitting_switch_fallthrough = true;
			}
			else
				current_emitting_switch_fallthrough = false;

			if (!block_like_switch)
				begin_scope();
			branch(block.self, target_block);
			if (!block_like_switch)
				end_scope();

			current_emitting_switch_fallthrough = false;
		}

		// Might still have to flush phi variables if we branch from loop header directly to merge target.
		// This is supposed to emit all cases where we branch from header to merge block directly.
		// There are two main scenarios where we cannot rely on default fallthrough.
		// - There is an explicit default: label already.
		//   In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block.
		// - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there.
		bool header_merge_requires_phi = flush_phi_required(block.self, block.next_block);
		bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty();
		if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty()))
		{
			for (auto &case_literal : literals_to_merge)
				statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");

			if (block.default_block == block.next_block)
			{
				if (is_legacy_es())
					statement("else");
				else
					statement("default:");
			}

			begin_scope();
			flush_phi(block.self, block.next_block);
			statement("break;");
			end_scope();
		}

		if (!collapsed_switch)
		{
			if (block_like_switch && !is_legacy_es())
				end_scope_decl("while(false)");
			else
				end_scope();
		}
		else
			flush_phi(block.self, block.next_block);

		if (block.need_ladder_break)
		{
			statement("if (_", block.self, "_ladder_break)");
			begin_scope();
			statement("break;");
			end_scope();
		}

		current_emitting_switch_stack.pop_back();
		break;
	}

	case SPIRBlock::Return:
	{
		for (auto &line : current_function->fixup_hooks_out)
			line();

		if (processing_entry_point)
			emit_fixup();

		auto &cfg = get_cfg_for_current_function();

		if (block.return_value)
		{
			auto &type = expression_type(block.return_value);
			if (!type.array.empty() && !backend.can_return_array)
			{
				// If we cannot return arrays, we will have a special out argument we can write to instead.
				// The backend is responsible for setting this up, and redirecting the return values as appropriate.
				if (ir.ids[block.return_value].get_type() != TypeUndef)
				{
					emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
					                get_expression_effective_storage_class(block.return_value));
				}

				if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
				    block.loop_dominator != BlockID(SPIRBlock::NoDominator))
				{
					statement("return;");
				}
			}
			else
			{
				// OpReturnValue can return Undef, so don't emit anything for this case.
				if (ir.ids[block.return_value].get_type() != TypeUndef)
					statement("return ", to_unpacked_expression(block.return_value), ";");
			}
		}
		else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
		         block.loop_dominator != BlockID(SPIRBlock::NoDominator))
		{
			// If this block is the very final block and not called from control flow,
			// we do not need an explicit return which looks out of place. Just end the function here.
			// In the very weird case of for(;;) { return; } executing return is unconditional,
			// but we actually need a return here ...
			statement("return;");
		}
		break;
	}

	// If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement.
	case SPIRBlock::Kill:
		statement(backend.discard_literal, ";");
		if (block.return_value)
			statement("return ", to_unpacked_expression(block.return_value), ";");
		break;

	case SPIRBlock::Unreachable:
	{
		// Avoid emitting false fallthrough, which can happen for
		// if (cond) break; else discard; inside a case label.
		// Discard is not always implementable as a terminator.
		auto &cfg = get_cfg_for_current_function();
		bool inner_dominator_is_switch = false;
		ID id = block.self;

		while (id)
		{
			auto &iter_block = get<SPIRBlock>(id);
			if (iter_block.terminator == SPIRBlock::MultiSelect ||
			    iter_block.merge == SPIRBlock::MergeLoop)
			{
				ID next_block = iter_block.merge == SPIRBlock::MergeLoop ?
				                iter_block.merge_block : iter_block.next_block;
				bool outside_construct = next_block && cfg.find_common_dominator(next_block, block.self) == next_block;
				if (!outside_construct)
				{
					inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect;
					break;
				}
			}

			if (cfg.get_preceding_edges(id).empty())
				break;

			id = cfg.get_immediate_dominator(id);
		}

		if (inner_dominator_is_switch)
			statement("break; // unreachable workaround");

		emit_next_block = false;
		break;
	}

	case SPIRBlock::IgnoreIntersection:
		statement("ignoreIntersectionEXT;");
		break;

	case SPIRBlock::TerminateRay:
		statement("terminateRayEXT;");
		break;

	case SPIRBlock::EmitMeshTasks:
		emit_mesh_tasks(block);
		break;

	default:
		SPIRV_CROSS_THROW("Unimplemented block terminator.");
	}

	if (block.next_block && emit_next_block)
	{
		// If we hit this case, we're dealing with an unconditional branch, which means we will output
		// that block after this. If we had selection merge, we already flushed phi variables.
		if (block.merge != SPIRBlock::MergeSelection)
		{
			flush_phi(block.self, block.next_block);
			// For a direct branch, need to remember to invalidate expressions in the next linear block instead.
			get<SPIRBlock>(block.next_block).invalidate_expressions = block.invalidate_expressions;
		}

		// For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
		if (!current_emitting_switch_fallthrough)
		{
			// For merge selects we might have ignored the fact that a merge target
			// could have been a break; or continue;
			// We will need to deal with it here.
			if (is_loop_break(block.next_block))
			{
				// Cannot check for just break, because switch statements will also use break.
				assert(block.merge == SPIRBlock::MergeSelection);
				statement("break;");
			}
			else if (is_continue(block.next_block))
			{
				assert(block.merge == SPIRBlock::MergeSelection);
				branch_to_continue(block.self, block.next_block);
			}
			else if (BlockID(block.self) != block.next_block)
				emit_block_chain(get<SPIRBlock>(block.next_block));
		}
	}

	if (block.merge == SPIRBlock::MergeLoop)
	{
		if (continue_type == SPIRBlock::DoWhileLoop)
		{
			// Make sure that we run the continue block to get the expressions set, but this
			// should become an empty string.
			// We have no fallbacks if we cannot forward everything to temporaries ...
			const auto &continue_block = get<SPIRBlock>(block.continue_block);
			bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
			                                       get<SPIRBlock>(continue_block.loop_dominator));

			uint32_t current_count = statement_count;
			auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
			if (statement_count != current_count)
			{
				// The DoWhile block has side effects, force ComplexLoop pattern next pass.
				get<SPIRBlock>(block.continue_block).complex_continue = true;
				force_recompile();
			}

			// Might have to invert the do-while test here.
			auto condition = to_expression(continue_block.condition);
			if (!positive_test)
				condition = join("!", enclose_expression(condition));

			end_scope_decl(join("while (", condition, ")"));
		}
		else
			end_scope();

		loop_level_saver.release();

		// We cannot break out of two loops at once, so don't check for break; here.
		// Using block.self as the "from" block isn't quite right, but it has the same scope
		// and dominance structure, so it's fine.
		if (is_continue(block.merge_block))
			branch_to_continue(block.self, block.merge_block);
		else
			emit_block_chain(get<SPIRBlock>(block.merge_block));
	}

	// Forget about control dependent expressions now.
	block.invalidate_expressions.clear();

	// After we return, we must be out of scope, so if we somehow have to re-emit this function,
	// re-declare variables if necessary.
	assert(rearm_dominated_variables.size() == block.dominated_variables.size());
	for (size_t i = 0; i < block.dominated_variables.size(); i++)
	{
		uint32_t var = block.dominated_variables[i];
		get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
	}

	// Just like for deferred declaration, we need to forget about loop variable enable
	// if our block chain is reinstantiated later.
	for (auto &var_id : block.loop_variables)
		get<SPIRVariable>(var_id).loop_variable_enable = false;
}

void CompilerGLSL::begin_scope()
{
	statement("{");
	indent++;
}

void CompilerGLSL::end_scope()
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("}");
}

void CompilerGLSL::end_scope(const string &trailer)
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("}", trailer);
}

void CompilerGLSL::end_scope_decl()
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("};");
}

void CompilerGLSL::end_scope_decl(const string &decl)
{
	if (!indent)
		SPIRV_CROSS_THROW("Popping empty indent stack.");
	indent--;
	statement("} ", decl, ";");
}

void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
{
	// If our variable is remapped, and we rely on type-remapping information as
	// well, then we cannot pass the variable as a function parameter.
	// Fixing this is non-trivial without stamping out variants of the same function,
	// so for now warn about this and suggest workarounds instead.
	for (uint32_t i = 0; i < length; i++)
	{
		auto *var = maybe_get<SPIRVariable>(args[i]);
		if (!var || !var->remapped_variable)
			continue;

		auto &type = get<SPIRType>(var->basetype);
		if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
		{
			SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
			                  "This will not work correctly because type-remapping information is lost. "
			                  "To workaround, please consider not passing the subpass input as a function parameter, "
			                  "or use in/out variables instead which do not need type remapping information.");
		}
	}
}

const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
{
	// FIXME: This is kind of hacky. There should be a cleaner way.
	auto offset = uint32_t(&instr - current_emitting_block->ops.data());
	if ((offset + 1) < current_emitting_block->ops.size())
		return &current_emitting_block->ops[offset + 1];
	else
		return nullptr;
}

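// Keeps only the memory-scope semantics bits which are expressible as GLSL barriers
// (atomic counter, image, workgroup, uniform, cross-workgroup and subgroup memory);
// other semantics flags are handled or ignored elsewhere.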
uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
{
	return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
	                    MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
	                    MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
}

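// Default array copy: arrays are assignable types in the GLSL output, so a plain assignment
// statement suffices. The storage-class parameters are unused here; returning true signals
// that the copy has been emitted.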
bool CompilerGLSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass, StorageClass)
{
	string lhs;
	if (expr)
		lhs = expr;
	else
		lhs = to_expression(lhs_id);

	statement(lhs, " = ", to_expression(rhs_id), ";");
	return true;
}

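// GL-specific workaround: a whole-array store to the gl_SampleMask builtin cannot be emitted as a
// single assignment, since the declared output is int[] while the SPIR-V source array may use a
// different base type. Unroll the store into a per-element loop with a bitcast.
// Returns true if the store was handled here.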
bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
{
	if (!backend.force_gl_in_out_block)
		return false;
	// This path is only relevant for GL backends.

	auto *var = maybe_get<SPIRVariable>(target_id);
	if (!var || var->storage != StorageClassOutput)
		return false;

	if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask)
		return false;

	auto &type = expression_type(source_id);
	string array_expr;
	if (type.array_size_literal.back())
	{
		array_expr = convert_to_string(type.array.back());
		if (type.array.back() == 0)
			SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
	}
	else
		array_expr = to_expression(type.array.back());

	SPIRType target_type;
	target_type.basetype = SPIRType::Int;

	statement("for (int i = 0; i < int(", array_expr, "); i++)");
	begin_scope();
	statement(to_expression(target_id), "[i] = ",
	          bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")),
	          ";");
	end_scope();

	return true;
}

void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
{
	if (!backend.force_gl_in_out_block)
		return;
	// This path is only relevant for GL backends.

	auto *var = maybe_get<SPIRVariable>(source_id);
	if (!var)
		return;

	if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
		return;

	auto &type = get_variable_data_type(*var);
	if (type.array.empty())
		return;

	auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
	bool is_builtin = is_builtin_variable(*var) &&
	                  (builtin == BuiltInPointSize ||
	                   builtin == BuiltInPosition ||
	                   builtin == BuiltInSampleMask);
	bool is_tess = is_tessellation_shader();
	bool is_patch = has_decoration(var->self, DecorationPatch);
	bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;

	// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it.
	// We must unroll the array load.
	// For builtins, we couldn't catch this case normally,
	// because this is resolved in the OpAccessChain in most cases.
	// If we load the entire array, we have no choice but to unroll here.
	if (!is_patch && (is_builtin || is_tess))
	{
		auto new_expr = join("_", target_id, "_unrolled");
		statement(variable_decl(type, new_expr, target_id), ";");
		string array_expr;
		if (type.array_size_literal.back())
		{
			array_expr = convert_to_string(type.array.back());
			if (type.array.back() == 0)
				SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
		}
		else
			array_expr = to_expression(type.array.back());

		// The array size might be a specialization constant, so use a for-loop instead.
		statement("for (int i = 0; i < int(", array_expr, "); i++)");
		begin_scope();
		if (is_builtin && !is_sample_mask)
			statement(new_expr, "[i] = gl_in[i].", expr, ";");
		else if (is_sample_mask)
		{
			SPIRType target_type;
			target_type.basetype = SPIRType::Int;
			statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";");
		}
		else
			statement(new_expr, "[i] = ", expr, "[i];");
		end_scope();

		expr = std::move(new_expr);
	}
}

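// When loading a builtin variable, the type it is declared with in GLSL (e.g. int for
// gl_InstanceIndex, uint for gl_GlobalInvocationID) may not match the type the SPIR-V expression
// expects, so bitcast the loaded value to the expected type. Legacy int vertex attributes are
// instead handled with a plain constructor cast, since they are declared as float there.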
void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
{
	// We will handle array cases elsewhere.
	if (!expr_type.array.empty())
		return;

	auto *var = maybe_get_backing_variable(source_id);
	if (var)
		source_id = var->self;

	// Only interested in standalone builtin variables.
	if (!has_decoration(source_id, DecorationBuiltIn))
	{
		// Except for int attributes in legacy GLSL, which are cast from float.
		if (is_legacy() && expr_type.basetype == SPIRType::Int && var && var->storage == StorageClassInput)
			expr = join(type_to_glsl(expr_type), "(", expr, ")");
		return;
	}

	auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;

	// TODO: Fill in for more builtins.
	switch (builtin)
	{
	case BuiltInLayer:
	case BuiltInPrimitiveId:
	case BuiltInViewportIndex:
	case BuiltInInstanceId:
	case BuiltInInstanceIndex:
	case BuiltInVertexId:
	case BuiltInVertexIndex:
	case BuiltInSampleId:
	case BuiltInBaseVertex:
	case BuiltInBaseInstance:
	case BuiltInDrawIndex:
	case BuiltInFragStencilRefEXT:
	case BuiltInInstanceCustomIndexNV:
	case BuiltInSampleMask:
	case BuiltInPrimitiveShadingRateKHR:
	case BuiltInShadingRateKHR:
		expected_type = SPIRType::Int;
		break;

	case BuiltInGlobalInvocationId:
	case BuiltInLocalInvocationId:
	case BuiltInWorkgroupId:
	case BuiltInLocalInvocationIndex:
	case BuiltInWorkgroupSize:
	case BuiltInNumWorkgroups:
	case BuiltInIncomingRayFlagsNV:
	case BuiltInLaunchIdNV:
	case BuiltInLaunchSizeNV:
	case BuiltInPrimitiveTriangleIndicesEXT:
	case BuiltInPrimitiveLineIndicesEXT:
	case BuiltInPrimitivePointIndicesEXT:
		expected_type = SPIRType::UInt;
		break;

	default:
		break;
	}

	if (expected_type != expr_type.basetype)
		expr = bitcast_expression(expr_type, expected_type, expr);
}

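// Mirror of cast_from_variable_load for stores: if the SPIR-V value type differs from the type of
// the builtin being written (e.g. gl_Layer is declared int, but the shader computes a uint),
// bitcast the stored expression to the builtin's declared type first.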
void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
{
	auto *var = maybe_get_backing_variable(target_id);
	if (var)
		target_id = var->self;

	// Only interested in standalone builtin variables.
	if (!has_decoration(target_id, DecorationBuiltIn))
		return;

	auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
	auto expected_type = expr_type.basetype;

	// TODO: Fill in for more builtins.
	switch (builtin)
	{
	case BuiltInLayer:
	case BuiltInPrimitiveId:
	case BuiltInViewportIndex:
	case BuiltInFragStencilRefEXT:
	case BuiltInSampleMask:
	case BuiltInPrimitiveShadingRateKHR:
	case BuiltInShadingRateKHR:
		expected_type = SPIRType::Int;
		break;

	default:
		break;
	}

	if (expected_type != expr_type.basetype)
	{
		auto type = expr_type;
		type.basetype = expected_type;
		expr = bitcast_expression(type, expr_type.basetype, expr);
	}
}

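// Wraps the index expression of a dynamically indexed resource array in the backend's nonuniform
// qualifier, e.g. uSamplers[index] becomes uSamplers[nonuniformEXT(index)] in GLSL.
// Only applies to arrayed uniform-constant, uniform and storage-buffer resources.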
void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
{
	if (*backend.nonuniform_qualifier == '\0')
		return;

	auto *var = maybe_get_backing_variable(ptr_id);
	if (!var)
		return;

	if (var->storage != StorageClassUniformConstant &&
	    var->storage != StorageClassStorageBuffer &&
	    var->storage != StorageClassUniform)
		return;

	auto &backing_type = get<SPIRType>(var->basetype);
	if (backing_type.array.empty())
		return;

	// If we get here, we know we're accessing an arrayed resource which
	// might require nonuniform qualifier.
	auto start_array_index = expr.find_first_of('[');

	if (start_array_index == string::npos)
		return;

	// We've opened a bracket, track expressions until we can close the bracket.
	// This must be our resource index.
	size_t end_array_index = string::npos;
	unsigned bracket_count = 1;
	for (size_t index = start_array_index + 1; index < expr.size(); index++)
	{
		if (expr[index] == ']')
		{
			if (--bracket_count == 0)
			{
				end_array_index = index;
				break;
			}
		}
		else if (expr[index] == '[')
			bracket_count++;
	}

	assert(bracket_count == 0);

	// Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
	// nothing we can do here to express that.
	if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
		return;

	start_array_index++;

	expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
	            expr.substr(start_array_index, end_array_index - start_array_index), ")",
	            expr.substr(end_array_index, string::npos));
}

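// Emits a SPIRV_CROSS_FLATTEN / SPIRV_CROSS_BRANCH / SPIRV_CROSS_UNROLL / SPIRV_CROSS_LOOP hint
// for the block based on its SPIR-V selection/loop control hint. These helpers rely on
// GL_EXT_control_flow_attributes, so nothing is emitted on targets too old for the extension.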
void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
{
	if ((options.es && options.version < 310) || (!options.es && options.version < 140))
		return;

	switch (block.hint)
	{
	case SPIRBlock::HintFlatten:
		require_extension_internal("GL_EXT_control_flow_attributes");
		statement("SPIRV_CROSS_FLATTEN");
		break;
	case SPIRBlock::HintDontFlatten:
		require_extension_internal("GL_EXT_control_flow_attributes");
		statement("SPIRV_CROSS_BRANCH");
		break;
	case SPIRBlock::HintUnroll:
		require_extension_internal("GL_EXT_control_flow_attributes");
		statement("SPIRV_CROSS_UNROLL");
		break;
	case SPIRBlock::HintDontUnroll:
		require_extension_internal("GL_EXT_control_flow_attributes");
		statement("SPIRV_CROSS_LOOP");
		break;
	default:
		break;
	}
}

void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
{
	preserved_aliases[id] = get_name(id);
}

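// Restores any aliases saved by preserve_alias_on_reset() and clears every name cache,
// so that a forced recompile pass starts from a clean slate and reaches the same naming decisions.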
void CompilerGLSL::reset_name_caches()
{
	for (auto &preserved : preserved_aliases)
		set_name(preserved.first, preserved.second);

	preserved_aliases.clear();
	resource_names.clear();
	block_input_names.clear();
	block_output_names.clear();
	block_ubo_names.clear();
	block_ssbo_names.clear();
	block_names.clear();
	function_overloads.clear();
}

void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set<uint32_t> &visited, const SPIRType &type)
{
	if (visited.count(type.self))
		return;
	visited.insert(type.self);

	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
	{
		auto &mbr_type = get<SPIRType>(type.member_types[i]);

		if (mbr_type.basetype == SPIRType::Struct)
		{
			// If there are multiple aliases, the output might be somewhat unpredictable,
			// but the only real alternative in that case is to do nothing, which isn't any better.
			// This check should be fine in practice.
			if (get_name(mbr_type.self).empty() && !get_member_name(type.self, i).empty())
			{
				auto anon_name = join("anon_", get_member_name(type.self, i));
				ParsedIR::sanitize_underscores(anon_name);
				set_name(mbr_type.self, anon_name);
			}

			fixup_anonymous_struct_names(visited, mbr_type);
		}
	}
}

void CompilerGLSL::fixup_anonymous_struct_names()
{
	// HLSL codegen can often end up emitting anonymous structs inside blocks, which
	// breaks GL linking since all names must match ...
	// Try to emit sensible code, so attempt to find such structs and emit anon_$member.

	// Breaks exponential explosion with weird type trees.
	std::unordered_set<uint32_t> visited;

	ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
		if (type.basetype == SPIRType::Struct &&
		    (has_decoration(type.self, DecorationBlock) ||
		     has_decoration(type.self, DecorationBufferBlock)))
		{
			fixup_anonymous_struct_names(visited, type);
		}
	});
}

void CompilerGLSL::fixup_type_alias()
{
	// Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
	ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
		if (!type.type_alias)
			return;

		if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
		{
			// Top-level block types should never alias anything else.
			type.type_alias = 0;
		}
		else if (type_is_block_like(type) && type.self == ID(self))
		{
			// A block-like type is any type which contains Offset decoration, but not top-level blocks,
			// i.e. blocks which are placed inside buffers.
			// Become the master.
			ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
				if (other_id == self)
					return;

				if (other_type.type_alias == type.type_alias)
					other_type.type_alias = self;
			});

			this->get<SPIRType>(type.type_alias).type_alias = self;
			type.type_alias = 0;
		}
	});
}

void CompilerGLSL::reorder_type_alias()
{
	// Reorder declaration of types so that the master of the type alias is always emitted first.
	// We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type ABuffer,
	// which means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here.
	auto loop_lock = ir.create_loop_hard_lock();

	auto &type_ids = ir.ids_for_type[TypeType];
	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
	{
		auto &type = get<SPIRType>(*alias_itr);
		if (type.type_alias != TypeID(0) &&
		    !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		{
			// We will skip declaring this type, so make sure the type_alias type comes before.
			auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
			assert(master_itr != end(type_ids));

			if (alias_itr < master_itr)
			{
				// Must also swap the type order for the constant-type joined array.
				auto &joined_types = ir.ids_for_constant_undef_or_type;
				auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
				auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
				assert(alt_alias_itr != end(joined_types));
				assert(alt_master_itr != end(joined_types));

				swap(*alias_itr, *master_itr);
				swap(*alt_alias_itr, *alt_master_itr);
			}
		}
	}
}

void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
{
	// If we are redirecting statements, ignore the line directive.
	// Common case here is continue blocks.
	if (redirect_statement)
		return;

	// If we're emitting code in a sensitive context such as condition blocks in for loops, don't emit
	// any line directives, because it's not possible.
	if (block_debug_directives)
		return;

	if (options.emit_line_directives)
	{
		require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
	}
}

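// Member-by-member copy between two logically identical but physically different types
// (as needed for OpCopyLogical): arrays and structs are recursed into, and each leaf element is
// copied through access chains and emit_store_statement so that packing/transpose fixups apply.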
void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
                                          SmallVector<uint32_t> chain)
{
	// Fully unroll all member/array indices one by one.
	auto &lhs_type = get<SPIRType>(lhs_type_id);
	auto &rhs_type = get<SPIRType>(rhs_type_id);

	if (!lhs_type.array.empty())
	{
		// Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
		// and this is a rather obscure opcode anyways, keep it simple unless we are forced to.
		uint32_t array_size = to_array_size_literal(lhs_type);
		chain.push_back(0);

		for (uint32_t i = 0; i < array_size; i++)
		{
			chain.back() = i;
			emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
		}
	}
	else if (lhs_type.basetype == SPIRType::Struct)
	{
		chain.push_back(0);
		uint32_t member_count = uint32_t(lhs_type.member_types.size());
		for (uint32_t i = 0; i < member_count; i++)
		{
			chain.back() = i;
			emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
		}
	}
	else
	{
		// Need to handle unpack/packing fixups since this can differ wildly between the logical types,
		// particularly in MSL.
		// To deal with this, we emit access chains and go through emit_store_statement
		// to deal with all the special cases we can encounter.
		AccessChainMeta lhs_meta, rhs_meta;
		auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
		auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
		                                 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);

		uint32_t id = ir.increase_bound_by(2);
		lhs_id = id;
		rhs_id = id + 1;

		{
			auto &lhs_expr = set<SPIRExpression>(lhs_id, std::move(lhs), lhs_type_id, true);
			lhs_expr.need_transpose = lhs_meta.need_transpose;

			if (lhs_meta.storage_is_packed)
				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
			if (lhs_meta.storage_physical_type != 0)
				set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);

			forwarded_temporaries.insert(lhs_id);
			suppressed_usage_tracking.insert(lhs_id);
		}

		{
			auto &rhs_expr = set<SPIRExpression>(rhs_id, std::move(rhs), rhs_type_id, true);
			rhs_expr.need_transpose = rhs_meta.need_transpose;

			if (rhs_meta.storage_is_packed)
				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
			if (rhs_meta.storage_physical_type != 0)
				set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);

			forwarded_temporaries.insert(rhs_id);
			suppressed_usage_tracking.insert(rhs_id);
		}

		emit_store_statement(lhs_id, rhs_id);
	}
}

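// Returns true if this subpass input has been remapped to framebuffer fetch
// (GL_EXT_shader_framebuffer_fetch style) via its input attachment index.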
bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
{
	if (!has_decoration(id, DecorationInputAttachmentIndex))
		return false;

	uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
		if (remap.first == input_attachment_index)
			return true;

	return false;
}

const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
		    get_decoration(var.self, DecorationInputAttachmentIndex) == index)
		{
			ret = &var;
		}
	});
	return ret;
}

const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
{
	const SPIRVariable *ret = nullptr;
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
			ret = &var;
	});
	return ret;
}

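// For subpass inputs remapped to framebuffer fetch, registers entry-point fixup code which
// initializes each subpass input variable from the corresponding color output
// (gl_LastFragData on legacy targets), so reads from the "input attachment" observe the
// current framebuffer contents.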
void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
{
	for (auto &remap : subpass_to_framebuffer_fetch_attachment)
	{
		auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
		auto *output_var = find_color_output_by_location(remap.second);
		if (!subpass_var)
			continue;
		if (!output_var)
			SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
			                  "to read from it.");
		if (is_array(get<SPIRType>(output_var->basetype)))
			SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");

		auto &func = get<SPIRFunction>(get_entry_point().self);
		func.fixup_hooks_in.push_back([=]() {
			if (is_legacy())
			{
				statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
				          get_decoration(output_var->self, DecorationLocation), "];");
			}
			else
			{
				uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
				statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
				          to_expression(output_var->self), ";");
			}
		});
	}
}

bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
{
	return is_depth_image(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
{
	static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
		                                                "GL_KHR_shader_subgroup_basic",
		                                                "GL_KHR_shader_subgroup_vote",
		                                                "GL_KHR_shader_subgroup_arithmetic",
		                                                "GL_NV_gpu_shader_5",
		                                                "GL_NV_shader_thread_group",
		                                                "GL_NV_shader_thread_shuffle",
		                                                "GL_ARB_shader_ballot",
		                                                "GL_ARB_shader_group_vote",
		                                                "GL_AMD_gcn_shader" };
	return retval[c];
}

SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return { "GL_ARB_shader_int64" };
	case AMD_gcn_shader:
		return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
	default:
		return {};
	}
}

const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
{
	switch (c)
	{
	case ARB_shader_ballot:
		return "defined(GL_ARB_shader_int64)";
	case AMD_gcn_shader:
		return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
	default:
		return "";
	}
}

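// Other subgroup features a given feature relies on; request_feature() automatically requests
// these as well via get_feature_dependency_mask().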
CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
get_feature_dependencies(Feature feature)
{
	switch (feature)
	{
	case SubgroupAllEqualT:
		return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
	case SubgroupElect:
		return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return { SubgroupMask };
	case SubgroupBallotBitCount:
		return { SubgroupBallot };
	case SubgroupArithmeticIAddReduce:
	case SubgroupArithmeticIAddInclusiveScan:
	case SubgroupArithmeticFAddReduce:
	case SubgroupArithmeticFAddInclusiveScan:
	case SubgroupArithmeticIMulReduce:
	case SubgroupArithmeticIMulInclusiveScan:
	case SubgroupArithmeticFMulReduce:
	case SubgroupArithmeticFMulInclusiveScan:
		return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract };
	case SubgroupArithmeticIAddExclusiveScan:
	case SubgroupArithmeticFAddExclusiveScan:
	case SubgroupArithmeticIMulExclusiveScan:
	case SubgroupArithmeticFMulExclusiveScan:
		return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount,
		         SubgroupMask, SubgroupElect, SubgroupBallotBitExtract };
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
get_feature_dependency_mask(Feature feature)
{
	return build_mask(get_feature_dependencies(feature));
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
{
	static const bool retval[FeatureCount] = {
		false, false, false, false, false, false,
		true, // SubgroupBallotFindLSB_MSB
		false, false, false, false,
		true, // SubgroupMemBarrier - replaced with workgroup memory barriers
		false, false, true, false,
		false, false, false, false, false, false, // iadd, fadd
		false, false, false, false, false, false, // imul, fmul
	};

	return retval[feature];
}

CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
get_KHR_extension_for_feature(Feature feature)
{
	static const Candidate extensions[FeatureCount] = {
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
		KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
		KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot,
		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
		KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic,
	};

	return extensions[feature];
}

void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
{
	feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
}

bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
{
	return (feature_mask & (1u << feature)) != 0;
}

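// Tallies, for every requested feature and its dependencies, which candidate extensions could
// provide it. The resulting per-extension weights are later used to prefer extensions that
// cover the most requested functionality.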
CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
{
	Result res;

	for (uint32_t i = 0u; i < FeatureCount; ++i)
	{
		if (feature_mask & (1u << i))
		{
			auto feature = static_cast<Feature>(i);
			std::unordered_set<uint32_t> unique_candidates;

			auto candidates = get_candidates_for_feature(feature);
			unique_candidates.insert(candidates.begin(), candidates.end());

			auto deps = get_feature_dependencies(feature);
			for (Feature d : deps)
			{
				candidates = get_candidates_for_feature(d);
				if (!candidates.empty())
					unique_candidates.insert(candidates.begin(), candidates.end());
			}

			for (uint32_t c : unique_candidates)
				++res.weights[static_cast<Candidate>(c)];
		}
	}

	return res;
}

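// Same as the single-argument overload, but orders the candidates by the weights computed in
// resolve(), so the most broadly useful (and KHR) extensions come first.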
CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
get_candidates_for_feature(Feature ft, const Result &r)
{
	auto c = get_candidates_for_feature(ft);
	auto cmp = [&r](Candidate a, Candidate b) {
		if (r.weights[a] == r.weights[b])
			return a < b; // Prefer candidates with lower enum value
		return r.weights[a] > r.weights[b];
	};
	std::sort(c.begin(), c.end(), cmp);
	return c;
}

CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
get_candidates_for_feature(Feature feature)
{
	switch (feature)
	{
	case SubgroupMask:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupSize:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
	case SubgroupInvocationID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupID:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case NumSubgroups:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group };
	case SubgroupBroadcast_First:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
	case SubgroupBallotFindLSB_MSB:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
	case SubgroupAll_Any_AllEqualBool:
		return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
	case SubgroupAllEqualT:
		return {}; // depends on other features only
	case SubgroupElect:
		return {}; // depends on other features only
	case SubgroupBallot:
		return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
	case SubgroupBarrier:
		return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
	case SubgroupMemBarrier:
		return { KHR_shader_subgroup_basic };
	case SubgroupInverseBallot_InclBitCount_ExclBitCout:
		return {};
	case SubgroupBallotBitExtract:
		return { NV_shader_thread_group };
	case SubgroupBallotBitCount:
		return {};
	case SubgroupArithmeticIAddReduce:
	case SubgroupArithmeticIAddExclusiveScan:
	case SubgroupArithmeticIAddInclusiveScan:
	case SubgroupArithmeticFAddReduce:
	case SubgroupArithmeticFAddExclusiveScan:
	case SubgroupArithmeticFAddInclusiveScan:
	case SubgroupArithmeticIMulReduce:
	case SubgroupArithmeticIMulExclusiveScan:
	case SubgroupArithmeticIMulInclusiveScan:
	case SubgroupArithmeticFMulReduce:
	case SubgroupArithmeticFMulExclusiveScan:
	case SubgroupArithmeticFMulInclusiveScan:
		return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle };
	default:
		return {};
	}
}

CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
    const SmallVector<Feature> &features)
{
	FeatureMask mask = 0;
	for (Feature f : features)
		mask |= FeatureMask(1) << f;
	return mask;
}

CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
{
	for (auto &weight : weights)
		weight = 0;

	// Make sure KHR_shader_subgroup extensions are always preferred.
	const uint32_t big_num = FeatureCount;
	weights[KHR_shader_subgroup_ballot] = big_num;
	weights[KHR_shader_subgroup_basic] = big_num;
	weights[KHR_shader_subgroup_vote] = big_num;
	weights[KHR_shader_subgroup_arithmetic] = big_num;
}

void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
{
	// Must be ordered to maintain deterministic output, so vector is appropriate.
	if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
	    end(workaround_ubo_load_overload_types))
	{
		force_recompile();
		workaround_ubo_load_overload_types.push_back(id);
	}
}

void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
{
	// Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
	// ensure row_major decoration is actually respected.
	auto *var = maybe_get_backing_variable(ptr);
	if (!var)
		return;

	auto &backing_type = get<SPIRType>(var->basetype);
	bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
	              has_decoration(backing_type.self, DecorationBlock);
	if (!is_ubo)
		return;

	auto *type = &get<SPIRType>(loaded_type);
	bool rewrite = false;
	bool relaxed = options.es;

	if (is_matrix(*type))
	{
		// To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
		// we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
		// If there is any row-major action going on, we apply the workaround.
		// It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
		// If an access chain occurred, the workaround is not required, so loading vectors or scalars does not need the workaround.
		type = &backing_type;
	}
	else
	{
		// If we're loading a composite, we don't have overloads like these.
		relaxed = false;
	}

	if (type->basetype == SPIRType::Struct)
	{
		// If we're loading a struct where any member is a row-major matrix, apply the workaround.
		for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
		{
			auto decorations = combined_decoration_for_member(*type, i);
			if (decorations.get(DecorationRowMajor))
				rewrite = true;

			// Since we decide on a per-struct basis, only use mediump wrapper if all candidates are mediump.
			if (!decorations.get(DecorationRelaxedPrecision))
				relaxed = false;
		}
	}

	if (rewrite)
	{
		request_workaround_wrapper_overload(loaded_type);
		expr = join("spvWorkaroundRowMajor", (relaxed ? "MP" : ""), "(", expr, ")");
	}
}

void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
{
	masked_output_locations.insert({ location, component });
}

void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
{
	masked_output_builtins.insert(builtin);
}

bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
{
	auto &type = get<SPIRType>(var.basetype);
	bool is_block = has_decoration(type.self, DecorationBlock);
	// Blocks by themselves are never masked. Must be masked per-member.
	if (is_block)
		return false;

	bool is_builtin = has_decoration(var.self, DecorationBuiltIn);

	if (is_builtin)
	{
		return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn)));
	}
	else
	{
		if (!has_decoration(var.self, DecorationLocation))
			return false;

		return is_stage_output_location_masked(
		    get_decoration(var.self, DecorationLocation),
		    get_decoration(var.self, DecorationComponent));
	}
}

bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
{
	auto &type = get<SPIRType>(var.basetype);
	bool is_block = has_decoration(type.self, DecorationBlock);
	if (!is_block)
		return false;

	BuiltIn builtin = BuiltInMax;
	if (is_member_builtin(type, index, &builtin))
	{
		return is_stage_output_builtin_masked(builtin);
	}
	else
	{
		uint32_t location = get_declared_member_location(var, index, strip_array);
		uint32_t component = get_member_decoration(type.self, index, DecorationComponent);
		return is_stage_output_location_masked(location, component);
	}
}

bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const
{
	if (has_decoration(var.self, DecorationPerPrimitiveEXT))
		return true;

	auto &type = get<SPIRType>(var.basetype);
	if (!has_decoration(type.self, DecorationBlock))
		return false;

	for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++)
		if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT))
			return false;

	return true;
}

bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
{
	return masked_output_locations.count({ location, component }) != 0;
}

bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const
{
	return masked_output_builtins.count(builtin) != 0;
}

uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
	auto &block_type = get<SPIRType>(var.basetype);
	if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation))
		return get_member_decoration(block_type.self, mbr_idx, DecorationLocation);
	else
		return get_accumulated_member_location(var, mbr_idx, strip_array);
}

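// Fallback when a block member has no explicit Location decoration: start from the block
// variable's base location and add the location footprint of every preceding member.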
uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
{
	auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
	uint32_t location = get_decoration(var.self, DecorationLocation);

	for (uint32_t i = 0; i < mbr_idx; i++)
	{
		auto &mbr_type = get<SPIRType>(type.member_types[i]);

		// Start counting from any place we have a new location decoration.
		if (has_member_decoration(type.self, mbr_idx, DecorationLocation))
			location = get_member_decoration(type.self, mbr_idx, DecorationLocation);

		uint32_t location_count = type_to_location_count(mbr_type);
		location += location_count;
	}

	return location;
}

StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
{
	auto *var = maybe_get_backing_variable(ptr);

	// If the expression has been lowered to a temporary, we need to use the Generic storage class.
	// We're looking for the effective storage class of a given expression.
	// An access chain or forwarded OpLoads from such access chains
	// will generally have the storage class of the underlying variable, but if the load was not forwarded
	// we have lost any address space qualifiers.
	bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
	                        (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);

	if (var && !forced_temporary)
	{
		if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
			return StorageClassWorkgroup;
		if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
			return StorageClassStorageBuffer;

		// Normalize SSBOs to StorageBuffer here.
		if (var->storage == StorageClassUniform &&
		    has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
			return StorageClassStorageBuffer;
		else
			return var->storage;
	}
	else
		return expression_type(ptr).storage;
}

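// Number of interface locations a type consumes as computed here: structs sum their members,
// matrices take one location per column, and every array dimension multiplies the count.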
uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
{
	uint32_t count;
	if (type.basetype == SPIRType::Struct)
	{
		uint32_t mbr_count = uint32_t(type.member_types.size());
		count = 0;
		for (uint32_t i = 0; i < mbr_count; i++)
			count += type_to_location_count(get<SPIRType>(type.member_types[i]));
	}
	else
	{
		count = type.columns > 1 ? type.columns : 1;
	}

	uint32_t dim_count = uint32_t(type.array.size());
	for (uint32_t i = 0; i < dim_count; i++)
		count *= to_array_size_literal(type, i);

	return count;
}