HLOperationLower.cpp 283 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // HLOperationLower.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Lower functions to lower HL operations to DXIL operations. //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #include "dxc/HLSL/DxilModule.h"
  12. #include "dxc/HLSL/DxilOperations.h"
  13. #include "dxc/HLSL/HLMatrixLowerHelper.h"
  14. #include "dxc/HLSL/HLModule.h"
  15. #include "dxc/HLSL/DxilUtil.h"
  16. #include "dxc/HLSL/HLOperationLower.h"
  17. #include "dxc/HLSL/HLOperationLowerExtension.h"
  18. #include "dxc/HLSL/HLOperations.h"
  19. #include "dxc/HlslIntrinsicOp.h"
  20. #include "llvm/IR/GetElementPtrTypeIterator.h"
  21. #include "llvm/IR/IRBuilder.h"
  22. #include "llvm/IR/Instructions.h"
  23. #include "llvm/IR/Module.h"
  24. #include <unordered_set>
  25. using namespace llvm;
  26. using namespace hlsl;
  27. struct HLOperationLowerHelper {
  28. OP &hlslOP;
  29. Type *voidTy;
  30. Type *f32Ty;
  31. Type *i32Ty;
  32. llvm::Type *i1Ty;
  33. Type *i8Ty;
  34. DxilTypeSystem &dxilTypeSys;
  35. DxilFunctionProps *functionProps;
  36. bool bLegacyCBufferLoad;
  37. DataLayout dataLayout;
  38. HLOperationLowerHelper(HLModule &HLM);
  39. };
  40. HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
  41. : hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
  42. dataLayout(DataLayout(HLM.GetHLOptions().bUseMinPrecision
  43. ? hlsl::DXIL::kLegacyLayoutString
  44. : hlsl::DXIL::kNewLayoutString)) {
  45. llvm::LLVMContext &Ctx = HLM.GetCtx();
  46. voidTy = Type::getVoidTy(Ctx);
  47. f32Ty = Type::getFloatTy(Ctx);
  48. i32Ty = Type::getInt32Ty(Ctx);
  49. i1Ty = Type::getInt1Ty(Ctx);
  50. i8Ty = Type::getInt8Ty(Ctx);
  51. Function *EntryFunc = HLM.GetEntryFunction();
  52. functionProps = nullptr;
  53. if (HLM.HasDxilFunctionProps(EntryFunc))
  54. functionProps = &HLM.GetDxilFunctionProps(EntryFunc);
  55. bLegacyCBufferLoad = HLM.GetHLOptions().bLegacyCBufferLoad;
  56. }
// Helper carrying the state needed to lower HL object (resource) intrinsics:
// cached resource attributes per handle value, the set of UAV loads whose
// counter is used, and the set of non-uniform index values. The referenced
// sets are owned by the caller and outlive this helper.
  57. struct HLObjectOperationLowerHelper {
  58. private:
  59. // For object intrinsics.
  60. HLModule &HLM;
// Resource class/kind plus the LLVM type of the resource's global symbol,
// as recovered from resource metadata.
  61. struct ResAttribute {
  62. DXIL::ResourceClass RC;
  63. DXIL::ResourceKind RK;
  64. Type *ResourceType;
  65. };
// Cache: handle value -> its resolved resource attributes.
  66. std::unordered_map<Value *, ResAttribute> HandleMetaMap;
// Caller-owned: loads of UAV handles whose hidden counter is used.
  67. std::unordered_set<LoadInst *> &UpdateCounterSet;
// Caller-owned: values marked as non-uniform resource indices.
  68. std::unordered_set<Value *> &NonUniformSet;
  69. // Map from pointer of cbuffer to pointer of resource.
  70. // For cbuffer like this:
  71. // cbuffer A {
  72. // Texture2D T;
  73. // };
  74. // A global resource Texture2D T2 will be created for Texture2D T.
  75. // CBPtrToResourceMap[T] will return T2.
  76. std::unordered_map<Value *, Value *> CBPtrToResourceMap;
  77. public:
// Stores references to the caller's sets; no copies are made.
  78. HLObjectOperationLowerHelper(HLModule &HLM,
  79. std::unordered_set<LoadInst *> &UpdateCounter,
  80. std::unordered_set<Value *> &NonUniform)
  81. : HLM(HLM), UpdateCounterSet(UpdateCounter), NonUniformSet(NonUniform) {}
  82. DXIL::ResourceClass GetRC(Value *Handle) {
  83. ResAttribute &Res = FindCreateHandleResourceBase(Handle);
  84. return Res.RC;
  85. }
  86. DXIL::ResourceKind GetRK(Value *Handle) {
  87. ResAttribute &Res = FindCreateHandleResourceBase(Handle);
  88. return Res.RK;
  89. }
  90. Type *GetResourceType(Value *Handle) {
  91. ResAttribute &Res = FindCreateHandleResourceBase(Handle);
  92. return Res.ResourceType;
  93. }
  94. void MarkHasCounter(Type *Ty, Value *handle) {
  95. DXIL::ResourceClass RC = GetRC(handle);
  96. DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV,
  97. "must UAV for counter");
  98. std::unordered_set<Value *> resSet;
  99. MarkHasCounterOnCreateHandle(handle, resSet);
  100. }
  101. void MarkNonUniform(Value *V) { NonUniformSet.insert(V); }
  102. Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr,
  103. GlobalVariable *CbGV, MDNode *MD) {
  104. // Change array idx to 0 to make sure all array ptr share same key.
  105. Value *Key = UniformCbPtr(CbPtr, CbGV);
  106. if (CBPtrToResourceMap.count(Key))
  107. return CBPtrToResourceMap[Key];
  108. Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, MD);
  109. CBPtrToResourceMap[Key] = Resource;
  110. return Resource;
  111. }
// Rewrite a pointer into a cbuffer-resident resource (|CbPtr|) as a pointer
// into the stand-in resource global (|ResPtr|). When types already match the
// resource is scalar and |ResPtr| is returned directly; otherwise the resource
// global is an array and a flattened index is computed from the GEP's array
// dimensions.
  112. Value *LowerCbResourcePtr(GetElementPtrInst *CbPtr, Value *ResPtr) {
  113. // Simple case.
  114. if (ResPtr->getType() == CbPtr->getType())
  115. return ResPtr;
  116. // Array case.
  117. DXASSERT_NOMSG(ResPtr->getType()->getPointerElementType()->isArrayTy());
  118. IRBuilder<> Builder(CbPtr);
  119. gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
// Start from the first GEP index; subsequent array dimensions fold into it
// below (row-major flattening: idx = idx * dimSize + nextIdx).
  120. Value *arrayIdx = GEPIt.getOperand();
  121. // Only calc array idx and size.
  122. // Ignore struct type part.
// NOTE(review): the loop begins at gep_type_begin, so the very first index is
// both the seed of arrayIdx and, if that level is an array type, folded again
// — presumably the first level is never an array here; confirm against
// callers that build these cbuffer GEPs.
  123. for (; GEPIt != E; ++GEPIt) {
  124. if (GEPIt->isArrayTy()) {
  125. arrayIdx = Builder.CreateMul(
  126. arrayIdx, Builder.getInt32(GEPIt->getArrayNumElements()));
  127. arrayIdx = Builder.CreateAdd(arrayIdx, GEPIt.getOperand());
  128. }
  129. }
// GEP into the array resource global: leading 0 steps through the pointer,
// arrayIdx selects the element.
  130. return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx});
  131. }
  132. private:
// Resolve the resource attributes (class, kind, global type) behind a handle
// value, memoized in HandleMetaMap. Recognized handle producers:
//  - Argument: resource-attribute metadata attached to the parameter;
//  - LoadInst: scan users of the loaded pointer for a call whose parameter
//    annotation carries the metadata, or a store whose value is resolved
//    recursively;
//  - CallInst: resource-attribute metadata on the called function;
//  - Select/PHI: resolve all operands, use the true-value/first operand.
// On any failure an error is emitted and the Invalid placeholder entry is
// returned.
ResAttribute &FindCreateHandleResourceBase(Value *Handle) {
  if (HandleMetaMap.count(Handle))
    return HandleMetaMap[Handle];

  // Add invalid first to avoid dead loop.
  // (Self-referential phi/select chains hit the cache instead of recursing.)
  HandleMetaMap[Handle] = {DXIL::ResourceClass::Invalid,
                           DXIL::ResourceKind::Invalid,
                           StructType::get(Type::getVoidTy(HLM.GetCtx()))};

  if (Argument *Arg = dyn_cast<Argument>(Handle)) {
    MDNode *MD = HLM.GetDxilResourceAttrib(Arg);
    if (!MD) {
      Handle->getContext().emitError("cannot map resource to handle");
      return HandleMetaMap[Handle];
    }
    DxilResourceBase Res(DxilResource::Class::Invalid);
    HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
    ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
                           Res.GetGlobalSymbol()->getType()};
    HandleMetaMap[Handle] = Attrib;
    return HandleMetaMap[Handle];
  }

  if (LoadInst *LI = dyn_cast<LoadInst>(Handle)) {
    Value *Ptr = LI->getPointerOperand();
    for (User *U : Ptr->users()) {
      if (CallInst *CI = dyn_cast<CallInst>(U)) {
        // The pointer is passed to a function; its parameter annotation
        // identifies the resource.
        DxilFunctionAnnotation *FnAnnot =
            HLM.GetFunctionAnnotation(CI->getCalledFunction());
        if (FnAnnot) {
          for (auto &arg : CI->arg_operands()) {
            if (arg == Ptr) {
              unsigned argNo = arg.getOperandNo();
              DxilParameterAnnotation &ParamAnnot =
                  FnAnnot->GetParameterAnnotation(argNo);
              MDNode *MD = ParamAnnot.GetResourceAttribute();
              if (!MD) {
                Handle->getContext().emitError(
                    "cannot map resource to handle");
                return HandleMetaMap[Handle];
              }
              DxilResourceBase Res(DxilResource::Class::Invalid);
              HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
              ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
                                     Res.GetGlobalSymbol()->getType()};
              HandleMetaMap[Handle] = Attrib;
              return HandleMetaMap[Handle];
            }
          }
        }
      }
      if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
        // Pointer written elsewhere: resolve whatever value is stored.
        Value *V = SI->getValueOperand();
        ResAttribute Attrib = FindCreateHandleResourceBase(V);
        HandleMetaMap[Handle] = Attrib;
        return HandleMetaMap[Handle];
      }
    }
    // Cannot find.
    Handle->getContext().emitError("cannot map resource to handle");
    return HandleMetaMap[Handle];
  }

  if (CallInst *CI = dyn_cast<CallInst>(Handle)) {
    MDNode *MD = HLM.GetDxilResourceAttrib(CI->getCalledFunction());
    if (!MD) {
      Handle->getContext().emitError("cannot map resource to handle");
      return HandleMetaMap[Handle];
    }
    DxilResourceBase Res(DxilResource::Class::Invalid);
    HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
    ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
                           Res.GetGlobalSymbol()->getType()};
    HandleMetaMap[Handle] = Attrib;
    return HandleMetaMap[Handle];
  }

  if (SelectInst *Sel = dyn_cast<SelectInst>(Handle)) {
    ResAttribute &ResT = FindCreateHandleResourceBase(Sel->getTrueValue());
    // Use MDT here, ResourceClass, ResourceID match is done at
    // DxilGenerationPass::AddCreateHandleForPhiNodeAndSelect.
    HandleMetaMap[Handle] = ResT;
    // Still resolve the false value so it gets cached/validated too.
    FindCreateHandleResourceBase(Sel->getFalseValue());
    return ResT;
  }

  if (PHINode *Phi = dyn_cast<PHINode>(Handle)) {
    if (Phi->getNumOperands() == 0) {
      Handle->getContext().emitError("cannot map resource to handle");
      return HandleMetaMap[Handle];
    }
    ResAttribute &Res0 = FindCreateHandleResourceBase(Phi->getOperand(0));
    // Use Res0 here, ResourceClass, ResourceID match is done at
    // DxilGenerationPass::AddCreateHandleForPhiNodeAndSelect.
    HandleMetaMap[Handle] = Res0;
    for (unsigned i = 1; i < Phi->getNumOperands(); i++) {
      FindCreateHandleResourceBase(Phi->getOperand(i));
    }
    return Res0;
  }

  Handle->getContext().emitError("cannot map resource to handle");
  return HandleMetaMap[Handle];
}
  228. CallInst *FindCreateHandle(Value *handle,
  229. std::unordered_set<Value *> &resSet) {
  230. // Already checked.
  231. if (resSet.count(handle))
  232. return nullptr;
  233. resSet.insert(handle);
  234. if (CallInst *CI = dyn_cast<CallInst>(handle))
  235. return CI;
  236. if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
  237. if (CallInst *CI = FindCreateHandle(Sel->getTrueValue(), resSet))
  238. return CI;
  239. if (CallInst *CI = FindCreateHandle(Sel->getFalseValue(), resSet))
  240. return CI;
  241. return nullptr;
  242. }
  243. if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
  244. for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
  245. if (CallInst *CI = FindCreateHandle(Phi->getOperand(i), resSet))
  246. return CI;
  247. }
  248. return nullptr;
  249. }
  250. return nullptr;
  251. }
// Walk select/phi chains rooted at `handle` down to each createHandle call
// and add the call's resource load to UpdateCounterSet, marking that UAV as
// using its hidden counter. `resSet` memoizes visited values so phi cycles
// terminate.
void MarkHasCounterOnCreateHandle(Value *handle,
                                  std::unordered_set<Value *> &resSet) {
  // Already checked.
  if (resSet.count(handle))
    return;
  resSet.insert(handle);
  if (CallInst *CI = dyn_cast<CallInst>(handle)) {
    Value *Res =
        CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx);
    // The resource operand must be a direct load of the resource global;
    // anything else cannot be mapped back to a concrete resource.
    LoadInst *LdRes = dyn_cast<LoadInst>(Res);
    if (!LdRes) {
      CI->getContext().emitError(CI, "cannot map resource to handle");
      return;
    }
    UpdateCounterSet.insert(LdRes);
    return;
  }
  if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
    MarkHasCounterOnCreateHandle(Sel->getTrueValue(), resSet);
    MarkHasCounterOnCreateHandle(Sel->getFalseValue(), resSet);
  }
  if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
    for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
      MarkHasCounterOnCreateHandle(Phi->getOperand(i), resSet);
    }
  }
}
// Build a canonical key for CbPtr: a GEP constant into CbGV with every
// array index replaced by 0, so GEPs differing only in array index map to
// the same cache entry in CBPtrToResourceMap.
// NOTE(review): the IRBuilder has no insertion point, so this relies on the
// GEP folding to a constant — i.e. all remaining indices (struct fields)
// being constants. Confirm no dynamic non-array index can reach here.
Value *UniformCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV) {
  gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
  std::vector<Value *> idxList(CbPtr->idx_begin(), CbPtr->idx_end());
  unsigned i = 0;
  IRBuilder<> Builder(HLM.GetCtx());
  Value *zero = Builder.getInt32(0);
  for (; GEPIt != E; ++GEPIt, ++i) {
    if (GEPIt->isArrayTy()) {
      // Change array idx to 0 to make sure all array ptr share same key.
      idxList[i] = zero;
    }
  }
  Value *Key = Builder.CreateInBoundsGEP(CbGV, idxList);
  return Key;
}
// Create the resource global that replaces the cbuffer field addressed by
// CbPtr. The new global's name is the dotted path of struct field names
// walked by the GEP; its type is the pointee type, wrapped in an array
// whose size is the product of all array dimensions traversed.
Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV,
                              MDNode *MD) {
  Type *CbTy = CbPtr->getPointerOperandType();
  DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(),
                    "else arg not point to var");

  gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
  unsigned i = 0;
  IRBuilder<> Builder(HLM.GetCtx());
  unsigned arraySize = 1;
  DxilTypeSystem &typeSys = HLM.GetTypeSystem();

  std::string Name;
  for (; GEPIt != E; ++GEPIt, ++i) {
    if (GEPIt->isArrayTy()) {
      // Accumulate flattened array size across all dimensions.
      arraySize *= GEPIt->getArrayNumElements();
    } else if (GEPIt->isStructTy()) {
      // Append the field name from the struct annotation to the global name.
      DxilStructAnnotation *typeAnnot =
          typeSys.GetStructAnnotation(cast<StructType>(*GEPIt));
      DXASSERT_NOMSG(typeAnnot);
      unsigned idx = cast<ConstantInt>(GEPIt.getOperand())->getLimitedValue();
      DXASSERT_NOMSG(typeAnnot->GetNumFields() > idx);
      DxilFieldAnnotation &fieldAnnot = typeAnnot->GetFieldAnnotation(idx);
      if (!Name.empty())
        Name += ".";
      Name += fieldAnnot.GetFieldName();
    }
  }

  Type *Ty = CbPtr->getResultElementType();
  if (arraySize > 1) {
    Ty = ArrayType::get(Ty, arraySize);
  }

  return CreateResourceGV(Ty, Name, MD);
}
  325. Value *CreateResourceGV(Type *Ty, StringRef Name, MDNode *MD) {
  326. Module &M = *HLM.GetModule();
  327. Constant *GV = M.getOrInsertGlobal(Name, Ty);
  328. // Create resource and set GV as globalSym.
  329. HLM.AddResourceWithGlobalVariableAndMDNode(GV, MD);
  330. return GV;
  331. }
  332. };
// Signature shared by every intrinsic lowering routine: lowers CI (an HL
// intrinsic call) to DXIL and returns the replacement value (or nullptr when
// the call was handled in place). `Translated` is an out-flag implementations
// use to report lowering status — semantics defined at the dispatch site.
using IntrinsicLowerFuncTy = Value *(CallInst *CI, IntrinsicOp IOP,
                                     DXIL::OpCode opcode,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated);
// One row of the intrinsic lowering table: maps an HLSL intrinsic opcode to
// its lowering routine and, when the mapping is trivial, the DXIL opcode.
struct IntrinsicLower {
  // Intrinsic opcode.
  IntrinsicOp IntriOpcode;
  // Lower function.
  IntrinsicLowerFuncTy &LowerFunc;
  // DXIL opcode if can direct map.
  DXIL::OpCode DxilOpcode;
};
  344. // IOP intrinsics.
  345. namespace {
  346. Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef<Value *> refArgs,
  347. Type *Ty, Type *RetTy, OP *hlslOP,
  348. IRBuilder<> &Builder) {
  349. unsigned argNum = refArgs.size();
  350. std::vector<Value *> args = refArgs;
  351. if (Ty->isVectorTy()) {
  352. Value *retVal = llvm::UndefValue::get(RetTy);
  353. unsigned vecSize = Ty->getVectorNumElements();
  354. for (unsigned i = 0; i < vecSize; i++) {
  355. // Update vector args, skip known opcode arg.
  356. for (unsigned argIdx = HLOperandIndex::kUnaryOpSrc0Idx; argIdx < argNum;
  357. argIdx++) {
  358. if (refArgs[argIdx]->getType()->isVectorTy()) {
  359. Value *arg = refArgs[argIdx];
  360. args[argIdx] = Builder.CreateExtractElement(arg, i);
  361. }
  362. }
  363. Value *EltOP =
  364. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  365. retVal = Builder.CreateInsertElement(retVal, EltOP, i);
  366. }
  367. return retVal;
  368. } else {
  369. if (!RetTy->isVoidTy()) {
  370. Value *retVal =
  371. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  372. return retVal;
  373. } else {
  374. // Cannot add name to void.
  375. return Builder.CreateCall(dxilFunc, args);
  376. }
  377. }
  378. }
  379. // Generates a DXIL operation over an overloaded type (Ty), returning a
  380. // RetTy value; when Ty is a vector, it will replicate per-element operations
  381. // into RetTy to rebuild it.
  382. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
  383. Type *Ty, Type *RetTy, OP *hlslOP,
  384. IRBuilder<> &Builder) {
  385. Type *EltTy = Ty->getScalarType();
  386. Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy);
  387. return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP, Builder);
  388. }
// Variant that builds at Inst's position, fills refArgs[0] with the opcode
// constant, and uses Inst's type as the return type. Callers must pass
// refArgs[0] == nullptr as a placeholder.
Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
                            Type *Ty, Instruction *Inst, OP *hlslOP) {
  DXASSERT(refArgs.size() > 0, "else opcode isn't in signature");
  DXASSERT(refArgs[0] == nullptr,
           "else caller has already filled the value in");
  IRBuilder<> B(Inst);
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  // Writing through ArrayRef's const data is only safe because every caller
  // passes a mutable local array.
  const_cast<llvm::Value **>(refArgs.data())[0] =
      opArg; // actually stack memory from caller
  return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B);
}
  400. Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy,
  401. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  402. Type *Ty = src->getType();
  403. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  404. Value *args[] = {opArg, src};
  405. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  406. }
  407. Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src,
  408. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  409. return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP,
  410. Builder);
  411. }
  412. Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  413. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  414. Type *Ty = src0->getType();
  415. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  416. Value *args[] = {opArg, src0, src1};
  417. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  418. }
  419. Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  420. Value *src2, hlsl::OP *hlslOP,
  421. IRBuilder<> &Builder) {
  422. Type *Ty = src0->getType();
  423. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  424. Value *args[] = {opArg, src0, src1, src2};
  425. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  426. }
  427. Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  428. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  429. Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  430. IRBuilder<> Builder(CI);
  431. hlsl::OP *hlslOP = &helper.hlslOP;
  432. Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), hlslOP, Builder);
  433. return retVal;
  434. }
  435. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  436. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  437. hlsl::OP *hlslOP = &helper.hlslOP;
  438. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  439. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  440. IRBuilder<> Builder(CI);
  441. Value *binOp =
  442. TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder);
  443. return binOp;
  444. }
  445. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  446. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  447. hlsl::OP *hlslOP = &helper.hlslOP;
  448. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  449. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  450. Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  451. IRBuilder<> Builder(CI);
  452. Value *triOp =
  453. TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder);
  454. return triOp;
  455. }
  456. Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  457. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  458. hlsl::OP *hlslOP = &helper.hlslOP;
  459. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  460. IRBuilder<> Builder(CI);
  461. Type *Ty = src->getType();
  462. Type *RetTy = Type::getInt1Ty(CI->getContext());
  463. if (Ty->isVectorTy())
  464. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  465. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  466. Value *args[] = {opArg, src};
  467. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  468. }
// NonUniformResourceIndex(i): no DXIL op is emitted. The index operand (and
// any casts of the call's result) are recorded as non-uniform via the object
// helper, and the call is replaced by its operand in place.
// Returns nullptr because CI has already been RAUW'ed here.
Value *TranslateNonUniformResourceIndex(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  // Casts of the result also carry the non-uniform property forward.
  for (User *U : CI->users()) {
    if (CastInst *I = dyn_cast<CastInst>(U)) {
      pObjHelper->MarkNonUniform(I);
    }
  }
  Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  pObjHelper->MarkNonUniform(V);
  CI->replaceAllUsesWith(V);
  return nullptr;
}
  481. Value *TrivialBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  482. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  483. hlsl::OP *OP = &helper.hlslOP;
  484. Function *dxilFunc = OP->GetOpFunc(OP::OpCode::Barrier, CI->getType());
  485. Constant *opArg = OP->GetU32Const((unsigned)OP::OpCode::Barrier);
  486. unsigned uglobal = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceGlobal);
  487. unsigned g = static_cast<unsigned>(DXIL::BarrierMode::TGSMFence);
  488. unsigned t = static_cast<unsigned>(DXIL::BarrierMode::SyncThreadGroup);
  489. // unsigned ut = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceThreadGroup);
  490. unsigned barrierMode;
  491. switch (IOP) {
  492. case IntrinsicOp::IOP_AllMemoryBarrier:
  493. barrierMode = uglobal | g;
  494. break;
  495. case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
  496. barrierMode = uglobal | g | t;
  497. break;
  498. case IntrinsicOp::IOP_GroupMemoryBarrier:
  499. barrierMode = g;
  500. break;
  501. case IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync:
  502. barrierMode = g | t;
  503. break;
  504. case IntrinsicOp::IOP_DeviceMemoryBarrier:
  505. barrierMode = uglobal;
  506. break;
  507. case IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync:
  508. barrierMode = uglobal | t;
  509. break;
  510. default:
  511. DXASSERT(0, "invalid opcode for barrier");
  512. break;
  513. }
  514. Value *src0 = OP->GetU32Const(static_cast<unsigned>(barrierMode));
  515. Value *args[] = {opArg, src0};
  516. IRBuilder<> Builder(CI);
  517. Builder.CreateCall(dxilFunc, args);
  518. return nullptr;
  519. }
  520. Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
  521. OP::OpCode opcode,
  522. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  523. hlsl::OP *hlslOP = &helper.hlslOP;
  524. IRBuilder<> Builder(CI);
  525. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  526. Type *Ty = val->getType();
  527. Constant *toByteConst = ConstantFP::get(Ty->getScalarType(), 255);
  528. if (Ty != Ty->getScalarType()) {
  529. toByteConst =
  530. ConstantVector::getSplat(Ty->getVectorNumElements(), toByteConst);
  531. }
  532. Value *byte4 = Builder.CreateFMul(toByteConst, val);
  533. byte4 =
  534. TrivialDxilUnaryOperation(OP::OpCode::Round_z, byte4, hlslOP, Builder);
  535. return Builder.CreateBitCast(byte4, CI->getType());
  536. }
  537. Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
  538. OP::OpCode opcode,
  539. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  540. hlsl::OP *hlslOP = &helper.hlslOP;
  541. IRBuilder<> Builder(CI);
  542. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  543. Type *Ty = val->getType();
  544. VectorType *VT = dyn_cast<VectorType>(Ty);
  545. if (!VT) {
  546. CI->getContext().emitError(
  547. CI, "AddUint64 can only be applied to uint2 and uint4 operands");
  548. return UndefValue::get(Ty);
  549. }
  550. unsigned size = VT->getNumElements();
  551. if (size != 2 && size != 4) {
  552. CI->getContext().emitError(
  553. CI, "AddUint64 can only be applied to uint2 and uint4 operands");
  554. return UndefValue::get(Ty);
  555. }
  556. Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  557. Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  558. Value *RetVal = UndefValue::get(Ty);
  559. Function *AddC = hlslOP->GetOpFunc(DXIL::OpCode::UAddc, helper.i32Ty);
  560. Value *opArg = Builder.getInt32(static_cast<unsigned>(DXIL::OpCode::UAddc));
  561. for (unsigned i=0; i<size; i+=2) {
  562. Value *low0 = Builder.CreateExtractElement(op0, i);
  563. Value *low1 = Builder.CreateExtractElement(op1, i);
  564. Value *lowWithC = Builder.CreateCall(AddC, { opArg, low0, low1});
  565. Value *low = Builder.CreateExtractValue(lowWithC, 0);
  566. RetVal = Builder.CreateInsertElement(RetVal, low, i);
  567. Value *carry = Builder.CreateExtractValue(lowWithC, 1);
  568. // Ext i1 to i32
  569. carry = Builder.CreateZExt(carry, helper.i32Ty);
  570. Value *hi0 = Builder.CreateExtractElement(op0, i+1);
  571. Value *hi1 = Builder.CreateExtractElement(op1, i+1);
  572. Value *hi = Builder.CreateAdd(hi0, hi1);
  573. hi = Builder.CreateAdd(hi, carry);
  574. RetVal = Builder.CreateInsertElement(RetVal, hi, i+1);
  575. }
  576. return RetVal;
  577. }
  578. bool IsValidLoadInput(Value *V) {
  579. // Must be load input.
  580. // TODO: report this error on front-end
  581. if (!isa<CallInst>(V)) {
  582. V->getContext().emitError("attribute evaluation can only be done on values "
  583. "taken directly from inputs");
  584. return false;
  585. }
  586. CallInst *CI = cast<CallInst>(V);
  587. // Must be immediate.
  588. ConstantInt *opArg =
  589. cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
  590. DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
  591. if (op != DXIL::OpCode::LoadInput) {
  592. V->getContext().emitError("attribute evaluation can only be done on values "
  593. "taken directly from inputs");
  594. return false;
  595. }
  596. return true;
  597. }
// Apply current shuffle vector mask on top of previous shuffle mask.
// For example, if previous mask is (12,11,10,13) and current mask is (3,1,0,2)
// new mask would be (13,11,12,10).
// A null curMask means this is the first mask seen: the previous mask is the
// composition so far.
Constant *AccumulateMask(Constant *curMask, Constant *prevMask) {
  if (curMask == nullptr) {
    return prevMask;
  }
  unsigned size = cast<VectorType>(curMask->getType())->getNumElements();
  SmallVector<uint32_t, 16> Elts;
  for (unsigned i = 0; i != size; ++i) {
    // Composition: new[i] = prev[cur[i]].
    ConstantInt *Index = cast<ConstantInt>(curMask->getAggregateElement(i));
    ConstantInt *IVal =
        cast<ConstantInt>(prevMask->getAggregateElement(Index->getSExtValue()));
    Elts.emplace_back(IVal->getSExtValue());
  }
  return ConstantDataVector::get(curMask->getContext(), Elts);
}
// Collect the LoadInput calls that produced V, for re-emission as eval ops.
// For a vector V, peel any shufflevector layers (composing their masks into
// the returned mask) and then walk the insertelement chain, collecting each
// inserted element that is a valid LoadInput. loadList ends up in reverse
// component order (outermost insert first). For a scalar V, V itself must be
// a LoadInput. Returns the composed shuffle mask, or nullptr if none.
Constant *GetLoadInputsForEvaluate(Value *V, std::vector<CallInst*> &loadList) {
  Constant *shufMask = nullptr;
  if (V->getType()->isVectorTy()) {
    // Must be insert element inst. Keeping track of masks for shuffle vector.
    Value *Vec = V;
    while (ShuffleVectorInst *shuf = dyn_cast<ShuffleVectorInst>(Vec)) {
      shufMask = AccumulateMask(shufMask, shuf->getMask());
      Vec = shuf->getOperand(0);
    }
    // TODO: We are assuming that the operand of insertelement is a LoadInput.
    // This will fail on the case where we pass in matrix member using array
    // subscript.
    while (!isa<UndefValue>(Vec)) {
      InsertElementInst *insertInst = cast<InsertElementInst>(Vec);
      Vec = insertInst->getOperand(0);
      Value *Elt = insertInst->getOperand(1);
      if (IsValidLoadInput(Elt)) {
        loadList.emplace_back(cast<CallInst>(Elt));
      }
    }
  } else {
    if (IsValidLoadInput(V)) {
      loadList.emplace_back(cast<CallInst>(V));
    }
  }
  return shufMask;
}
  641. // Swizzle could reduce the dimensionality of the Type, but
  642. // for temporary insertelement instructions should maintain the existing size of the loadinput.
  643. // So we have to analyze the type of src in order to determine the actual size required.
  644. Type *GetInsertElementTypeForEvaluate(Value *src) {
  645. if (InsertElementInst *IE = dyn_cast<InsertElementInst>(src)) {
  646. return src->getType();
  647. }
  648. else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(src)) {
  649. return SV->getOperand(0)->getType();
  650. }
  651. src->getContext().emitError("Invalid type call for EvaluateAttribute function");
  652. return nullptr;
  653. }
  654. Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  655. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  656. hlsl::OP *hlslOP = &helper.hlslOP;
  657. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  658. Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  659. IRBuilder<> Builder(CI);
  660. std::vector<CallInst*> loadList;
  661. Constant *shufMask = GetLoadInputsForEvaluate(val, loadList);
  662. unsigned size = loadList.size();
  663. OP::OpCode opcode = OP::OpCode::EvalSampleIndex;
  664. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  665. Type *Ty = GetInsertElementTypeForEvaluate(val);
  666. Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  667. Value *result = UndefValue::get(Ty);
  668. for (unsigned i = 0; i < size; i++) {
  669. CallInst *loadInput = loadList[size-1-i];
  670. Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
  671. Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
  672. Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
  673. Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, sampleIdx });
  674. result = Builder.CreateInsertElement(result, Elt, i);
  675. }
  676. if (shufMask)
  677. result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  678. return result;
  679. }
  680. Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  681. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  682. hlsl::OP *hlslOP = &helper.hlslOP;
  683. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  684. Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  685. IRBuilder<> Builder(CI);
  686. Value *offsetX = Builder.CreateExtractElement(offset, (uint64_t)0);
  687. Value *offsetY = Builder.CreateExtractElement(offset, 1);
  688. std::vector<CallInst*> loadList;
  689. Constant *shufMask = GetLoadInputsForEvaluate(val, loadList);
  690. unsigned size = loadList.size();
  691. OP::OpCode opcode = OP::OpCode::EvalSnapped;
  692. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  693. Type *Ty = GetInsertElementTypeForEvaluate(val);
  694. Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  695. Value *result = UndefValue::get(Ty);
  696. for (unsigned i = 0; i < size; i++) {
  697. CallInst *loadInput = loadList[size-1-i];
  698. Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
  699. Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
  700. Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
  701. Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, offsetX, offsetY });
  702. result = Builder.CreateInsertElement(result, Elt, i);
  703. }
  704. if (shufMask)
  705. result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  706. return result;
  707. }
// EvaluateAttributeCentroid(value): re-emit each LoadInput that produced
// `value` as an EvalCentroid op, then reapply any swizzle mask collected
// from the value's shuffle chain.
// NOTE(review): reads the operand with DXIL::OperandIndex::kUnarySrc0OpIdx
// while sibling translators use HLOperandIndex::kUnaryOpSrc0Idx — confirm
// the two constants coincide.
Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                             HLOperationLowerHelper &helper,
                             HLObjectOperationLowerHelper *pObjHelper,
                             bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *src = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
  std::vector<CallInst*> loadList;
  Constant *shufMask = GetLoadInputsForEvaluate(src, loadList);
  unsigned size = loadList.size();

  IRBuilder<> Builder(CI);

  OP::OpCode opcode = OP::OpCode::EvalCentroid;

  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Type *Ty = GetInsertElementTypeForEvaluate(src);
  Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());

  Value *result = UndefValue::get(Ty);
  // loadList is in reverse component order; walk it back-to-front.
  for (unsigned i = 0; i < size; i++) {
    CallInst *loadInput = loadList[size-1-i];
    Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
    Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
    Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
    Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx });
    result = Builder.CreateInsertElement(result, Elt, i);
  }
  if (shufMask)
    result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  return result;
}
  733. Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  734. HLOperationLowerHelper &helper,
  735. HLObjectOperationLowerHelper *pObjHelper,
  736. bool &Translated) {
  737. DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate");
  738. hlsl::OP *hlslOP = &helper.hlslOP;
  739. IRBuilder<> Builder(CI);
  740. Type *Ty = CI->getType();
  741. Value *val = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx);
  742. Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx);
  743. Value *vertexI8Idx = Builder.CreateTrunc(vertexIdx, Type::getInt8Ty(CI->getContext()));
  744. // Check the range of VertexID
  745. Value *vertex0 = Builder.getInt8(0);
  746. Value *vertex1 = Builder.getInt8(1);
  747. Value *vertex2 = Builder.getInt8(2);
  748. if (vertexI8Idx != vertex0 && vertexI8Idx != vertex1 && vertexI8Idx != vertex2) {
  749. CI->getContext().emitError(CI, "VertexID at GetAttributeAtVertex can only range from 0 to 2");
  750. return UndefValue::get(Ty);
  751. }
  752. std::vector<CallInst*> loadList;
  753. Constant *shufMask = GetLoadInputsForEvaluate(val, loadList);
  754. unsigned size = loadList.size();
  755. Value *opArg = hlslOP->GetU32Const((unsigned)op);
  756. Function *evalFunc = hlslOP->GetOpFunc(op, Ty->getScalarType());
  757. Value *result = UndefValue::get(Ty);
  758. for (unsigned i = 0; i < size; ++i) {
  759. CallInst *loadInput = loadList[size - 1 - i];
  760. Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
  761. Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
  762. Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
  763. Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, vertexI8Idx });
  764. result = Builder.CreateInsertElement(result, Elt, i);
  765. }
  766. if (shufMask)
  767. result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  768. return result;
  769. }
  770. Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  771. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  772. hlsl::OP *hlslOP = &helper.hlslOP;
  773. Type *Ty = Type::getVoidTy(CI->getContext());
  774. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  775. Value *args[] = {opArg};
  776. IRBuilder<> Builder(CI);
  777. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  778. return dxilOp;
  779. }
  780. Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  781. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  782. hlsl::OP *hlslOP = &helper.hlslOP;
  783. Type *Ty = CI->getType();
  784. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  785. Value *args[] = {opArg};
  786. IRBuilder<> Builder(CI);
  787. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  788. return dxilOp;
  789. }
  790. Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  791. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  792. hlsl::OP *hlslOP = &helper.hlslOP;
  793. OP::OpCode opcode = OP::OpCode::RenderTargetGetSamplePosition;
  794. IRBuilder<> Builder(CI);
  795. Type *Ty = Type::getVoidTy(CI->getContext());
  796. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  797. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  798. Value *args[] = {opArg, val};
  799. Value *samplePos =
  800. TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  801. Value *result = UndefValue::get(CI->getType());
  802. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  803. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  804. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  805. result = Builder.CreateInsertElement(result, samplePosY, 1);
  806. return result;
  807. }
  808. // val QuadReadLaneAt(val, uint);
  809. Value *TranslateQuadReadLaneAt(CallInst *CI, IntrinsicOp IOP,
  810. OP::OpCode opcode,
  811. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  812. hlsl::OP *hlslOP = &helper.hlslOP;
  813. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  814. return TrivialDxilOperation(DXIL::OpCode::QuadReadLaneAt, refArgs,
  815. CI->getOperand(1)->getType(), CI, hlslOP);
  816. }
  817. // Wave intrinsics of the form fn(val,QuadOpKind)->val
  818. Value *TranslateQuadReadAcross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  819. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  820. hlsl::OP *hlslOP = &helper.hlslOP;
  821. DXIL::QuadOpKind opKind;
  822. switch (IOP) {
  823. case IntrinsicOp::IOP_QuadReadAcrossX: opKind = DXIL::QuadOpKind::ReadAcrossX; break;
  824. case IntrinsicOp::IOP_QuadReadAcrossY: opKind = DXIL::QuadOpKind::ReadAcrossY; break;
  825. default: DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_QuadReadAcrossDiagonal);
  826. case IntrinsicOp::IOP_QuadReadAcrossDiagonal: opKind = DXIL::QuadOpKind::ReadAcrossDiagonal; break;
  827. }
  828. Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind);
  829. Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg};
  830. return TrivialDxilOperation(DXIL::OpCode::QuadOp, refArgs,
  831. CI->getOperand(1)->getType(), CI, hlslOP);
  832. }
  833. // WaveAllEqual(val<n>)->bool<n>
  834. Value *TranslateWaveAllEqual(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  835. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  836. hlsl::OP *hlslOP = &helper.hlslOP;
  837. Value *src = CI->getArgOperand(HLOperandIndex::kWaveAllEqualValueOpIdx);
  838. IRBuilder<> Builder(CI);
  839. Type *Ty = src->getType();
  840. Type *RetTy = Type::getInt1Ty(CI->getContext());
  841. if (Ty->isVectorTy())
  842. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  843. Constant *opArg = hlslOP->GetU32Const((unsigned)DXIL::OpCode::WaveActiveAllEqual);
  844. Value *args[] = {opArg, src};
  845. return TrivialDxilOperation(DXIL::OpCode::WaveActiveAllEqual, args, Ty, RetTy,
  846. hlslOP, Builder);
  847. }
  848. // Wave intrinsics of the form fn(valA)->valB, where no overloading takes place
  849. Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  850. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  851. hlsl::OP *hlslOP = &helper.hlslOP;
  852. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  853. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  854. }
// Wave ballot intrinsic.
Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  // The high-level operation is uint4 ballot(i1).
  // The DXIL operation is struct.u4 ballot(i1).
  // To avoid updating users with more than a simple replace, we translate into
  // a call into struct.u4, then reassemble the vector.
  // Scalarization and constant propagation take care of cleanup.
  IRBuilder<> B(CI);
  // Make the DXIL call itself.
  hlsl::OP *hlslOP = &helper.hlslOP;
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Value *refArgs[] = { opArg, CI->getOperand(1) };
  // Void overload: the ballot op is not overloaded on a value type.
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  Value *dxilVal = B.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode));
  // Assign from the call results into a vector.
  Type *ResTy = CI->getType();
  DXASSERT_NOMSG(ResTy->isVectorTy() && ResTy->getVectorNumElements() == 4);
  DXASSERT_NOMSG(dxilVal->getType()->isStructTy() &&
                 dxilVal->getType()->getNumContainedTypes() == 4);
  // 'x' component is the first vector element, highest bits.
  // Struct field Idx maps directly to vector lane Idx.
  Value *ResVal = llvm::UndefValue::get(ResTy);
  for (unsigned Idx = 0; Idx < 4; ++Idx) {
    ResVal = B.CreateInsertElement(
        ResVal, B.CreateExtractValue(dxilVal, ArrayRef<unsigned>(Idx)), Idx);
  }
  return ResVal;
}
  883. static bool WaveIntrinsicNeedsSign(OP::OpCode opcode) {
  884. return opcode == OP::OpCode::WaveActiveOp ||
  885. opcode == OP::OpCode::WavePrefixOp;
  886. }
  887. static unsigned WaveIntrinsicToSignedOpKind(IntrinsicOp IOP) {
  888. if (IOP == IntrinsicOp::IOP_WaveActiveUMax ||
  889. IOP == IntrinsicOp::IOP_WaveActiveUMin ||
  890. IOP == IntrinsicOp::IOP_WaveActiveUSum ||
  891. IOP == IntrinsicOp::IOP_WaveActiveUProduct ||
  892. IOP == IntrinsicOp::IOP_WavePrefixUSum ||
  893. IOP == IntrinsicOp::IOP_WavePrefixUProduct)
  894. return (unsigned)DXIL::SignedOpKind::Unsigned;
  895. return (unsigned)DXIL::SignedOpKind::Signed;
  896. }
// Maps a wave intrinsic to the raw operation-kind immediate for the DXIL
// op. NOTE: depending on the intrinsic this returns values from either
// DXIL::WaveBitOpKind (bit ops) or DXIL::WaveOpKind (arithmetic ops);
// both are erased to unsigned and forwarded as an i8 immediate by the
// caller.
static unsigned WaveIntrinsicToOpKind(IntrinsicOp IOP) {
  switch (IOP) {
  // Bit operations.
  case IntrinsicOp::IOP_WaveActiveBitOr:
    return (unsigned)DXIL::WaveBitOpKind::Or;
  case IntrinsicOp::IOP_WaveActiveBitAnd:
    return (unsigned)DXIL::WaveBitOpKind::And;
  case IntrinsicOp::IOP_WaveActiveBitXor:
    return (unsigned)DXIL::WaveBitOpKind::Xor;
  // Prefix operations.
  case IntrinsicOp::IOP_WavePrefixSum:
  case IntrinsicOp::IOP_WavePrefixUSum:
    return (unsigned)DXIL::WaveOpKind::Sum;
  case IntrinsicOp::IOP_WavePrefixProduct:
  case IntrinsicOp::IOP_WavePrefixUProduct:
    return (unsigned)DXIL::WaveOpKind::Product;
  // Numeric operations.
  case IntrinsicOp::IOP_WaveActiveMax:
  case IntrinsicOp::IOP_WaveActiveUMax:
    return (unsigned)DXIL::WaveOpKind::Max;
  case IntrinsicOp::IOP_WaveActiveMin:
  case IntrinsicOp::IOP_WaveActiveUMin:
    return (unsigned)DXIL::WaveOpKind::Min;
  case IntrinsicOp::IOP_WaveActiveSum:
  case IntrinsicOp::IOP_WaveActiveUSum:
    return (unsigned)DXIL::WaveOpKind::Sum;
  // The Product cases deliberately share the default label: any
  // unexpected value trips the assert below and is then treated as
  // Product.
  case IntrinsicOp::IOP_WaveActiveProduct:
  case IntrinsicOp::IOP_WaveActiveUProduct:
  default:
    DXASSERT(IOP == IntrinsicOp::IOP_WaveActiveProduct ||
                 IOP == IntrinsicOp::IOP_WaveActiveUProduct,
             "else caller passed incorrect value");
    return (unsigned)DXIL::WaveOpKind::Product;
  }
}
  932. // Wave intrinsics of the form fn(valA)->valA
  933. Value *TranslateWaveA2A(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  934. HLOperationLowerHelper &helper,
  935. HLObjectOperationLowerHelper *pObjHelper,
  936. bool &Translated) {
  937. hlsl::OP *hlslOP = &helper.hlslOP;
  938. Constant *kindValInt = hlslOP->GetI8Const(WaveIntrinsicToOpKind(IOP));
  939. Constant *signValInt = hlslOP->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
  940. Value *refArgs[] = {nullptr, CI->getOperand(1), kindValInt, signValInt};
  941. unsigned refArgCount = _countof(refArgs);
  942. if (!WaveIntrinsicNeedsSign(opcode))
  943. refArgCount--;
  944. return TrivialDxilOperation(opcode,
  945. llvm::ArrayRef<Value *>(refArgs, refArgCount),
  946. CI->getOperand(1)->getType(), CI, hlslOP);
  947. }
  948. // Wave intrinsics of the form fn()->val
  949. Value *TranslateWaveToVal(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  950. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  951. hlsl::OP *hlslOP = &helper.hlslOP;
  952. Value *refArgs[] = {nullptr};
  953. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  954. }
  955. // Wave intrinsics of the form fn(val,lane)->val
  956. Value *TranslateWaveReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  957. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  958. hlsl::OP *hlslOP = &helper.hlslOP;
  959. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  960. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneAt, refArgs,
  961. CI->getOperand(1)->getType(), CI, hlslOP);
  962. }
  963. // Wave intrinsics of the form fn(val)->val
  964. Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP,
  965. OP::OpCode opcode,
  966. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  967. hlsl::OP *hlslOP = &helper.hlslOP;
  968. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  969. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneFirst, refArgs,
  970. CI->getOperand(1)->getType(), CI, hlslOP);
  971. }
  972. Value *TransalteAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  973. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  974. hlsl::OP *hlslOP = &helper.hlslOP;
  975. Type *pOverloadTy = CI->getType()->getScalarType();
  976. if (pOverloadTy->isFloatingPointTy()) {
  977. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  978. return TrivialDxilOperation(DXIL::OpCode::FAbs, refArgs, CI->getType(), CI,
  979. hlslOP);
  980. } else {
  981. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  982. IRBuilder<> Builder(CI);
  983. Value *neg = Builder.CreateNeg(src);
  984. Value *refArgs[] = {nullptr, src, neg};
  985. return TrivialDxilBinaryOperation(DXIL::OpCode::IMax, src, neg, hlslOP,
  986. Builder);
  987. }
  988. }
  989. Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) {
  990. Type *Ty = val->getType();
  991. Type *EltTy = Ty->getScalarType();
  992. Constant *zero = nullptr;
  993. if (EltTy->isFloatingPointTy())
  994. zero = ConstantFP::get(EltTy, 0);
  995. else
  996. zero = ConstantInt::get(EltTy, 0);
  997. if (Ty != EltTy) {
  998. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  999. }
  1000. if (EltTy->isFloatingPointTy())
  1001. return Builder.CreateFCmpUNE(val, zero);
  1002. else
  1003. return Builder.CreateICmpNE(val, zero);
  1004. }
  1005. Value *TranslateAllForValue(Value *val, IRBuilder<> &Builder) {
  1006. Value *cond = GenerateCmpNEZero(val, Builder);
  1007. Type *Ty = val->getType();
  1008. Type *EltTy = Ty->getScalarType();
  1009. if (Ty != EltTy) {
  1010. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1011. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1012. Value *Elt = Builder.CreateExtractElement(cond, i);
  1013. Result = Builder.CreateAnd(Result, Elt);
  1014. }
  1015. return Result;
  1016. } else
  1017. return cond;
  1018. }
  1019. Value *TranslateAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1020. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1021. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1022. IRBuilder<> Builder(CI);
  1023. return TranslateAllForValue(val, Builder);
  1024. }
  1025. Value *TranslateAny(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1026. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1027. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1028. IRBuilder<> Builder(CI);
  1029. Value *cond = GenerateCmpNEZero(val, Builder);
  1030. Type *Ty = val->getType();
  1031. Type *EltTy = Ty->getScalarType();
  1032. if (Ty != EltTy) {
  1033. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1034. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1035. Value *Elt = Builder.CreateExtractElement(cond, i);
  1036. Result = Builder.CreateOr(Result, Elt);
  1037. }
  1038. return Result;
  1039. } else
  1040. return cond;
  1041. }
  1042. Value *TranslateBitcast(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1043. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1044. Type *Ty = CI->getType();
  1045. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1046. IRBuilder<> Builder(CI);
  1047. return Builder.CreateBitCast(op, Ty);
  1048. }
// Shared implementation for asuint(double, out lo, out hi): lowers to the
// SplitDouble DXIL op and stores the two 32-bit halves through the given
// output pointers. Returns nullptr because all results flow through the
// out pointers.
Value *TranslateDoubleAsUint(Value *x, Value *lo, Value *hi,
                             IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  Type *Ty = x->getType();
  Type *outTy = lo->getType()->getPointerElementType();
  DXIL::OpCode opcode = DXIL::OpCode::SplitDouble;
  // SplitDouble is overloaded on the scalar (double) type.
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  if (Ty->isVectorTy()) {
    // Vector overload: split each element separately and rebuild the
    // lo/hi result vectors element by element.
    Value *retValLo = llvm::UndefValue::get(outTy);
    Value *retValHi = llvm::UndefValue::get(outTy);
    unsigned vecSize = Ty->getVectorNumElements();
    for (unsigned i = 0; i < vecSize; i++) {
      Value *Elt = Builder.CreateExtractElement(x, i);
      Value *EltOP = Builder.CreateCall(dxilFunc, {opArg, Elt},
                                        hlslOP->GetOpCodeName(opcode));
      // Struct field 0 feeds the lo output, field 1 the hi output.
      Value *EltLo = Builder.CreateExtractValue(EltOP, 0);
      retValLo = Builder.CreateInsertElement(retValLo, EltLo, i);
      Value *EltHi = Builder.CreateExtractValue(EltOP, 1);
      retValHi = Builder.CreateInsertElement(retValHi, EltHi, i);
    }
    Builder.CreateStore(retValLo, lo);
    Builder.CreateStore(retValHi, hi);
  } else {
    // Scalar: a single call, then store both struct fields.
    Value *retVal =
        Builder.CreateCall(dxilFunc, {opArg, x}, hlslOP->GetOpCodeName(opcode));
    Value *retValLo = Builder.CreateExtractValue(retVal, 0);
    Value *retValHi = Builder.CreateExtractValue(retVal, 1);
    Builder.CreateStore(retValLo, lo);
    Builder.CreateStore(retValHi, hi);
  }
  return nullptr;
}
  1081. Value *TranslateAsUint(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1082. HLOperationLowerHelper &helper,
  1083. HLObjectOperationLowerHelper *pObjHelper,
  1084. bool &Translated) {
  1085. if (CI->getNumArgOperands() == 2) {
  1086. return TranslateBitcast(CI, IOP, opcode, helper, pObjHelper, Translated);
  1087. } else {
  1088. DXASSERT_NOMSG(CI->getNumArgOperands() == 4);
  1089. hlsl::OP *hlslOP = &helper.hlslOP;
  1090. Value *x = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  1091. DXASSERT_NOMSG(x->getType()->getScalarType()->isDoubleTy());
  1092. Value *lo = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  1093. Value *hi = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  1094. IRBuilder<> Builder(CI);
  1095. return TranslateDoubleAsUint(x, lo, hi, Builder, hlslOP);
  1096. }
  1097. }
  1098. Value *TranslateAsDouble(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1099. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1100. hlsl::OP *hlslOP = &helper.hlslOP;
  1101. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1102. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1103. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  1104. IRBuilder<> Builder(CI);
  1105. return TrivialDxilOperation(opcode, { opArg, x, y }, CI->getType(), CI->getType(), hlslOP, Builder);
  1106. }
// atan2(y, x): computed as atan(y/x) followed by quadrant fixups.
// The chain of selects must stay in this order — later selects override
// earlier ones for the more specific conditions.
Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  // Start from atan of the ratio; the fixups below correct the quadrant.
  Value *tan = Builder.CreateFDiv(y, x);
  Value *atan =
      TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder);
  // TODO: include M_PI from math.h.
  const double M_PI = 3.14159265358979323846;
  // Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
  Type *Ty = x->getType();
  Constant *pi = ConstantFP::get(Ty->getScalarType(), M_PI);
  Constant *halfPi = ConstantFP::get(Ty->getScalarType(), M_PI / 2);
  Constant *negHalfPi = ConstantFP::get(Ty->getScalarType(), -M_PI / 2);
  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  // Splat all constants for vector overloads.
  if (Ty != Ty->getScalarType()) {
    unsigned vecSize = Ty->getVectorNumElements();
    pi = ConstantVector::getSplat(vecSize, pi);
    halfPi = ConstantVector::getSplat(vecSize, halfPi);
    negHalfPi = ConstantVector::getSplat(vecSize, negHalfPi);
    zero = ConstantVector::getSplat(vecSize, zero);
  }
  Value *atanAddPi = Builder.CreateFAdd(atan, pi);
  Value *atanSubPi = Builder.CreateFSub(atan, pi);
  // x > 0 -> atan.
  Value *result = atan;
  Value *xLt0 = Builder.CreateFCmpOLT(x, zero);
  Value *xEq0 = Builder.CreateFCmpOEQ(x, zero);
  Value *yGe0 = Builder.CreateFCmpOGE(y, zero);
  Value *yLt0 = Builder.CreateFCmpOLT(y, zero);
  // x < 0, y >= 0 -> atan + pi.
  Value *xLt0AndyGe0 = Builder.CreateAnd(xLt0, yGe0);
  result = Builder.CreateSelect(xLt0AndyGe0, atanAddPi, result);
  // x < 0, y < 0 -> atan - pi.
  Value *xLt0AndYLt0 = Builder.CreateAnd(xLt0, yLt0);
  result = Builder.CreateSelect(xLt0AndYLt0, atanSubPi, result);
  // x == 0, y < 0 -> -pi/2
  Value *xEq0AndYLt0 = Builder.CreateAnd(xEq0, yLt0);
  result = Builder.CreateSelect(xEq0AndYLt0, negHalfPi, result);
  // x == 0, y > 0 -> pi/2
  // NOTE(review): yGe0 also matches y == 0, so atan2(0, 0) evaluates to
  // pi/2 here rather than libm's 0 — confirm this is the intended edge
  // behavior.
  Value *xEq0AndYGe0 = Builder.CreateAnd(xEq0, yGe0);
  result = Builder.CreateSelect(xEq0AndYGe0, halfPi, result);
  return result;
}
  1153. Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1154. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1155. hlsl::OP *hlslOP = &helper.hlslOP;
  1156. Type *Ty = CI->getType();
  1157. Type *EltTy = Ty->getScalarType();
  1158. DXIL::OpCode maxOp = DXIL::OpCode::FMax;
  1159. DXIL::OpCode minOp = DXIL::OpCode::FMin;
  1160. if (IOP == IntrinsicOp::IOP_uclamp) {
  1161. maxOp = DXIL::OpCode::UMax;
  1162. minOp = DXIL::OpCode::UMin;
  1163. } else if (EltTy->isIntegerTy()) {
  1164. maxOp = DXIL::OpCode::IMax;
  1165. minOp = DXIL::OpCode::IMin;
  1166. }
  1167. Value *x = CI->getArgOperand(HLOperandIndex::kClampOpXIdx);
  1168. Value *maxVal = CI->getArgOperand(HLOperandIndex::kClampOpMaxIdx);
  1169. Value *minVal = CI->getArgOperand(HLOperandIndex::kClampOpMinIdx);
  1170. IRBuilder<> Builder(CI);
  1171. // min(max(x, minVal), maxVal).
  1172. Value *maxXMinVal =
  1173. TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder);
  1174. return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder);
  1175. }
  1176. Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1177. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1178. hlsl::OP *hlslOP = &helper.hlslOP;
  1179. Function *discard =
  1180. hlslOP->GetOpFunc(OP::OpCode::Discard, Type::getVoidTy(CI->getContext()));
  1181. IRBuilder<> Builder(CI);
  1182. Value *cond = nullptr;
  1183. Value *arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1184. if (VectorType *VT = dyn_cast<VectorType>(arg->getType())) {
  1185. Value *elt = Builder.CreateExtractElement(arg, (uint64_t)0);
  1186. cond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
  1187. for (unsigned i = 1; i < VT->getNumElements(); i++) {
  1188. Value *elt = Builder.CreateExtractElement(arg, i);
  1189. Value *eltCond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
  1190. cond = Builder.CreateOr(cond, eltCond);
  1191. }
  1192. } else
  1193. cond = Builder.CreateFCmpOLT(arg, hlslOP->GetFloatConst(0));
  1194. Constant *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Discard);
  1195. Builder.CreateCall(discard, {opArg, cond});
  1196. return nullptr;
  1197. }
  1198. Value *TranslateCross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1199. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1200. VectorType *VT = cast<VectorType>(CI->getType());
  1201. DXASSERT_NOMSG(VT->getNumElements() == 3);
  1202. Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1203. Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1204. IRBuilder<> Builder(CI);
  1205. Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0);
  1206. Value *op0_y = Builder.CreateExtractElement(op0, 1);
  1207. Value *op0_z = Builder.CreateExtractElement(op0, 2);
  1208. Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0);
  1209. Value *op1_y = Builder.CreateExtractElement(op1, 1);
  1210. Value *op1_z = Builder.CreateExtractElement(op1, 2);
  1211. auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * {
  1212. Value *xy = Builder.CreateFMul(x0, y1);
  1213. Value *yx = Builder.CreateFMul(y0, x1);
  1214. return Builder.CreateFSub(xy, yx);
  1215. };
  1216. Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
  1217. Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
  1218. Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
  1219. Value *cross = UndefValue::get(VT);
  1220. cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0);
  1221. cross = Builder.CreateInsertElement(cross, zx_xz, 1);
  1222. cross = Builder.CreateInsertElement(cross, xy_yx, 2);
  1223. return cross;
  1224. }
  1225. Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1226. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1227. IRBuilder<> Builder(CI);
  1228. Type *Ty = CI->getType();
  1229. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1230. // 180/pi.
  1231. // TODO: include M_PI from math.h.
  1232. const double M_PI = 3.14159265358979323846;
  1233. Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI);
  1234. if (Ty != Ty->getScalarType()) {
  1235. toDegreeConst =
  1236. ConstantVector::getSplat(Ty->getVectorNumElements(), toDegreeConst);
  1237. }
  1238. return Builder.CreateFMul(toDegreeConst, val);
  1239. }
  1240. Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1241. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1242. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1243. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1244. Type *Ty = src1->getType();
  1245. IRBuilder<> Builder(CI);
  1246. Value *Result = UndefValue::get(Ty);
  1247. Constant *oneConst = ConstantFP::get(Ty->getScalarType(), 1);
  1248. // dest.x = 1;
  1249. Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  1250. // dest.y = src0.y * src1.y;
  1251. Value *src0_y = Builder.CreateExtractElement(src0, 1);
  1252. Value *src1_y = Builder.CreateExtractElement(src1, 1);
  1253. Value *yMuly = Builder.CreateFMul(src0_y, src1_y);
  1254. Result = Builder.CreateInsertElement(Result, yMuly, 1);
  1255. // dest.z = src0.z;
  1256. Value *src0_z = Builder.CreateExtractElement(src0, 2);
  1257. Result = Builder.CreateInsertElement(Result, src0_z, 2);
  1258. // dest.w = src1.w;
  1259. Value *src1_w = Builder.CreateExtractElement(src1, 3);
  1260. Result = Builder.CreateInsertElement(Result, src1_w, 3);
  1261. return Result;
  1262. }
// firstbithigh: runs the trivial DXIL unary op, then remaps the raw
// result as (bitWidth-1) - result — the subtraction implies the DXIL op
// reports the position relative to the MSB while HLSL wants an
// LSB-relative bit index. -1 ("no bits set") is passed through.
Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  Value *firstbitHi =
      TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi);
  IRBuilder<> Builder(CI);
  Constant *neg1 = Builder.getInt32(-1);
  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *Ty = src->getType();
  IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType());
  // Index of the most significant bit of the source element type.
  Constant *bitWidth = Builder.getInt32(EltTy->getBitWidth()-1);
  if (Ty == Ty->getScalarType()) {
    Value *sub = Builder.CreateSub(bitWidth, firstbitHi);
    Value *cond = Builder.CreateICmpEQ(neg1, firstbitHi);
    return Builder.CreateSelect(cond, neg1, sub);
  } else {
    // Vector overload: remap each component individually.
    Value *result = UndefValue::get(CI->getType());
    unsigned vecSize = Ty->getVectorNumElements();
    for (unsigned i = 0; i < vecSize; i++) {
      Value *EltFirstBit = Builder.CreateExtractElement(firstbitHi, i);
      Value *sub = Builder.CreateSub(bitWidth, EltFirstBit);
      Value *cond = Builder.CreateICmpEQ(neg1, EltFirstBit);
      Value *Elt = Builder.CreateSelect(cond, neg1, sub);
      result = Builder.CreateInsertElement(result, Elt, i);
    }
    return result;
  }
}
  1293. Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1294. HLOperationLowerHelper &helper,
  1295. HLObjectOperationLowerHelper *pObjHelper,
  1296. bool &Translated) {
  1297. Value *firstbitLo =
  1298. TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1299. return firstbitLo;
  1300. }
  1301. Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1302. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1303. Value *n_dot_l = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  1304. Value *n_dot_h = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  1305. Value *m = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  1306. IRBuilder<> Builder(CI);
  1307. Type *Ty = m->getType();
  1308. Value *Result = UndefValue::get(VectorType::get(Ty, 4));
  1309. // Result = (ambient, diffuse, specular, 1)
  1310. // ambient = 1.
  1311. Constant *oneConst = ConstantFP::get(Ty, 1);
  1312. Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  1313. // Result.w = 1.
  1314. Result = Builder.CreateInsertElement(Result, oneConst, 3);
  1315. // diffuse = (n_dot_l < 0) ? 0 : n_dot_l.
  1316. Constant *zeroConst = ConstantFP::get(Ty, 0);
  1317. Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst);
  1318. Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l);
  1319. Result = Builder.CreateInsertElement(Result, diffuse, 1);
  1320. // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h * m).
  1321. Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst);
  1322. Value *specCond = Builder.CreateOr(nlCmp, nhCmp);
  1323. Value *nhMulM = Builder.CreateFMul(n_dot_h, m);
  1324. Value *spec = Builder.CreateSelect(specCond, zeroConst, nhMulM);
  1325. Result = Builder.CreateInsertElement(Result, spec, 2);
  1326. return Result;
  1327. }
  1328. Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1329. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1330. IRBuilder<> Builder(CI);
  1331. Type *Ty = CI->getType();
  1332. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1333. // pi/180.
  1334. // TODO: include M_PI from math.h.
  1335. const double M_PI = 3.14159265358979323846;
  1336. Constant *toRadianConst = ConstantFP::get(Ty->getScalarType(), M_PI / 180);
  1337. if (Ty != Ty->getScalarType()) {
  1338. toRadianConst =
  1339. ConstantVector::getSplat(Ty->getVectorNumElements(), toRadianConst);
  1340. }
  1341. return Builder.CreateFMul(toRadianConst, val);
  1342. }
  1343. Value *TranslateF16ToF32(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1344. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1345. IRBuilder<> Builder(CI);
  1346. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1347. Type *Ty = CI->getType();
  1348. Function *f16tof32 =
  1349. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1350. return TrivialDxilOperation(
  1351. f16tof32, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1352. x->getType(), Ty, &helper.hlslOP, Builder);
  1353. }
  1354. Value *TranslateF32ToF16(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1355. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1356. IRBuilder<> Builder(CI);
  1357. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1358. Type *Ty = CI->getType();
  1359. Function *f32tof16 =
  1360. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1361. return TrivialDxilOperation(
  1362. f32tof16, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1363. x->getType(), Ty, &helper.hlslOP, Builder);
  1364. }
// Shared implementation of length(v): for vectors with 2+ elements,
// sqrt of the sum of squared components; for scalars and single-element
// vectors, the absolute value via FAbs.
Value *TranslateLength(CallInst *CI, Value *val, hlsl::OP *hlslOP) {
  IRBuilder<> Builder(CI);
  if (VectorType *VT = dyn_cast<VectorType>(val->getType())) {
    Value *Elt = Builder.CreateExtractElement(val, (uint64_t)0);
    unsigned size = VT->getNumElements();
    if (size > 1) {
      // Accumulate the dot product of val with itself...
      Value *Sum = Builder.CreateFMul(Elt, Elt);
      for (unsigned i = 1; i < size; i++) {
        Elt = Builder.CreateExtractElement(val, i);
        Value *Mul = Builder.CreateFMul(Elt, Elt);
        Sum = Builder.CreateFAdd(Sum, Mul);
      }
      // ...then take its square root.
      DXIL::OpCode sqrt = DXIL::OpCode::Sqrt;
      Function *dxilSqrt = hlslOP->GetOpFunc(sqrt, VT->getElementType());
      Value *opArg = hlslOP->GetI32Const((unsigned)sqrt);
      return Builder.CreateCall(dxilSqrt, {opArg, Sum},
                                hlslOP->GetOpCodeName(sqrt));
    } else {
      // Single-element vector: fall through to the scalar FAbs path.
      val = Elt;
    }
  }
  // length of a scalar is its absolute value.
  DXIL::OpCode fabs = DXIL::OpCode::FAbs;
  Function *dxilFAbs = hlslOP->GetOpFunc(fabs, val->getType());
  Value *opArg = hlslOP->GetI32Const((unsigned)fabs);
  return Builder.CreateCall(dxilFAbs, {opArg, val},
                            hlslOP->GetOpCodeName(fabs));
}
  1392. Value *TranslateLength(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1393. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1394. hlsl::OP *hlslOP = &helper.hlslOP;
  1395. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1396. return TranslateLength(CI, val, hlslOP);
  1397. }
  1398. Value *TranslateModF(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1399. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1400. hlsl::OP *hlslOP = &helper.hlslOP;
  1401. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1402. Value *outIntPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1403. IRBuilder<> Builder(CI);
  1404. Value *Result =
  1405. TrivialDxilUnaryOperation(OP::OpCode::Round_z, val, hlslOP, Builder);
  1406. Value *intPortion = Builder.CreateFSub(val, Result);
  1407. Builder.CreateStore(intPortion, outIntPtr);
  1408. return Result;
  1409. }
  1410. Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1411. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1412. hlsl::OP *hlslOP = &helper.hlslOP;
  1413. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1414. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1415. IRBuilder<> Builder(CI);
  1416. Value *sub = Builder.CreateFSub(src0, src1);
  1417. return TranslateLength(CI, sub, hlslOP);
  1418. }
  1419. Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1420. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1421. hlsl::OP *hlslOP = &helper.hlslOP;
  1422. IRBuilder<> Builder(CI);
  1423. Type *Ty = CI->getType();
  1424. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1425. // TODO: include M_LOG2E from math.h.
  1426. const double M_LOG2E = 1.44269504088896340736;
  1427. Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E);
  1428. if (Ty != Ty->getScalarType()) {
  1429. log2eConst =
  1430. ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst);
  1431. }
  1432. val = Builder.CreateFMul(log2eConst, val);
  1433. Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder);
  1434. return exp;
  1435. }
  1436. Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1437. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1438. hlsl::OP *hlslOP = &helper.hlslOP;
  1439. IRBuilder<> Builder(CI);
  1440. Type *Ty = CI->getType();
  1441. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1442. // TODO: include M_LN2 from math.h.
  1443. const double M_LN2 = 0.693147180559945309417;
  1444. Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2);
  1445. if (Ty != Ty->getScalarType()) {
  1446. ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const);
  1447. }
  1448. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1449. return Builder.CreateFMul(ln2Const, log);
  1450. }
  1451. Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1452. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1453. hlsl::OP *hlslOP = &helper.hlslOP;
  1454. IRBuilder<> Builder(CI);
  1455. Type *Ty = CI->getType();
  1456. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1457. // TODO: include M_LN2 from math.h.
  1458. const double M_LN2 = 0.693147180559945309417;
  1459. const double M_LN10 = 2.30258509299404568402;
  1460. Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10);
  1461. if (Ty != Ty->getScalarType()) {
  1462. log2_10Const =
  1463. ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const);
  1464. }
  1465. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1466. return Builder.CreateFMul(log2_10Const, log);
  1467. }
  1468. Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1469. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1470. hlsl::OP *hlslOP = &helper.hlslOP;
  1471. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1472. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1473. IRBuilder<> Builder(CI);
  1474. Value *div = Builder.CreateFDiv(src0, src1);
  1475. Value *negDiv = Builder.CreateFNeg(div);
  1476. Value *ge = Builder.CreateFCmpOGE(div, negDiv);
  1477. Value *absDiv =
  1478. TrivialDxilUnaryOperation(OP::OpCode::FAbs, div, hlslOP, Builder);
  1479. Value *frc =
  1480. TrivialDxilUnaryOperation(OP::OpCode::Frc, absDiv, hlslOP, Builder);
  1481. Value *negFrc = Builder.CreateFNeg(frc);
  1482. Value *realFrc = Builder.CreateSelect(ge, frc, negFrc);
  1483. return Builder.CreateFMul(realFrc, src1);
  1484. }
  1485. Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1486. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1487. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1488. if (isFloat) {
  1489. switch (IOP) {
  1490. case IntrinsicOp::IOP_max:
  1491. opcode = OP::OpCode::FMax;
  1492. break;
  1493. case IntrinsicOp::IOP_min:
  1494. default:
  1495. DXASSERT(IOP == IntrinsicOp::IOP_min, "");
  1496. opcode = OP::OpCode::FMin;
  1497. break;
  1498. }
  1499. }
  1500. return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1501. }
  1502. Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1503. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1504. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1505. if (isFloat) {
  1506. switch (IOP) {
  1507. case IntrinsicOp::IOP_mad:
  1508. default:
  1509. DXASSERT(IOP == IntrinsicOp::IOP_mad, "");
  1510. opcode = OP::OpCode::FMad;
  1511. break;
  1512. }
  1513. }
  1514. return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1515. }
  1516. Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1517. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1518. hlsl::OP *hlslOP = &helper.hlslOP;
  1519. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1520. Value *expPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1521. IRBuilder<> Builder(CI);
  1522. Type *i32Ty = Type::getInt32Ty(CI->getContext());
  1523. Constant *exponentMaskConst = ConstantInt::get(i32Ty, 0x7f800000);
  1524. Constant *mantisaMaskConst = ConstantInt::get(i32Ty, 0x007fffff);
  1525. Constant *exponentShiftConst = ConstantInt::get(i32Ty, 23);
  1526. Constant *mantisaOrConst = ConstantInt::get(i32Ty, 0x3f000000);
  1527. Constant *exponentBiasConst = ConstantInt::get(i32Ty, -(int)0x3f000000);
  1528. Constant *zeroVal = hlslOP->GetFloatConst(0);
  1529. // int iVal = asint(val);
  1530. Type *dstTy = i32Ty;
  1531. Type *Ty = val->getType();
  1532. if (Ty->isVectorTy()) {
  1533. unsigned vecSize = Ty->getVectorNumElements();
  1534. dstTy = VectorType::get(i32Ty, vecSize);
  1535. exponentMaskConst = ConstantVector::getSplat(vecSize, exponentMaskConst);
  1536. mantisaMaskConst = ConstantVector::getSplat(vecSize, mantisaMaskConst);
  1537. exponentShiftConst = ConstantVector::getSplat(vecSize, exponentShiftConst);
  1538. mantisaOrConst = ConstantVector::getSplat(vecSize, mantisaOrConst);
  1539. exponentBiasConst = ConstantVector::getSplat(vecSize, exponentBiasConst);
  1540. zeroVal = ConstantVector::getSplat(vecSize, zeroVal);
  1541. }
  1542. // bool ne = val != 0;
  1543. Value *notZero = Builder.CreateFCmpUNE(val, zeroVal);
  1544. notZero = Builder.CreateZExt(notZero, dstTy);
  1545. Value *intVal = Builder.CreateBitCast(val, dstTy);
  1546. // temp = intVal & exponentMask;
  1547. Value *temp = Builder.CreateAnd(intVal, exponentMaskConst);
  1548. // temp = temp + exponentBias;
  1549. temp = Builder.CreateAdd(temp, exponentBiasConst);
  1550. // temp = temp & ne;
  1551. temp = Builder.CreateAnd(temp, notZero);
  1552. // temp = temp >> exponentShift;
  1553. temp = Builder.CreateAShr(temp, exponentShiftConst);
  1554. // exp = float(temp);
  1555. Value *exp = Builder.CreateSIToFP(temp, Ty);
  1556. Builder.CreateStore(exp, expPtr);
  1557. // temp = iVal & mantisaMask;
  1558. temp = Builder.CreateAnd(intVal, mantisaMaskConst);
  1559. // temp = temp | mantisaOr;
  1560. temp = Builder.CreateOr(temp, mantisaOrConst);
  1561. // mantisa = temp & ne;
  1562. Value *mantisa = Builder.CreateAnd(temp, notZero);
  1563. return Builder.CreateBitCast(mantisa, Ty);
  1564. }
  1565. Value *TranslateLdExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1566. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1567. hlsl::OP *hlslOP = &helper.hlslOP;
  1568. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1569. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1570. IRBuilder<> Builder(CI);
  1571. Value *exp =
  1572. TrivialDxilUnaryOperation(OP::OpCode::Exp, src1, hlslOP, Builder);
  1573. return Builder.CreateFMul(exp, src0);
  1574. }
  1575. Value *TranslateFWidth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1576. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1577. hlsl::OP *hlslOP = &helper.hlslOP;
  1578. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1579. IRBuilder<> Builder(CI);
  1580. Value *ddx =
  1581. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseX, src, hlslOP, Builder);
  1582. Value *absDdx =
  1583. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddx, hlslOP, Builder);
  1584. Value *ddy =
  1585. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseY, src, hlslOP, Builder);
  1586. Value *absDdy =
  1587. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddy, hlslOP, Builder);
  1588. return Builder.CreateFAdd(absDdx, absDdy);
  1589. }
  1590. Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1591. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1592. hlsl::OP *hlslOP = &helper.hlslOP;
  1593. Type *Ty = CI->getType();
  1594. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1595. IRBuilder<> Builder(CI);
  1596. Value *length = TranslateLength(CI, op, hlslOP);
  1597. if (Ty != length->getType()) {
  1598. VectorType *VT = cast<VectorType>(Ty);
  1599. Value *vecLength = UndefValue::get(VT);
  1600. for (unsigned i = 0; i < VT->getNumElements(); i++)
  1601. vecLength = Builder.CreateInsertElement(vecLength, length, i);
  1602. length = vecLength;
  1603. }
  1604. return Builder.CreateFDiv(op, length);
  1605. }
  1606. Value *TranslateLerp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1607. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1608. // x + s(y-x)
  1609. Value *x = CI->getArgOperand(HLOperandIndex::kLerpOpXIdx);
  1610. Value *y = CI->getArgOperand(HLOperandIndex::kLerpOpYIdx);
  1611. IRBuilder<> Builder(CI);
  1612. Value *ySubx = Builder.CreateFSub(y, x);
  1613. Value *s = CI->getArgOperand(HLOperandIndex::kLerpOpSIdx);
  1614. Value *sMulSub = Builder.CreateFMul(s, ySubx);
  1615. return Builder.CreateFAdd(x, sMulSub);
  1616. }
  1617. Value *TrivialDotOperation(OP::OpCode opcode, Value *src0,
  1618. Value *src1, hlsl::OP *hlslOP,
  1619. IRBuilder<> &Builder) {
  1620. Type *Ty = src0->getType()->getScalarType();
  1621. Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
  1622. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  1623. SmallVector<Value *, 9> args;
  1624. args.emplace_back(opArg);
  1625. unsigned vecSize = src0->getType()->getVectorNumElements();
  1626. for (unsigned i = 0; i < vecSize; i++)
  1627. args.emplace_back(Builder.CreateExtractElement(src0, i));
  1628. for (unsigned i = 0; i < vecSize; i++)
  1629. args.emplace_back(Builder.CreateExtractElement(src1, i));
  1630. Value *dotOP = Builder.CreateCall(dxilFunc, args);
  1631. return dotOP;
  1632. }
// Lowers an integer dot product (no dedicated DXIL dot opcode exists for
// integers): start with the lane-0 product, then chain IMad(a[k], b[k], acc)
// for each remaining lane, highest lane first.
Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize,
                     hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0);
  Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0);
  Value *Result = Builder.CreateMul(Elt0, Elt1);
  // The cases below intentionally fall through so a size-N dot accumulates
  // lanes N-1 down through 1 on top of the lane-0 product.
  switch (vecSize) {
  case 4:
    Elt0 = Builder.CreateExtractElement(arg0, 3);
    Elt1 = Builder.CreateExtractElement(arg1, 3);
    Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result,
                                         hlslOP, Builder);
    // Pass thru.
  case 3:
    Elt0 = Builder.CreateExtractElement(arg0, 2);
    Elt1 = Builder.CreateExtractElement(arg1, 2);
    Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result,
                                         hlslOP, Builder);
    // Pass thru.
  case 2:
    Elt0 = Builder.CreateExtractElement(arg0, 1);
    Elt1 = Builder.CreateExtractElement(arg1, 1);
    Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result,
                                         hlslOP, Builder);
    break;
  default:
  case 1:
    // Size 1: the lane-0 product alone is the result.
    DXASSERT(vecSize == 1, "invalid vector size.");
  }
  return Result;
}
  1659. Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize,
  1660. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  1661. switch (vecSize) {
  1662. case 2:
  1663. return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder);
  1664. break;
  1665. case 3:
  1666. return TrivialDotOperation(OP::OpCode::Dot3, arg0, arg1, hlslOP, Builder);
  1667. break;
  1668. case 4:
  1669. return TrivialDotOperation(OP::OpCode::Dot4, arg0, arg1, hlslOP, Builder);
  1670. break;
  1671. default:
  1672. DXASSERT(vecSize == 1, "wrong vector size");
  1673. {
  1674. Value *vecMul = Builder.CreateFMul(arg0, arg1);
  1675. return Builder.CreateExtractElement(vecMul, (uint64_t)0);
  1676. }
  1677. break;
  1678. }
  1679. }
  1680. Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1681. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1682. hlsl::OP *hlslOP = &helper.hlslOP;
  1683. Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1684. Type *Ty = arg0->getType();
  1685. unsigned vecSize = Ty->getVectorNumElements();
  1686. Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1687. IRBuilder<> Builder(CI);
  1688. if (Ty->getScalarType()->isFloatingPointTy()) {
  1689. return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
  1690. } else {
  1691. return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder);
  1692. }
  1693. }
  1694. Value *TranslateReflect(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1695. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1696. hlsl::OP *hlslOP = &helper.hlslOP;
  1697. // v = i - 2 * n * dot(i•n).
  1698. IRBuilder<> Builder(CI);
  1699. Value *i = CI->getArgOperand(HLOperandIndex::kReflectOpIIdx);
  1700. Value *n = CI->getArgOperand(HLOperandIndex::kReflectOpNIdx);
  1701. VectorType *VT = cast<VectorType>(i->getType());
  1702. unsigned vecSize = VT->getNumElements();
  1703. Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  1704. // 2 * dot (i, n).
  1705. dot = Builder.CreateFMul(hlslOP->GetFloatConst(2), dot);
  1706. // 2 * n * dot(i, n).
  1707. Value *vecDot = Builder.CreateVectorSplat(vecSize, dot);
  1708. Value *nMulDot = Builder.CreateFMul(vecDot, n);
  1709. // i - 2 * n * dot(i, n).
  1710. return Builder.CreateFSub(i, nMulDot);
  1711. }
  1712. Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1713. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1714. hlsl::OP *hlslOP = &helper.hlslOP;
  1715. // d = dot(i•n);
  1716. // t = 1 - eta * eta * ( 1 - d*d);
  1717. // cond = t >= 1;
  1718. // r = eta * i - (eta * d + sqrt(t)) * n;
  1719. // return cond ? r : 0;
  1720. IRBuilder<> Builder(CI);
  1721. Value *i = CI->getArgOperand(HLOperandIndex::kRefractOpIIdx);
  1722. Value *n = CI->getArgOperand(HLOperandIndex::kRefractOpNIdx);
  1723. Value *eta = CI->getArgOperand(HLOperandIndex::kRefractOpEtaIdx);
  1724. VectorType *VT = cast<VectorType>(i->getType());
  1725. unsigned vecSize = VT->getNumElements();
  1726. Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  1727. // eta * eta;
  1728. Value *eta2 = Builder.CreateFMul(eta, eta);
  1729. // d*d;
  1730. Value *dot2 = Builder.CreateFMul(dot, dot);
  1731. Constant *one = ConstantFP::get(eta->getType(), 1);
  1732. Constant *zero = ConstantFP::get(eta->getType(), 0);
  1733. // 1- d*d;
  1734. dot2 = Builder.CreateFSub(one, dot2);
  1735. // eta * eta * (1-d*d);
  1736. eta2 = Builder.CreateFMul(dot2, eta2);
  1737. // t = 1 - eta * eta * ( 1 - d*d);
  1738. Value *t = Builder.CreateFSub(one, eta2);
  1739. // cond = t >= 0;
  1740. Value *cond = Builder.CreateFCmpOGE(t, zero);
  1741. // eta * i;
  1742. Value *vecEta = UndefValue::get(VT);
  1743. for (unsigned i = 0; i < vecSize; i++)
  1744. vecEta = Builder.CreateInsertElement(vecEta, eta, i);
  1745. Value *etaMulI = Builder.CreateFMul(i, vecEta);
  1746. // sqrt(t);
  1747. Value *sqrt = TrivialDxilUnaryOperation(OP::OpCode::Sqrt, t, hlslOP, Builder);
  1748. // eta * d;
  1749. Value *etaMulD = Builder.CreateFMul(eta, dot);
  1750. // eta * d + sqrt(t);
  1751. Value *etaSqrt = Builder.CreateFAdd(etaMulD, sqrt);
  1752. // (eta * d + sqrt(t)) * n;
  1753. Value *vecEtaSqrt = Builder.CreateVectorSplat(vecSize, etaSqrt);
  1754. Value *r = Builder.CreateFMul(vecEtaSqrt, n);
  1755. // r = eta * i - (eta * d + sqrt(t)) * n;
  1756. r = Builder.CreateFSub(etaMulI, r);
  1757. Value *refract =
  1758. Builder.CreateSelect(cond, r, ConstantVector::getSplat(vecSize, zero));
  1759. return refract;
  1760. }
  1761. Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1762. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1763. hlsl::OP *hlslOP = &helper.hlslOP;
  1764. // s = saturate((x-min)/(max-min)).
  1765. IRBuilder<> Builder(CI);
  1766. Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx);
  1767. Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx);
  1768. Value *maxSubMin = Builder.CreateFSub(maxVal, minVal);
  1769. Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx);
  1770. Value *xSubMin = Builder.CreateFSub(x, minVal);
  1771. Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin);
  1772. Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP,
  1773. Builder);
  1774. // return s * s *(3-2*s).
  1775. Constant *c2 = ConstantFP::get(CI->getType(),2);
  1776. Constant *c3 = ConstantFP::get(CI->getType(),3);
  1777. Value *sMul2 = Builder.CreateFMul(s, c2);
  1778. Value *result = Builder.CreateFSub(c3, sMul2);
  1779. result = Builder.CreateFMul(s, result);
  1780. result = Builder.CreateFMul(s, result);
  1781. return result;
  1782. }
// msad4(ref, src, accum): lowers the HLSL msad4 intrinsic. The scalar
// reference word is compared against four overlapping byte-windows taken
// from the 8-byte source pair (src.x, src.y), each via the DXIL Msad op,
// accumulating into accum. The windows are built with Bfi by shifting src.x
// right one byte at a time and filling vacated high bits from src.y.
Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *ref = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *src = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *accum = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  Type *Ty = CI->getType();
  IRBuilder<> Builder(CI);
  // Broadcast the scalar reference word to all four result lanes.
  Value *vecRef = UndefValue::get(Ty);
  for (unsigned i = 0; i < 4; i++)
    vecRef = Builder.CreateInsertElement(vecRef, ref, i);
  Value *srcX = Builder.CreateExtractElement(src, (uint64_t)0);
  Value *srcY = Builder.CreateExtractElement(src, 1);
  // Lane 0 of byteSrc is src.x unshifted; lanes 1-3 are composed below.
  Value *byteSrc = UndefValue::get(Ty);
  byteSrc = Builder.CreateInsertElement(byteSrc, srcX, (uint64_t)0);
  // DXBC equivalent of the window construction:
  // ushr r0.yzw, srcX, l(0, 8, 16, 24)
  // bfi r1.yzw, l(0, 8, 16, 24), l(0, 24, 16, 8), srcX, r0.yyzw
  Value *bfiOpArg =
      hlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::Bfi));
  Value *imm8 = hlslOP->GetU32Const(8);
  Value *imm16 = hlslOP->GetU32Const(16);
  Value *imm24 = hlslOP->GetU32Const(24);
  // Bfi operates on the scalar element type from here on.
  Ty = ref->getType();
  // Get x[31:8].
  Value *srcXShift = Builder.CreateLShr(srcX, imm8);
  // Lane 1 = y[0~7] : x[31:8].
  Value *byteSrcElt = TrivialDxilOperation(
      DXIL::OpCode::Bfi, {bfiOpArg, imm8, imm24, srcY, srcXShift}, Ty, Ty,
      hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 1);
  // Get x[31:16].
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
  // Lane 2 = y[0~15] : x[31:16].
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
                                    {bfiOpArg, imm16, imm16, srcY, srcXShift},
                                    Ty, Ty, hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 2);
  // Get x[31:24].
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
  // Lane 3 = y[0~23] : x[31:24].
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
                                    {bfiOpArg, imm24, imm8, srcY, srcXShift},
                                    Ty, Ty, hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3);
  // Msad on vecref and byteSrc.
  return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, vecRef, byteSrc, accum,
                                     hlslOP, Builder);
}
  1831. Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1832. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1833. Type *Ty = CI->getType();
  1834. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1835. IRBuilder<> Builder(CI);
  1836. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  1837. if (Ty != Ty->getScalarType()) {
  1838. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  1839. }
  1840. return Builder.CreateFDiv(one, op);
  1841. }
  1842. Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1843. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1844. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1845. Type *Ty = val->getType();
  1846. Type *EltTy = Ty->getScalarType();
  1847. IRBuilder<> Builder(CI);
  1848. if (EltTy->isIntegerTy()) {
  1849. Constant *zero = ConstantInt::get(Ty->getScalarType(), 0);
  1850. if (Ty != EltTy) {
  1851. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1852. }
  1853. Value *zeroLtVal = Builder.CreateICmpSLT(zero, val);
  1854. zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
  1855. Value *valLtZero = Builder.CreateICmpSLT(val, zero);
  1856. valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
  1857. return Builder.CreateSub(zeroLtVal, valLtZero);
  1858. } else {
  1859. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0.0);
  1860. if (Ty != EltTy) {
  1861. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1862. }
  1863. Value *zeroLtVal = Builder.CreateFCmpOLT(zero, val);
  1864. zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
  1865. Value *valLtZero = Builder.CreateFCmpOLT(val, zero);
  1866. valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
  1867. return Builder.CreateSub(zeroLtVal, valLtZero);
  1868. }
  1869. }
  1870. Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1871. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1872. Value *edge = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1873. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1874. Type *Ty = CI->getType();
  1875. IRBuilder<> Builder(CI);
  1876. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  1877. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  1878. Value *cond = Builder.CreateFCmpOLT(x, edge);
  1879. if (Ty != Ty->getScalarType()) {
  1880. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  1881. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1882. }
  1883. return Builder.CreateSelect(cond, zero, one);
  1884. }
  1885. Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1886. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1887. hlsl::OP *hlslOP = &helper.hlslOP;
  1888. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1889. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1890. IRBuilder<> Builder(CI);
  1891. // t = log(x);
  1892. Value *logX =
  1893. TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
  1894. // t = y * t;
  1895. Value *mulY = Builder.CreateFMul(logX, y);
  1896. // pow = exp(t);
  1897. return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
  1898. }
  1899. Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1900. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1901. hlsl::OP *hlslOP = &helper.hlslOP;
  1902. Type *Ty = CI->getType();
  1903. Value *n = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  1904. Value *i = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  1905. Value *ng = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  1906. IRBuilder<> Builder(CI);
  1907. unsigned vecSize = Ty->getVectorNumElements();
  1908. // -n x sign(dot(i, ng)).
  1909. Value *dotOp = TranslateFDot(i, ng, vecSize, hlslOP, Builder);
  1910. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  1911. Value *dotLtZero = Builder.CreateFCmpOLT(dotOp, zero);
  1912. Value *negN = Builder.CreateFNeg(n);
  1913. Value *faceforward = Builder.CreateSelect(dotLtZero, n, negN);
  1914. return faceforward;
  1915. }
  1916. }
  1917. // MOP intrinsics
  1918. namespace {
  1919. Value *TranslateGetSamplePosition(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1920. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1921. hlsl::OP *hlslOP = &helper.hlslOP;
  1922. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  1923. IRBuilder<> Builder(CI);
  1924. Value *sampleIdx =
  1925. CI->getArgOperand(HLOperandIndex::kGetSamplePositionSampleIdxOpIndex);
  1926. OP::OpCode opcode = OP::OpCode::Texture2DMSGetSamplePosition;
  1927. llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  1928. Function *dxilFunc =
  1929. hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  1930. Value *args[] = {opArg, handle, sampleIdx};
  1931. Value *samplePos = Builder.CreateCall(dxilFunc, args);
  1932. Value *result = UndefValue::get(CI->getType());
  1933. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  1934. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  1935. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  1936. result = Builder.CreateInsertElement(result, samplePosY, 1);
  1937. return result;
  1938. }
// Lowers GetDimensions for any resource kind to dx.op.getDimensions and
// scatters the returned struct fields into the intrinsic's out-parameter
// pointers, converting to float where an out-parameter is a float lvalue.
// Returns nullptr: the HL call produces no SSA value of its own.
Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                              HLOperationLowerHelper &helper,
                              HLObjectOperationLowerHelper *pObjHelper,
                              bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  DxilResource::Kind RK = pObjHelper->GetRK(handle);
  IRBuilder<> Builder(CI);
  OP::OpCode opcode = OP::OpCode::GetDimensions;
  llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Function *dxilFunc =
      hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
  Value *mipLevel = UndefValue::get(i32Ty);
  // widthOpIdx tracks where the first out-parameter lives; it shifts by one
  // depending on whether a mip-level argument precedes it.
  unsigned widthOpIdx = HLOperandIndex::kGetDimensionsMipWidthOpIndex;
  switch (RK) {
  // Texture kinds may carry an explicit mip-level in-parameter.
  case DxilResource::Kind::Texture1D:
  case DxilResource::Kind::Texture1DArray:
  case DxilResource::Kind::Texture2D:
  case DxilResource::Kind::Texture2DArray:
  case DxilResource::Kind::TextureCube:
  case DxilResource::Kind::TextureCubeArray:
  case DxilResource::Kind::Texture3D: {
    Value *opMipLevel =
        CI->getArgOperand(HLOperandIndex::kGetDimensionsMipLevelOpIndex);
    // mipLevel is in parameter, should not be pointer.
    if (!opMipLevel->getType()->isPointerTy())
      mipLevel = opMipLevel;
    else {
      // No mip level.
      widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
      mipLevel = ConstantInt::get(i32Ty, 0);
    }
  } break;
  default:
    // Buffers and MS textures never take a mip level.
    widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
    break;
  }
  Value *args[] = {opArg, handle, mipLevel};
  Value *dims = Builder.CreateCall(dxilFunc, args);
  // Walk the Dimensions return struct in order, storing each field to the
  // matching out-parameter (with int->float conversion when needed).
  unsigned dimensionIdx = 0;
  Value *width = Builder.CreateExtractValue(dims, dimensionIdx++);
  Value *widthPtr = CI->getArgOperand(widthOpIdx);
  if (widthPtr->getType()->getPointerElementType()->isFloatingPointTy())
    width = Builder.CreateSIToFP(width,
                                 widthPtr->getType()->getPointerElementType());
  Builder.CreateStore(width, widthPtr);
  if (RK == DxilResource::Kind::StructuredBuffer) {
    // Set stride.
    // The stride is a compile-time constant: the alloc size of the buffer's
    // element type, not something returned by dx.op.getDimensions.
    Value *stridePtr = CI->getArgOperand(widthOpIdx + 1);
    const DataLayout &DL = helper.dataLayout;
    Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
    Type *bufTy = pObjHelper->GetResourceType(handle);
    Type *bufRetTy = bufTy->getStructElementType(0);
    unsigned stride = DL.getTypeAllocSize(bufRetTy);
    Builder.CreateStore(hlslOP->GetU32Const(stride), stridePtr);
  } else {
    if (widthOpIdx == HLOperandIndex::kGetDimensionsMipWidthOpIndex ||
        // Samples is in w channel too.
        RK == DXIL::ResourceKind::Texture2DMS) {
      // Has mip.
      // All but the last out-parameter take consecutive struct fields ...
      for (unsigned argIdx = widthOpIdx + 1;
           argIdx < CI->getNumArgOperands() - 1; argIdx++) {
        Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
        Value *ptr = CI->getArgOperand(argIdx);
        if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
          dim = Builder.CreateSIToFP(dim,
                                     ptr->getType()->getPointerElementType());
        Builder.CreateStore(dim, ptr);
      }
      // NumOfLevel is in w channel.
      // ... while the final one (mip count / sample count) always comes from
      // field 3 regardless of how many dimensions preceded it.
      dimensionIdx = 3;
      Value *dim = Builder.CreateExtractValue(dims, dimensionIdx);
      Value *ptr = CI->getArgOperand(CI->getNumArgOperands() - 1);
      if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
        dim =
            Builder.CreateSIToFP(dim, ptr->getType()->getPointerElementType());
      Builder.CreateStore(dim, ptr);
    } else {
      // No mip/sample count: every remaining out-parameter maps one-to-one
      // onto consecutive struct fields.
      for (unsigned argIdx = widthOpIdx + 1; argIdx < CI->getNumArgOperands();
           argIdx++) {
        Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
        Value *ptr = CI->getArgOperand(argIdx);
        if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
          dim = Builder.CreateSIToFP(dim,
                                     ptr->getType()->getPointerElementType());
        Builder.CreateStore(dim, ptr);
      }
    }
  }
  return nullptr;
}
  2029. Value *GenerateUpdateCounter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2030. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2031. hlsl::OP *hlslOP = &helper.hlslOP;
  2032. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2033. pObjHelper->MarkHasCounter(handle->getType(), handle);
  2034. bool bInc = IOP == IntrinsicOp::MOP_IncrementCounter;
  2035. IRBuilder<> Builder(CI);
  2036. OP::OpCode OpCode = OP::OpCode::BufferUpdateCounter;
  2037. Value *OpCodeArg = hlslOP->GetU32Const((unsigned)OpCode);
  2038. Value *IncVal = hlslOP->GetI8Const(bInc ? 1 : -1);
  2039. // Create BufferUpdateCounter call.
  2040. Value *Args[] = {OpCodeArg, handle, IncVal};
  2041. Function *F =
  2042. hlslOP->GetOpFunc(OpCode, Type::getVoidTy(handle->getContext()));
  2043. return Builder.CreateCall(F, Args);
  2044. }
  2045. Value *ScalarizeResRet(Type *RetTy, Value *ResRet, IRBuilder<> &Builder) {
  2046. // Extract value part.
  2047. Value *retVal = llvm::UndefValue::get(RetTy);
  2048. if (RetTy->isVectorTy()) {
  2049. for (unsigned i = 0; i < RetTy->getVectorNumElements(); i++) {
  2050. Value *retComp = Builder.CreateExtractValue(ResRet, i);
  2051. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2052. }
  2053. } else {
  2054. retVal = Builder.CreateExtractValue(ResRet, 0);
  2055. }
  2056. return retVal;
  2057. }
  2058. Value *ScalarizeElements(Type *RetTy, ArrayRef<Value*> Elts, IRBuilder<> &Builder) {
  2059. // Extract value part.
  2060. Value *retVal = llvm::UndefValue::get(RetTy);
  2061. if (RetTy->isVectorTy()) {
  2062. unsigned vecSize = RetTy->getVectorNumElements();
  2063. DXASSERT(vecSize <= Elts.size(), "vector size mismatch");
  2064. for (unsigned i = 0; i < vecSize; i++) {
  2065. Value *retComp = Elts[i];
  2066. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2067. }
  2068. } else {
  2069. retVal = Elts[0];
  2070. }
  2071. return retVal;
  2072. }
  2073. void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder,
  2074. hlsl::OP *hlslOp) {
  2075. if (status && !isa<UndefValue>(status)) {
  2076. Value *statusVal = Builder.CreateExtractValue(ResRet, DXIL::kResRetStatusIndex);
  2077. Value *checkAccessOp = hlslOp->GetI32Const(
  2078. static_cast<unsigned>(DXIL::OpCode::CheckAccessFullyMapped));
  2079. Function *checkAccessFn = hlslOp->GetOpFunc(
  2080. DXIL::OpCode::CheckAccessFullyMapped, statusVal->getType());
  2081. // CheckAccess on status.
  2082. Value *bStatus =
  2083. Builder.CreateCall(checkAccessFn, {checkAccessOp, statusVal});
  2084. Value *extStatus =
  2085. Builder.CreateZExt(bStatus, Type::getInt32Ty(status->getContext()));
  2086. Builder.CreateStore(extStatus, status);
  2087. }
  2088. }
  2089. Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) {
  2090. Value *Result = UndefValue::get(DstTy);
  2091. for (unsigned i = 0; i < DstTy->getVectorNumElements(); i++)
  2092. Result = Builder.CreateInsertElement(Result, Elt, i);
  2093. return Result;
  2094. }
  2095. // Sample intrinsics.
// Collects and normalizes the operands of an HL sample-family intrinsic
// (Sample/SampleLevel/SampleBias/SampleCmp/SampleCmpLevelZero/SampleGrad,
// plus CalculateLOD) so the per-opcode lowering can read fixed-size member
// arrays. Components beyond the resource's dimensionality are padded with
// undef values.
struct SampleHelper {
  SampleHelper(CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper);
  // Set to OP::OpCode::NumOpCodes by the constructor when the resource kind
  // cannot be resolved; callers check this and skip translation.
  OP::OpCode opcode;
  Value *texHandle;
  Value *samplerHandle;
  static const unsigned kMaxCoordDimensions = 4;
  Value *coord[kMaxCoordDimensions];
  Value *special; // For CompareValue, Bias, LOD.
  // SampleGrad only.
  static const unsigned kMaxDDXYDimensions = 3;
  Value *ddx[kMaxDDXYDimensions];
  Value *ddy[kMaxDDXYDimensions];
  // Optional.
  static const unsigned kMaxOffsetDimensions = 3;
  Value *offset[kMaxOffsetDimensions];
  Value *clamp;
  Value *status;
  // Scalarize the coordinate vector argument into coord[]; trailing unused
  // components become undef floats.
  void TranslateCoord(CallInst *CI, unsigned coordIdx,
                      unsigned coordDimensions) {
    Value *coordArg = CI->getArgOperand(coordIdx);
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < coordDimensions; i++)
      coord[i] = Builder.CreateExtractElement(coordArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
      coord[i] = undefF;
  }
  // Scalarize the optional offset vector argument into offset[]; when the
  // call carries no offset operand every component is an undef i32.
  void TranslateOffset(CallInst *CI, unsigned offsetIdx,
                       unsigned offsetDimensions) {
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
    if (CI->getNumArgOperands() > offsetIdx) {
      Value *offsetArg = CI->getArgOperand(offsetIdx);
      IRBuilder<> Builder(CI);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
      for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
        offset[i] = undefI;
    } else {
      for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
        offset[i] = undefI;
    }
  }
  // Read the optional clamp argument; a vector clamp is reduced to its first
  // element, and a missing clamp becomes an undef float.
  void SetClamp(CallInst *CI, unsigned clampIdx) {
    if (CI->getNumArgOperands() > clampIdx) {
      clamp = CI->getArgOperand(clampIdx);
      if (clamp->getType()->isVectorTy()) {
        IRBuilder<> Builder(CI);
        clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
      }
    } else
      clamp = UndefValue::get(Type::getFloatTy(CI->getContext()));
  }
  // The status out-parameter (for CheckAccessFullyMapped) is only present
  // when it is the last argument of the call; otherwise status is nullptr.
  void SetStatus(CallInst *CI, unsigned statusIdx) {
    if (CI->getNumArgOperands() == (statusIdx + 1))
      status = CI->getArgOperand(statusIdx);
    else
      status = nullptr;
  }
  // Scalarize a ddx/ddy vector argument into the given member array; trailing
  // unused components become undef floats.
  void SetDDXY(CallInst *CI, MutableArrayRef<Value *> ddxy, Value *ddxyArg,
               unsigned ddxySize) {
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < ddxySize; i++)
      ddxy[i] = Builder.CreateExtractElement(ddxyArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = ddxySize; i < kMaxDDXYDimensions; i++)
      ddxy[i] = undefF;
  }
};
// Constructor: resolves the texture/sampler handles and resource kind, then
// splits the intrinsic-specific operands (coord, offset, LOD/bias/compare
// value, clamp, status) into the member arrays according to the HL operand
// layout of each sample opcode.
SampleHelper::SampleHelper(
    CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper)
    : opcode(op) {
  const unsigned thisIdx =
      HLOperandIndex::kHandleOpIdx; // opcode takes arg0, this pointer is arg1.
  const unsigned kSamplerArgIndex = HLOperandIndex::kSampleSamplerArgIndex;
  IRBuilder<> Builder(CI);
  texHandle = CI->getArgOperand(thisIdx);
  samplerHandle = CI->getArgOperand(kSamplerArgIndex);
  DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
  if (RK == DXIL::ResourceKind::Invalid) {
    // Signal the caller to leave this call untranslated.
    opcode = DXIL::OpCode::NumOpCodes;
    return;
  }
  unsigned coordDimensions = DxilResource::GetNumCoords(RK);
  unsigned offsetDimensions = DxilResource::GetNumOffsets(RK);
  const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  TranslateCoord(CI, kCoordArgIdx, coordDimensions);
  special = nullptr;
  switch (op) {
  case OP::OpCode::Sample:
    TranslateOffset(CI, HLOperandIndex::kSampleOffsetArgIndex,
                    offsetDimensions);
    SetClamp(CI, HLOperandIndex::kSampleClampArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleStatusArgIndex);
    break;
  case OP::OpCode::SampleLevel:
    // special carries the explicit LOD.
    special = CI->getArgOperand(HLOperandIndex::kSampleLLevelArgIndex);
    TranslateOffset(CI, HLOperandIndex::kSampleLOffsetArgIndex,
                    offsetDimensions);
    SetStatus(CI, HLOperandIndex::kSampleLStatusArgIndex);
    break;
  case OP::OpCode::SampleBias:
    // special carries the mip bias.
    special = CI->getArgOperand(HLOperandIndex::kSampleBBiasArgIndex);
    TranslateOffset(CI, HLOperandIndex::kSampleBOffsetArgIndex,
                    offsetDimensions);
    SetClamp(CI, HLOperandIndex::kSampleBClampArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleBStatusArgIndex);
    break;
  case OP::OpCode::SampleCmp:
    // special carries the comparison value.
    special = CI->getArgOperand(HLOperandIndex::kSampleCmpCmpValArgIndex);
    TranslateOffset(CI, HLOperandIndex::kSampleCmpOffsetArgIndex,
                    offsetDimensions);
    SetClamp(CI, HLOperandIndex::kSampleCmpClampArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex);
    break;
  case OP::OpCode::SampleCmpLevelZero:
    // special carries the comparison value.
    special = CI->getArgOperand(HLOperandIndex::kSampleCmpLZCmpValArgIndex);
    TranslateOffset(CI, HLOperandIndex::kSampleCmpLZOffsetArgIndex,
                    offsetDimensions);
    SetStatus(CI, HLOperandIndex::kSampleCmpLZStatusArgIndex);
    break;
  case OP::OpCode::SampleGrad:
    // NOTE(review): ddx/ddy are split using offsetDimensions rather than a
    // dedicated gradient dimension count. For resource kinds where the
    // offset count differs from the gradient arity (e.g. cube resources,
    // which take no offsets but 3-component gradients) this would leave
    // gradient components undef — confirm against the DXIL SampleGrad spec.
    SetDDXY(CI, ddx, CI->getArgOperand(HLOperandIndex::kSampleGDDXArgIndex),
            offsetDimensions);
    SetDDXY(CI, ddy, CI->getArgOperand(HLOperandIndex::kSampleGDDYArgIndex),
            offsetDimensions);
    TranslateOffset(CI, HLOperandIndex::kSampleGOffsetArgIndex,
                    offsetDimensions);
    SetClamp(CI, HLOperandIndex::kSampleGClampArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleGStatusArgIndex);
    break;
  case OP::OpCode::CalculateLOD:
    // Only need coord for LOD calculation.
    break;
  default:
    DXASSERT(0, "invalid opcode for Sample");
    break;
  }
}
  2234. Value *TranslateCalculateLOD(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2235. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2236. hlsl::OP *hlslOP = &helper.hlslOP;
  2237. SampleHelper sampleHelper(CI, OP::OpCode::CalculateLOD, pObjHelper);
  2238. if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2239. Translated = false;
  2240. return nullptr;
  2241. }
  2242. bool bClamped = IOP == IntrinsicOp::MOP_CalculateLevelOfDetail;
  2243. IRBuilder<> Builder(CI);
  2244. Value *opArg =
  2245. hlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::CalculateLOD));
  2246. Value *clamped = hlslOP->GetI1Const(bClamped);
  2247. Value *args[] = {opArg,
  2248. sampleHelper.texHandle,
  2249. sampleHelper.samplerHandle,
  2250. sampleHelper.coord[0],
  2251. sampleHelper.coord[1],
  2252. sampleHelper.coord[2],
  2253. clamped};
  2254. Function *dxilFunc = hlslOP->GetOpFunc(OP::OpCode::CalculateLOD,
  2255. Type::getFloatTy(opArg->getContext()));
  2256. Value *LOD = Builder.CreateCall(dxilFunc, args);
  2257. return LOD;
  2258. }
  2259. Value *TranslateCheckAccess(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2260. HLOperationLowerHelper &helper,
  2261. HLObjectOperationLowerHelper *pObjHelper,
  2262. bool &Translated) {
  2263. // Translate CheckAccess into uint->bool, later optimization should remove it.
  2264. // Real checkaccess is generated in UpdateStatus.
  2265. IRBuilder<> Builder(CI);
  2266. Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2267. return Builder.CreateTrunc(V, helper.i1Ty);
  2268. }
  2269. void GenerateDxilSample(CallInst *CI, Function *F, ArrayRef<Value *> sampleArgs,
  2270. Value *status, hlsl::OP *hlslOp) {
  2271. IRBuilder<> Builder(CI);
  2272. CallInst *call = Builder.CreateCall(F, sampleArgs);
  2273. // extract value part
  2274. Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);
  2275. // Replace ret val.
  2276. CI->replaceAllUsesWith(retVal);
  2277. // get status
  2278. if (status) {
  2279. UpdateStatus(call, status, Builder, hlslOp);
  2280. }
  2281. }
  2282. Value *TranslateSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2283. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2284. hlsl::OP *hlslOP = &helper.hlslOP;
  2285. SampleHelper sampleHelper(CI, opcode, pObjHelper);
  2286. if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2287. Translated = false;
  2288. return nullptr;
  2289. }
  2290. Type *Ty = CI->getType();
  2291. Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  2292. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2293. switch (opcode) {
  2294. case OP::OpCode::Sample: {
  2295. Value *sampleArgs[] = {
  2296. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2297. // Coord.
  2298. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2299. sampleHelper.coord[3],
  2300. // Offset.
  2301. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2302. // Clamp.
  2303. sampleHelper.clamp};
  2304. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  2305. } break;
  2306. case OP::OpCode::SampleLevel: {
  2307. Value *sampleArgs[] = {
  2308. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2309. // Coord.
  2310. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2311. sampleHelper.coord[3],
  2312. // Offset.
  2313. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2314. // LOD.
  2315. sampleHelper.special};
  2316. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  2317. } break;
  2318. case OP::OpCode::SampleGrad: {
  2319. Value *sampleArgs[] = {
  2320. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2321. // Coord.
  2322. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2323. sampleHelper.coord[3],
  2324. // Offset.
  2325. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2326. // Ddx.
  2327. sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
  2328. // Ddy.
  2329. sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
  2330. // Clamp.
  2331. sampleHelper.clamp};
  2332. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  2333. } break;
  2334. case OP::OpCode::SampleBias: {
  2335. // Clamp bias for immediate.
  2336. Value *bias = sampleHelper.special;
  2337. if (ConstantFP *FP = dyn_cast<ConstantFP>(bias)) {
  2338. float v = FP->getValueAPF().convertToFloat();
  2339. if (v > DXIL::kMaxMipLodBias)
  2340. bias = ConstantFP::get(FP->getType(), DXIL::kMaxMipLodBias);
  2341. if (v < DXIL::kMinMipLodBias)
  2342. bias = ConstantFP::get(FP->getType(), DXIL::kMinMipLodBias);
  2343. }
  2344. Value *sampleArgs[] = {
  2345. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2346. // Coord.
  2347. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2348. sampleHelper.coord[3],
  2349. // Offset.
  2350. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2351. // Bias.
  2352. bias,
  2353. // Clamp.
  2354. sampleHelper.clamp};
  2355. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  2356. } break;
  2357. case OP::OpCode::SampleCmp: {
  2358. Value *sampleArgs[] = {
  2359. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2360. // Coord.
  2361. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2362. sampleHelper.coord[3],
  2363. // Offset.
  2364. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2365. // CmpVal.
  2366. sampleHelper.special,
  2367. // Clamp.
  2368. sampleHelper.clamp};
  2369. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  2370. } break;
  2371. case OP::OpCode::SampleCmpLevelZero:
  2372. default: {
  2373. DXASSERT(opcode == OP::OpCode::SampleCmpLevelZero, "invalid sample opcode");
  2374. Value *sampleArgs[] = {
  2375. opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
  2376. // Coord.
  2377. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2378. sampleHelper.coord[3],
  2379. // Offset.
  2380. sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
  2381. // CmpVal.
  2382. sampleHelper.special};
  2383. GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  2384. } break;
  2385. }
  2386. // CI is replaced in GenerateDxilSample.
  2387. return nullptr;
  2388. }
  2389. // Gather intrinsics.
// Collects and normalizes the operands of an HL Gather/GatherCmp intrinsic,
// including the per-sample-offset overloads, before lowering to the dxil
// TextureGather/TextureGatherCmp ops.
struct GatherHelper {
  enum class GatherChannel {
    GatherAll,
    GatherRed,
    GatherGreen,
    GatherBlue,
    GatherAlpha,
  };
  GatherHelper(CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper,
               GatherHelper::GatherChannel ch);
  // Set to OP::OpCode::NumOpCodes by the constructor when the resource kind
  // cannot be resolved; callers check this and skip translation.
  OP::OpCode opcode;
  Value *texHandle;
  Value *samplerHandle;
  static const unsigned kMaxCoordDimensions = 4;
  Value *coord[kMaxCoordDimensions];
  // Channel index (0-3) passed to the dxil gather op.
  unsigned channel;
  Value *special; // Comparison value for GatherCmp; nullptr otherwise.
  // Optional.
  static const unsigned kMaxOffsetDimensions = 2;
  Value *offset[kMaxOffsetDimensions];
  // For the overload send different offset for each sample.
  // Only save 3 sampleOffsets because use offset for normal overload as first
  // sample offset.
  static const unsigned kSampleOffsetDimensions = 3;
  Value *sampleOffsets[kSampleOffsetDimensions][kMaxOffsetDimensions];
  Value *status;
  bool hasSampleOffsets;
  // Scalarize the coordinate vector argument into coord[]; trailing unused
  // components become undef floats.
  void TranslateCoord(CallInst *CI, unsigned coordIdx,
                      unsigned coordDimensions) {
    Value *coordArg = CI->getArgOperand(coordIdx);
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < coordDimensions; i++)
      coord[i] = Builder.CreateExtractElement(coordArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
      coord[i] = undefF;
  }
  // The status out-parameter is only present when it is the last argument of
  // the call; otherwise status is nullptr.
  void SetStatus(CallInst *CI, unsigned statusIdx) {
    if (CI->getNumArgOperands() == (statusIdx + 1))
      status = CI->getArgOperand(statusIdx);
    else
      status = nullptr;
  }
  // Scalarize the optional offset vector argument into offset[]; when the
  // call carries no offset operand every component is an undef i32.
  void TranslateOffset(CallInst *CI, unsigned offsetIdx,
                       unsigned offsetDimensions) {
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
    if (CI->getNumArgOperands() > offsetIdx) {
      Value *offsetArg = CI->getArgOperand(offsetIdx);
      IRBuilder<> Builder(CI);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
      for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
        offset[i] = undefI;
    } else {
      for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
        offset[i] = undefI;
    }
  }
  // Per-sample-offset overload: scalarize the offsets for samples 1-3 into
  // sampleOffsets[] (sample 0 reuses offset[]) and set hasSampleOffsets.
  void TranslateSampleOffset(CallInst *CI, unsigned offsetIdx,
                             unsigned offsetDimensions) {
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
    if (CI->getNumArgOperands() >= (offsetIdx + kSampleOffsetDimensions)) {
      hasSampleOffsets = true;
      IRBuilder<> Builder(CI);
      for (unsigned ch = 0; ch < kSampleOffsetDimensions; ch++) {
        Value *offsetArg = CI->getArgOperand(offsetIdx + ch);
        for (unsigned i = 0; i < offsetDimensions; i++)
          sampleOffsets[ch][i] = Builder.CreateExtractElement(offsetArg, i);
        for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
          sampleOffsets[ch][i] = undefI;
      }
    }
  }
  // Update the offset args for gather with sample offset at sampleIdx.
  void UpdateOffsetInGatherArgs(MutableArrayRef<Value *> gatherArgs,
                                unsigned sampleIdx) {
    unsigned offsetBase = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
    for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
      // -1 because offset for sample 0 is in GatherHelper::offset.
      gatherArgs[offsetBase + i] = sampleOffsets[sampleIdx - 1][i];
  }
};
// Constructor: maps the channel selection, resolves the texture/sampler
// handles and resource kind, then splits the coord/offset/compare/status
// operands for the given gather opcode.
GatherHelper::GatherHelper(
    CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper,
    GatherHelper::GatherChannel ch)
    : opcode(op), special(nullptr), hasSampleOffsets(false) {
  const unsigned thisIdx =
      HLOperandIndex::kHandleOpIdx; // opcode takes arg0, this pointer is arg1.
  const unsigned kSamplerArgIndex = HLOperandIndex::kSampleSamplerArgIndex;
  switch (ch) {
  case GatherChannel::GatherAll:
    // Plain Gather() reads channel 0, same as GatherRed.
    channel = 0;
    break;
  case GatherChannel::GatherRed:
    channel = 0;
    break;
  case GatherChannel::GatherGreen:
    channel = 1;
    break;
  case GatherChannel::GatherBlue:
    channel = 2;
    break;
  case GatherChannel::GatherAlpha:
    channel = 3;
    break;
  }
  IRBuilder<> Builder(CI);
  texHandle = CI->getArgOperand(thisIdx);
  samplerHandle = CI->getArgOperand(kSamplerArgIndex);
  DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
  if (RK == DXIL::ResourceKind::Invalid) {
    // Signal the caller to leave this call untranslated.
    opcode = DXIL::OpCode::NumOpCodes;
    return;
  }
  unsigned coordSize = DxilResource::GetNumCoords(RK);
  unsigned offsetSize = DxilResource::GetNumOffsets(RK);
  const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  TranslateCoord(CI, kCoordArgIdx, coordSize);
  switch (op) {
  case OP::OpCode::TextureGather: {
    TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize);
    // Gather all don't have sample offset version overload.
    if (ch != GatherChannel::GatherAll)
      TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
                            offsetSize);
    // The status argument sits later when per-sample offsets are present.
    unsigned statusIdx =
        hasSampleOffsets ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex
                         : HLOperandIndex::kGatherStatusArgIndex;
    SetStatus(CI, statusIdx);
  } break;
  case OP::OpCode::TextureGatherCmp: {
    // special carries the comparison value.
    special = CI->getArgOperand(HLOperandIndex::kGatherCmpCmpValArgIndex);
    TranslateOffset(CI, HLOperandIndex::kGatherCmpOffsetArgIndex, offsetSize);
    // Gather all don't have sample offset version overload.
    if (ch != GatherChannel::GatherAll)
      TranslateSampleOffset(CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex,
                            offsetSize);
    unsigned statusIdx =
        hasSampleOffsets
            ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex
            : HLOperandIndex::kGatherCmpStatusArgIndex;
    SetStatus(CI, statusIdx);
  } break;
  default:
    DXASSERT(0, "invalid opcode for Gather");
    break;
  }
}
  2538. void GenerateDxilGather(CallInst *CI, Function *F,
  2539. MutableArrayRef<Value *> gatherArgs,
  2540. GatherHelper &helper, hlsl::OP *hlslOp) {
  2541. IRBuilder<> Builder(CI);
  2542. CallInst *call = Builder.CreateCall(F, gatherArgs);
  2543. if (!helper.hasSampleOffsets) {
  2544. // extract value part
  2545. Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);
  2546. // Replace ret val.
  2547. CI->replaceAllUsesWith(retVal);
  2548. } else {
  2549. Value *retVal = UndefValue::get(CI->getType());
  2550. Value *elt = Builder.CreateExtractValue(call, (uint64_t)0);
  2551. retVal = Builder.CreateInsertElement(retVal, elt, (uint64_t)0);
  2552. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 1);
  2553. CallInst *callY = Builder.CreateCall(F, gatherArgs);
  2554. elt = Builder.CreateExtractValue(callY, (uint64_t)1);
  2555. retVal = Builder.CreateInsertElement(retVal, elt, 1);
  2556. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 2);
  2557. CallInst *callZ = Builder.CreateCall(F, gatherArgs);
  2558. elt = Builder.CreateExtractValue(callZ, (uint64_t)2);
  2559. retVal = Builder.CreateInsertElement(retVal, elt, 2);
  2560. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 3);
  2561. CallInst *callW = Builder.CreateCall(F, gatherArgs);
  2562. elt = Builder.CreateExtractValue(callW, (uint64_t)3);
  2563. retVal = Builder.CreateInsertElement(retVal, elt, 3);
  2564. // Replace ret val.
  2565. CI->replaceAllUsesWith(retVal);
  2566. // TODO: UpdateStatus for each gather call.
  2567. }
  2568. // Get status
  2569. if (helper.status) {
  2570. UpdateStatus(call, helper.status, Builder, hlslOp);
  2571. }
  2572. }
  2573. Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2574. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2575. hlsl::OP *hlslOP = &helper.hlslOP;
  2576. GatherHelper::GatherChannel ch = GatherHelper::GatherChannel::GatherAll;
  2577. switch (IOP) {
  2578. case IntrinsicOp::MOP_Gather:
  2579. case IntrinsicOp::MOP_GatherCmp:
  2580. ch = GatherHelper::GatherChannel::GatherAll;
  2581. break;
  2582. case IntrinsicOp::MOP_GatherRed:
  2583. case IntrinsicOp::MOP_GatherCmpRed:
  2584. ch = GatherHelper::GatherChannel::GatherRed;
  2585. break;
  2586. case IntrinsicOp::MOP_GatherGreen:
  2587. case IntrinsicOp::MOP_GatherCmpGreen:
  2588. ch = GatherHelper::GatherChannel::GatherGreen;
  2589. break;
  2590. case IntrinsicOp::MOP_GatherBlue:
  2591. case IntrinsicOp::MOP_GatherCmpBlue:
  2592. ch = GatherHelper::GatherChannel::GatherBlue;
  2593. break;
  2594. case IntrinsicOp::MOP_GatherAlpha:
  2595. case IntrinsicOp::MOP_GatherCmpAlpha:
  2596. ch = GatherHelper::GatherChannel::GatherAlpha;
  2597. break;
  2598. default:
  2599. DXASSERT(0, "invalid gather intrinsic");
  2600. break;
  2601. }
  2602. GatherHelper gatherHelper(CI, opcode, pObjHelper, ch);
  2603. if (gatherHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2604. Translated = false;
  2605. return nullptr;
  2606. }
  2607. Type *Ty = CI->getType();
  2608. Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  2609. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2610. Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel);
  2611. switch (opcode) {
  2612. case OP::OpCode::TextureGather: {
  2613. Value *gatherArgs[] = {
  2614. opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
  2615. // Coord.
  2616. gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
  2617. gatherHelper.coord[3],
  2618. // Offset.
  2619. gatherHelper.offset[0], gatherHelper.offset[1],
  2620. // Channel.
  2621. channelArg};
  2622. GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  2623. } break;
  2624. case OP::OpCode::TextureGatherCmp: {
  2625. Value *gatherArgs[] = {
  2626. opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
  2627. // Coord.
  2628. gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
  2629. gatherHelper.coord[3],
  2630. // Offset.
  2631. gatherHelper.offset[0], gatherHelper.offset[1],
  2632. // Channel.
  2633. channelArg,
  2634. // CmpVal.
  2635. gatherHelper.special};
  2636. GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  2637. } break;
  2638. default:
  2639. DXASSERT(0, "invalid opcode for Gather");
  2640. break;
  2641. }
  2642. // CI is replaced in GenerateDxilGather.
  2643. return nullptr;
  2644. }
  2645. // Load/Store intrinsics.
// Bundles everything needed to lower a resource load (Texture/Buffer Load,
// mips double subscript, or operator[] access) to the matching dxil op.
struct ResLoadHelper {
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, IntrinsicOp IOP, bool bForSubscript=false);
  // For mips[mip][addr] style access with an explicit mip level.
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, Value *mip);
  // For double subscript.
  ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip)
      : opcode(OP::OpCode::TextureLoad),
        intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst),
        addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {}
  OP::OpCode opcode;
  IntrinsicOp intrinsicOpCode;
  // NOTE(review): dxilMajor/dxilMinor are never set or read in this part of
  // the file — confirm they are still used elsewhere before relying on them.
  unsigned dxilMajor;
  unsigned dxilMinor;
  Value *handle;
  // The HL instruction whose uses are replaced by the lowered load.
  Value *retVal;
  // Coordinate/index operand; may be scalar or vector.
  Value *addr;
  Value *offset;   // Optional texel offset; nullptr when absent.
  Value *status;   // Optional status out-param; nullptr when absent.
  Value *mipLevel; // Mip level (or sample index for MS textures).
};
  2667. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  2668. DxilResourceBase::Class RC, Value *hdl, IntrinsicOp IOP, bool bForSubscript)
  2669. : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) {
  2670. switch (RK) {
  2671. case DxilResource::Kind::RawBuffer:
  2672. case DxilResource::Kind::StructuredBuffer:
  2673. opcode = OP::OpCode::RawBufferLoad;
  2674. break;
  2675. case DxilResource::Kind::TypedBuffer:
  2676. opcode = OP::OpCode::BufferLoad;
  2677. break;
  2678. case DxilResource::Kind::Invalid:
  2679. DXASSERT(0, "invalid resource kind");
  2680. break;
  2681. default:
  2682. opcode = OP::OpCode::TextureLoad;
  2683. break;
  2684. }
  2685. retVal = CI;
  2686. const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx;
  2687. addr = CI->getArgOperand(kAddrIdx);
  2688. unsigned argc = CI->getNumArgOperands();
  2689. if (opcode == OP::OpCode::TextureLoad) {
  2690. // mip at last channel
  2691. unsigned coordSize = DxilResource::GetNumCoords(RK);
  2692. if (RC == DxilResourceBase::Class::SRV) {
  2693. if (bForSubscript) {
  2694. // Use 0 when access by [].
  2695. mipLevel = IRBuilder<>(CI).getInt32(0);
  2696. } else {
  2697. if (coordSize == 1 && !addr->getType()->isVectorTy()) {
  2698. // Use addr when access by Load.
  2699. mipLevel = addr;
  2700. } else {
  2701. mipLevel = IRBuilder<>(CI).CreateExtractElement(addr, coordSize);
  2702. }
  2703. }
  2704. } else {
  2705. // Set mip level to undef for UAV.
  2706. mipLevel = UndefValue::get(Type::getInt32Ty(addr->getContext()));
  2707. }
  2708. if (RC == DxilResourceBase::Class::SRV) {
  2709. unsigned offsetIdx = HLOperandIndex::kTexLoadOffsetOpIdx;
  2710. unsigned statusIdx = HLOperandIndex::kTexLoadStatusOpIdx;
  2711. if (RK == DxilResource::Kind::Texture2DMS ||
  2712. RK == DxilResource::Kind::Texture2DMSArray) {
  2713. offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx;
  2714. statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx;
  2715. mipLevel =
  2716. CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx);
  2717. }
  2718. if (argc > offsetIdx)
  2719. offset = CI->getArgOperand(offsetIdx);
  2720. if (argc > statusIdx)
  2721. status = CI->getArgOperand(statusIdx);
  2722. } else {
  2723. const unsigned kStatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx;
  2724. if (argc > kStatusIdx)
  2725. status = CI->getArgOperand(kStatusIdx);
  2726. }
  2727. } else {
  2728. const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx;
  2729. if (argc > kStatusIdx)
  2730. status = CI->getArgOperand(kStatusIdx);
  2731. }
  2732. }
  2733. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  2734. DxilResourceBase::Class RC, Value *hdl, Value *mip)
  2735. : handle(hdl), offset(nullptr), status(nullptr) {
  2736. DXASSERT(RK != DxilResource::Kind::RawBuffer &&
  2737. RK != DxilResource::Kind::TypedBuffer &&
  2738. RK != DxilResource::Kind::Invalid,
  2739. "invalid resource kind");
  2740. opcode = OP::OpCode::TextureLoad;
  2741. retVal = CI;
  2742. mipLevel = mip;
  2743. const unsigned kAddrIdx = HLOperandIndex::kMipLoadAddrOpIdx;
  2744. addr = CI->getArgOperand(kAddrIdx);
  2745. unsigned argc = CI->getNumArgOperands();
  2746. const unsigned kOffsetIdx = HLOperandIndex::kMipLoadOffsetOpIdx;
  2747. const unsigned kStatusIdx = HLOperandIndex::kMipLoadStatusOpIdx;
  2748. if (argc > kOffsetIdx)
  2749. offset = CI->getArgOperand(kOffsetIdx);
  2750. if (argc > kStatusIdx)
  2751. status = CI->getArgOperand(kStatusIdx);
  2752. }
  2753. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  2754. hlsl::OP *OP, const DataLayout &DL);
  2755. // Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi }
  2756. void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
  2757. unsigned size, MutableArrayRef<Value *> resultElts,
  2758. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  2759. Type *i64Ty = Builder.getInt64Ty();
  2760. Type *doubleTy = Builder.getDoubleTy();
  2761. if (EltTy == doubleTy) {
  2762. Function *makeDouble =
  2763. hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy);
  2764. Value *makeDoubleOpArg =
  2765. Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
  2766. for (unsigned i = 0; i < size; i++) {
  2767. Value *lo = resultElts32[2 * i];
  2768. Value *hi = resultElts32[2 * i + 1];
  2769. Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
  2770. resultElts[i] = V;
  2771. }
  2772. } else {
  2773. for (unsigned i = 0; i < size; i++) {
  2774. Value *lo = resultElts32[2 * i];
  2775. Value *hi = resultElts32[2 * i + 1];
  2776. lo = Builder.CreateZExt(lo, i64Ty);
  2777. hi = Builder.CreateZExt(hi, i64Ty);
  2778. hi = Builder.CreateShl(hi, 32);
  2779. resultElts[i] = Builder.CreateOr(lo, hi);
  2780. }
  2781. }
  2782. }
  2783. static uint8_t GetRawBufferMaskFromIOP(IntrinsicOp IOP, hlsl::OP *OP) {
  2784. switch (IOP) {
  2785. // one component
  2786. case IntrinsicOp::MOP_Load:
  2787. return DXIL::kCompMask_X;
  2788. // two component
  2789. case IntrinsicOp::MOP_Load2:
  2790. return DXIL::kCompMask_X | DXIL::kCompMask_Y;
  2791. // three component
  2792. case IntrinsicOp::MOP_Load3:
  2793. return DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
  2794. // four component
  2795. case IntrinsicOp::MOP_Load4:
  2796. return DXIL::kCompMask_All;
  2797. default:
  2798. DXASSERT(false, "Invalid Intrinsic for computing load mask.");
  2799. return 0;
  2800. }
  2801. }
  2802. static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) {
  2803. Type *ETy = Ty->getScalarType();
  2804. bool is64 = ETy->isDoubleTy() || ETy == Type::getInt64Ty(ETy->getContext());
  2805. unsigned mask = 0;
  2806. if (is64) {
  2807. switch (NumComponents) {
  2808. case 0:
  2809. break;
  2810. case 1:
  2811. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y;
  2812. break;
  2813. case 2:
  2814. mask = DXIL::kCompMask_All;
  2815. break;
  2816. default:
  2817. DXASSERT(false, "Cannot load more than 2 components for 64bit types.");
  2818. }
  2819. }
  2820. else {
  2821. switch (NumComponents) {
  2822. case 0:
  2823. break;
  2824. case 1:
  2825. mask = DXIL::kCompMask_X;
  2826. break;
  2827. case 2:
  2828. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y;
  2829. break;
  2830. case 3:
  2831. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
  2832. break;
  2833. case 4:
  2834. mask = DXIL::kCompMask_All;
  2835. break;
  2836. default:
  2837. DXASSERT(false, "Cannot load more than 2 components for 64bit types.");
  2838. }
  2839. }
  2840. return OP->GetI8Const(mask);
  2841. }
// Lower an HL resource load (texture or buffer) to the matching DXIL load
// op, scalarize the returned ResRet struct, and replace all uses of the HL
// call. 64-bit element types are loaded as pairs of i32 dwords and
// reassembled afterwards.
void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                   IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {
  Type *Ty = helper.retVal->getType();
  // A pointer result means this call is a structured-buffer subscript,
  // which has its own translation path.
  if (Ty->isPointerTy()) {
    TranslateStructBufSubscript(cast<CallInst>(helper.retVal), helper.handle,
                                helper.status, OP, DL);
    return;
  }
  OP::OpCode opcode = helper.opcode;
  Type *i32Ty = Builder.getInt32Ty();
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  Type *EltTy = Ty->getScalarType();
  // Alignment comes from the element's allocation size (computed before
  // any 64-bit -> i32 overload substitution below).
  Constant *Alignment = OP->GetI32Const(OP->GetAllocSizeForType(EltTy));
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  if (is64) {
    // 64-bit loads use the i32 overload; results are recombined later.
    EltTy = i32Ty;
  }
  Function *F = OP->GetOpFunc(opcode, EltTy);
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
  llvm::Value *undefI = llvm::UndefValue::get(i32Ty);
  // Argument order below must match the DXIL op signatures exactly.
  SmallVector<Value *, 12> loadArgs;
  loadArgs.emplace_back(opArg); // opcode
  loadArgs.emplace_back(helper.handle); // resource handle
  if (opcode == OP::OpCode::TextureLoad) {
    // set mip level
    loadArgs.emplace_back(helper.mipLevel);
  }
  if (opcode == OP::OpCode::TextureLoad) {
    // texture coord
    unsigned coordSize = DxilResource::GetNumCoords(RK);
    bool isVectorAddr = helper.addr->getType()->isVectorTy();
    // Always emit 3 coordinate slots; unused ones are undef.
    for (unsigned i = 0; i < 3; i++) {
      if (i < coordSize) {
        loadArgs.emplace_back(
            isVectorAddr ? Builder.CreateExtractElement(helper.addr, i) : helper.addr);
      }
      else
        loadArgs.emplace_back(undefI);
    }
  } else {
    if (helper.addr->getType()->isVectorTy()) {
      // Buffer loads take a scalar index; use element 0 of a vector addr.
      Value *scalarOffset =
          Builder.CreateExtractElement(helper.addr, (uint64_t)0);
      // TODO: calculate the real address based on opcode
      loadArgs.emplace_back(scalarOffset); // offset
    } else {
      // TODO: calculate the real address based on opcode
      loadArgs.emplace_back(helper.addr); // offset
    }
  }
  // offset 0
  if (opcode == OP::OpCode::TextureLoad) {
    // Texture loads carry up to 3 immediate offsets; undef when absent.
    if (helper.offset && !isa<llvm::UndefValue>(helper.offset)) {
      unsigned offsetSize = DxilResource::GetNumOffsets(RK);
      for (unsigned i = 0; i < 3; i++) {
        if (i < offsetSize)
          loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i));
        else
          loadArgs.emplace_back(undefI);
      }
    } else {
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
    }
  }
  // Offset 1
  if (RK == DxilResource::Kind::RawBuffer) {
    // elementOffset, mask, alignment
    loadArgs.emplace_back(undefI);
    Type *rtnTy = helper.retVal->getType();
    unsigned numComponents = 1;
    if (VectorType *VTy = dyn_cast<VectorType>(rtnTy)) {
      rtnTy = VTy->getElementType();
      numComponents = VTy->getNumElements();
    }
    loadArgs.emplace_back(GetRawBufferMaskForETy(rtnTy, numComponents, OP));
    loadArgs.emplace_back(Alignment);
  }
  else if (RK == DxilResource::Kind::TypedBuffer) {
    loadArgs.emplace_back(undefI);
  }
  else if (RK == DxilResource::Kind::StructuredBuffer) {
    // elementOffset, mask, alignment
    loadArgs.emplace_back(
        OP->GetU32Const(0)); // For case use built-in types in structure buffer.
    loadArgs.emplace_back(OP->GetU8Const(0)); // When is this case hit?
    loadArgs.emplace_back(Alignment);
  }
  Value *ResRet =
      Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode));
  Value *retValNew = nullptr;
  if (!is64) {
    retValNew = ScalarizeResRet(Ty, ResRet, Builder);
  } else {
    // Reassemble each 64-bit element from two loaded 32-bit dwords.
    unsigned size = 1;
    if (Ty->isVectorTy()) {
      size = Ty->getVectorNumElements();
    }
    DXASSERT(size <= 2, "typed buffer only allow 4 dwords");
    EltTy = Ty->getScalarType();
    Value *Elts[2];
    Make64bitResultForLoad(Ty->getScalarType(),
                           {
                               Builder.CreateExtractValue(ResRet, 0),
                               Builder.CreateExtractValue(ResRet, 1),
                               Builder.CreateExtractValue(ResRet, 2),
                               Builder.CreateExtractValue(ResRet, 3),
                           },
                           size, Elts, OP, Builder);
    retValNew = ScalarizeElements(Ty, Elts, Builder);
  }
  // replace
  helper.retVal->replaceAllUsesWith(retValNew);
  // Save new ret val.
  helper.retVal = retValNew;
  // get status
  UpdateStatus(ResRet, helper.status, Builder, OP);
}
  2962. Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2963. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2964. hlsl::OP *hlslOP = &helper.hlslOP;
  2965. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2966. IRBuilder<> Builder(CI);
  2967. DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  2968. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  2969. ResLoadHelper loadHelper(CI, RK, RC, handle, IOP);
  2970. TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout);
  2971. // CI is replaced in TranslateLoad.
  2972. return nullptr;
  2973. }
  2974. // Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
  2975. void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
  2976. MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
  2977. IRBuilder<> &Builder) {
  2978. Type *i32Ty = Builder.getInt32Ty();
  2979. Type *doubleTy = Builder.getDoubleTy();
  2980. Value *undefI32 = UndefValue::get(i32Ty);
  2981. if (EltTy == doubleTy) {
  2982. Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
  2983. Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
  2984. for (unsigned i = 0; i < size; i++) {
  2985. if (isa<UndefValue>(vals[i])) {
  2986. vals32[2 * i] = undefI32;
  2987. vals32[2 * i + 1] = undefI32;
  2988. } else {
  2989. Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
  2990. Value *lo = Builder.CreateExtractValue(retVal, 0);
  2991. Value *hi = Builder.CreateExtractValue(retVal, 1);
  2992. vals32[2 * i] = lo;
  2993. vals32[2 * i + 1] = hi;
  2994. }
  2995. }
  2996. } else {
  2997. for (unsigned i = 0; i < size; i++) {
  2998. if (isa<UndefValue>(vals[i])) {
  2999. vals32[2 * i] = undefI32;
  3000. vals32[2 * i + 1] = undefI32;
  3001. } else {
  3002. Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
  3003. Value *hi = Builder.CreateLShr(vals[i], 32);
  3004. hi = Builder.CreateTrunc(hi, i32Ty);
  3005. vals32[2 * i] = lo;
  3006. vals32[2 * i + 1] = hi;
  3007. }
  3008. }
  3009. }
  3010. }
// Lower an HL resource store to the matching DXIL store op
// (RawBufferStore / BufferStore / TextureStore). The value is expanded
// into four value slots plus a component mask; 64-bit elements are split
// into pairs of 32-bit dwords first.
void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
                    Value *offset, IRBuilder<> &Builder, hlsl::OP *OP) {
  Type *Ty = val->getType();
  // Pick the DXIL store opcode from the resource kind.
  OP::OpCode opcode;
  switch (RK) {
  case DxilResource::Kind::RawBuffer:
  case DxilResource::Kind::StructuredBuffer:
    opcode = OP::OpCode::RawBufferStore;
    break;
  case DxilResource::Kind::TypedBuffer:
    opcode = OP::OpCode::BufferStore;
    break;
  case DxilResource::Kind::Invalid:
    DXASSERT(0, "invalid resource kind");
    break;
  default:
    opcode = OP::OpCode::TextureStore;
    break;
  }
  Type *i32Ty = Builder.getInt32Ty();
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  Type *EltTy = Ty->getScalarType();
  // Alignment from the element's allocation size (before the 64-bit ->
  // i32 overload substitution below).
  Constant *Alignment = OP->GetI32Const(OP->GetAllocSizeForType(EltTy));
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  if (is64) {
    // 64-bit stores use the i32 overload; values are split further down.
    EltTy = i32Ty;
  }
  Function *F = OP->GetOpFunc(opcode, EltTy);
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
  llvm::Value *undefI =
      llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext()));
  llvm::Value *undefVal = llvm::UndefValue::get(Ty->getScalarType());
  // Argument order below must match the DXIL op signatures exactly.
  SmallVector<Value *, 13> storeArgs;
  storeArgs.emplace_back(opArg);  // opcode
  storeArgs.emplace_back(handle); // resource handle
  if (RK == DxilResource::Kind::RawBuffer ||
      RK == DxilResource::Kind::TypedBuffer) {
    // Offset 0
    if (offset->getType()->isVectorTy()) {
      // Buffer stores take a scalar index; use element 0 of a vector addr.
      Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0);
      storeArgs.emplace_back(scalarOffset); // offset
    } else {
      storeArgs.emplace_back(offset); // offset
    }
    // Offset 1
    storeArgs.emplace_back(undefI);
  } else {
    // texture store
    unsigned coordSize = DxilResource::GetNumCoords(RK);
    // Set x first.
    if (offset->getType()->isVectorTy())
      storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0));
    else
      storeArgs.emplace_back(offset);
    // Remaining coordinate slots; unused ones are undef.
    for (unsigned i = 1; i < 3; i++) {
      if (i < coordSize)
        storeArgs.emplace_back(Builder.CreateExtractElement(offset, i));
      else
        storeArgs.emplace_back(undefI);
    }
    // TODO: support mip for texture ST
  }
  // values
  // Typed stores must populate all four value slots with real values
  // (element 0 is replicated as filler); raw stores may use undef filler
  // and rely on the mask.
  bool isTyped = opcode == OP::OpCode::TextureStore ||
                 RK == DxilResource::Kind::TypedBuffer;
  uint8_t mask = 0;
  if (Ty->isVectorTy()) {
    unsigned vecSize = Ty->getVectorNumElements();
    Value *emptyVal = undefVal;
    if (isTyped) {
      mask = DXIL::kCompMask_All;
      emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
    }
    for (unsigned i = 0; i < 4; i++) {
      if (i < vecSize) {
        storeArgs.emplace_back(Builder.CreateExtractElement(val, i));
        mask |= (1<<i);
      } else {
        storeArgs.emplace_back(emptyVal);
      }
    }
  } else {
    if (isTyped) {
      mask = DXIL::kCompMask_All;
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(val);
    } else {
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(undefVal);
      storeArgs.emplace_back(undefVal);
      storeArgs.emplace_back(undefVal);
      mask = DXIL::kCompMask_X;
    }
  }
  if (is64) {
    // Split each 64-bit value slot into two 32-bit dwords in place.
    unsigned size = 1;
    if (Ty->isVectorTy()) {
      size = Ty->getVectorNumElements();
    }
    DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords");
    unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore
                             ? DXIL::OperandIndex::kTextureStoreVal0OpIdx
                             : DXIL::OperandIndex::kBufferStoreVal0OpIdx;
    Value *V0 = storeArgs[val0OpIdx];
    Value *V1 = storeArgs[val0OpIdx+1];
    Value *vals32[4];
    EltTy = Ty->getScalarType();
    Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder);
    // Fill the uninit vals.
    if (size == 1) {
      vals32[2] = vals32[0];
      vals32[3] = vals32[1];
    }
    // Change valOp to 32 version.
    for (unsigned i = 0; i < 4; i++) {
      storeArgs[val0OpIdx + i] = vals32[i];
    }
    // change mask for double
    if (opcode == DXIL::OpCode::RawBufferStore) {
      mask = size == 1 ?
          DXIL::kCompMask_X | DXIL::kCompMask_Y : DXIL::kCompMask_All;
    }
  }
  storeArgs.emplace_back(OP->GetU8Const(mask)); // mask
  if (opcode == DXIL::OpCode::RawBufferStore)
    storeArgs.emplace_back(Alignment); // alignment only for raw buffer
  Builder.CreateCall(F, storeArgs);
}
  3142. Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3143. HLOperationLowerHelper &helper,
  3144. HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3145. hlsl::OP *hlslOP = &helper.hlslOP;
  3146. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3147. IRBuilder<> Builder(CI);
  3148. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  3149. Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx);
  3150. Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx);
  3151. TranslateStore(RK, handle, val, offset, Builder, hlslOP);
  3152. return nullptr;
  3153. }
  3154. }
  3155. // Atomic intrinsics.
  3156. namespace {
  3157. // Atomic intrinsics.
// Gathers the operands of an HL Interlocked* call so the atomic
// translations below can share one lowering path.
struct AtomicHelper {
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h);
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
               Value *baseOffset);
  OP::OpCode opcode;    // AtomicBinOp or AtomicCompareExchange.
  Value *handle;        // Resource handle.
  Value *addr;          // Destination address / coordinate.
  Value *offset; // Offset for structured buffer.
  Value *value;         // Operand value (exchange value for cmpxchg).
  Value *originalValue; // Optional out pointer for the pre-op value.
  Value *compareValue;  // Comparand, only for AtomicCompareExchange.
};
  3170. // For MOP version of Interlocked*.
  3171. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h)
  3172. : opcode(op), handle(h), offset(nullptr), originalValue(nullptr) {
  3173. addr = CI->getArgOperand(HLOperandIndex::kObjectInterlockedDestOpIndex);
  3174. if (op == OP::OpCode::AtomicCompareExchange) {
  3175. compareValue = CI->getArgOperand(
  3176. HLOperandIndex::kObjectInterlockedCmpCompareValueOpIndex);
  3177. value =
  3178. CI->getArgOperand(HLOperandIndex::kObjectInterlockedCmpValueOpIndex);
  3179. if (CI->getNumArgOperands() ==
  3180. (HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex + 1))
  3181. originalValue = CI->getArgOperand(
  3182. HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex);
  3183. } else {
  3184. value = CI->getArgOperand(HLOperandIndex::kObjectInterlockedValueOpIndex);
  3185. if (CI->getNumArgOperands() ==
  3186. (HLOperandIndex::kObjectInterlockedOriginalValueOpIndex + 1))
  3187. originalValue = CI->getArgOperand(
  3188. HLOperandIndex::kObjectInterlockedOriginalValueOpIndex);
  3189. }
  3190. }
  3191. // For IOP version of Interlocked*.
  3192. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
  3193. Value *baseOffset)
  3194. : opcode(op), handle(h), addr(bufIdx),
  3195. offset(baseOffset), originalValue(nullptr) {
  3196. if (op == OP::OpCode::AtomicCompareExchange) {
  3197. compareValue =
  3198. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3199. value = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3200. if (CI->getNumArgOperands() ==
  3201. (HLOperandIndex::kInterlockedCmpOriginalValueOpIndex + 1))
  3202. originalValue = CI->getArgOperand(
  3203. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex);
  3204. } else {
  3205. value = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3206. if (CI->getNumArgOperands() ==
  3207. (HLOperandIndex::kInterlockedOriginalValueOpIndex + 1))
  3208. originalValue =
  3209. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex);
  3210. }
  3211. }
// Emit the DXIL AtomicBinOp op for a resource destination.
// helper.addr supplies up to 3 coordinates; helper.offset (structured
// buffers only) fills the second coordinate slot. If the caller requested
// the original value, it is stored to helper.originalValue.
void TranslateAtomicBinaryOperation(AtomicHelper &helper,
                                    DXIL::AtomicBinOpCode atomicOp,
                                    IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  Value *handle = helper.handle;
  Value *addr = helper.addr;
  Value *val = helper.value;
  Type *Ty = val->getType();
  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  Value *atomicOpArg = hlslOP->GetU32Const(static_cast<unsigned>(atomicOp));
  // Fixed-position argument array; coordinate slots are patched below.
  Value *args[] = {opArg, handle, atomicOpArg,
                   undefI, undefI, undefI, // coordinates
                   val};
  // Setup coordinates.
  if (addr->getType()->isVectorTy()) {
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
    DXASSERT(vectorNumElements <= 3, "up to 3 elements for atomic binary op");
    _Analysis_assume_(vectorNumElements <= 3);
    for (unsigned i = 0; i < vectorNumElements; i++) {
      Value *Elt = Builder.CreateExtractElement(addr, i);
      args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx + i] = Elt;
    }
  } else
    args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx] = addr;
  // Set offset for structured buffer.
  if (helper.offset)
    args[DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx] = helper.offset;
  Value *origVal =
      Builder.CreateCall(dxilAtomic, args, hlslOP->GetAtomicOpName(atomicOp));
  if (helper.originalValue) {
    Builder.CreateStore(origVal, helper.originalValue);
  }
}
  3246. Value *TranslateMopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3247. OP::OpCode opcode,
  3248. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3249. hlsl::OP *hlslOP = &helper.hlslOP;
  3250. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3251. IRBuilder<> Builder(CI);
  3252. switch (IOP) {
  3253. case IntrinsicOp::MOP_InterlockedAdd: {
  3254. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3255. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add, Builder,
  3256. hlslOP);
  3257. } break;
  3258. case IntrinsicOp::MOP_InterlockedAnd: {
  3259. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3260. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And, Builder,
  3261. hlslOP);
  3262. } break;
  3263. case IntrinsicOp::MOP_InterlockedExchange: {
  3264. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3265. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
  3266. Builder, hlslOP);
  3267. } break;
  3268. case IntrinsicOp::MOP_InterlockedMax: {
  3269. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3270. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax, Builder,
  3271. hlslOP);
  3272. } break;
  3273. case IntrinsicOp::MOP_InterlockedMin: {
  3274. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3275. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin, Builder,
  3276. hlslOP);
  3277. } break;
  3278. case IntrinsicOp::MOP_InterlockedUMax: {
  3279. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3280. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax, Builder,
  3281. hlslOP);
  3282. } break;
  3283. case IntrinsicOp::MOP_InterlockedUMin: {
  3284. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3285. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin, Builder,
  3286. hlslOP);
  3287. } break;
  3288. case IntrinsicOp::MOP_InterlockedOr: {
  3289. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3290. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or, Builder,
  3291. hlslOP);
  3292. } break;
  3293. case IntrinsicOp::MOP_InterlockedXor: {
  3294. default:
  3295. DXASSERT(IOP == IntrinsicOp::MOP_InterlockedXor,
  3296. "invalid MOP atomic intrinsic");
  3297. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3298. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor, Builder,
  3299. hlslOP);
  3300. } break;
  3301. }
  3302. return nullptr;
  3303. }
// Emit the DXIL AtomicCompareExchange op for a resource destination.
// helper.addr supplies up to 3 coordinates; helper.offset (structured
// buffers only) fills the second coordinate slot. If the caller requested
// the original value, it is stored to helper.originalValue.
void TranslateAtomicCmpXChg(AtomicHelper &helper, IRBuilder<> &Builder,
                            hlsl::OP *hlslOP) {
  Value *handle = helper.handle;
  Value *addr = helper.addr;
  Value *val = helper.value;
  Value *cmpVal = helper.compareValue;
  Type *Ty = val->getType();
  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  // Fixed-position argument array; coordinate slots are patched below.
  Value *args[] = {opArg, handle, undefI, undefI, undefI, // coordinates
                   cmpVal, val};
  // Setup coordinates.
  if (addr->getType()->isVectorTy()) {
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
    DXASSERT(vectorNumElements <= 3, "up to 3 elements in atomic op");
    _Analysis_assume_(vectorNumElements <= 3);
    for (unsigned i = 0; i < vectorNumElements; i++) {
      Value *Elt = Builder.CreateExtractElement(addr, i);
      args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx + i] = Elt;
    }
  } else
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx] = addr;
  // Set offset for structured buffer.
  if (helper.offset)
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx] = helper.offset;
  Value *origVal = Builder.CreateCall(dxilAtomic, args);
  if (helper.originalValue) {
    Builder.CreateStore(origVal, helper.originalValue);
  }
}
  3335. Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3336. OP::OpCode opcode,
  3337. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3338. hlsl::OP *hlslOP = &helper.hlslOP;
  3339. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3340. IRBuilder<> Builder(CI);
  3341. AtomicHelper atomicHelper(CI, OP::OpCode::AtomicCompareExchange, handle);
  3342. TranslateAtomicCmpXChg(atomicHelper, Builder, hlslOP);
  3343. return nullptr;
  3344. }
  3345. void TranslateSharedMemAtomicBinOp(CallInst *CI, IntrinsicOp IOP, Value *addr) {
  3346. AtomicRMWInst::BinOp Op;
  3347. switch (IOP) {
  3348. case IntrinsicOp::IOP_InterlockedAdd:
  3349. Op = AtomicRMWInst::BinOp::Add;
  3350. break;
  3351. case IntrinsicOp::IOP_InterlockedAnd:
  3352. Op = AtomicRMWInst::BinOp::And;
  3353. break;
  3354. case IntrinsicOp::IOP_InterlockedExchange:
  3355. Op = AtomicRMWInst::BinOp::Xchg;
  3356. break;
  3357. case IntrinsicOp::IOP_InterlockedMax:
  3358. Op = AtomicRMWInst::BinOp::Max;
  3359. break;
  3360. case IntrinsicOp::IOP_InterlockedUMax:
  3361. Op = AtomicRMWInst::BinOp::UMax;
  3362. break;
  3363. case IntrinsicOp::IOP_InterlockedMin:
  3364. Op = AtomicRMWInst::BinOp::Min;
  3365. break;
  3366. case IntrinsicOp::IOP_InterlockedUMin:
  3367. Op = AtomicRMWInst::BinOp::UMin;
  3368. break;
  3369. case IntrinsicOp::IOP_InterlockedOr:
  3370. Op = AtomicRMWInst::BinOp::Or;
  3371. break;
  3372. case IntrinsicOp::IOP_InterlockedXor:
  3373. default:
  3374. DXASSERT(IOP == IntrinsicOp::IOP_InterlockedXor, "Invalid Intrinsic");
  3375. Op = AtomicRMWInst::BinOp::Xor;
  3376. break;
  3377. }
  3378. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3379. IRBuilder<> Builder(CI);
  3380. Value *Result = Builder.CreateAtomicRMW(
  3381. Op, addr, val, AtomicOrdering::SequentiallyConsistent);
  3382. if (CI->getNumArgOperands() >
  3383. HLOperandIndex::kInterlockedOriginalValueOpIndex)
  3384. Builder.CreateStore(
  3385. Result,
  3386. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex));
  3387. }
  3388. Value *TranslateIopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3389. DXIL::OpCode opcode,
  3390. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3391. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3392. // Get the original addr from cast.
  3393. if (CastInst *castInst = dyn_cast<CastInst>(addr))
  3394. addr = castInst->getOperand(0);
  3395. else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(addr)) {
  3396. if (CE->getOpcode() == Instruction::AddrSpaceCast) {
  3397. addr = CE->getOperand(0);
  3398. }
  3399. }
  3400. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3401. if (addressSpace == DXIL::kTGSMAddrSpace)
  3402. TranslateSharedMemAtomicBinOp(CI, IOP, addr);
  3403. else {
  3404. // buffer atomic translated in TranslateSubscript.
  3405. // Do nothing here.
  3406. // Mark not translated.
  3407. Translated = false;
  3408. }
  3409. return nullptr;
  3410. }
  3411. void TranslateSharedMemAtomicCmpXChg(CallInst *CI, Value *addr) {
  3412. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3413. Value *cmpVal =
  3414. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3415. IRBuilder<> Builder(CI);
  3416. Value *Result = Builder.CreateAtomicCmpXchg(
  3417. addr, cmpVal, val, AtomicOrdering::SequentiallyConsistent,
  3418. AtomicOrdering::SequentiallyConsistent);
  3419. if (CI->getNumArgOperands() >
  3420. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex) {
  3421. Value *originVal = Builder.CreateExtractValue(Result, 0);
  3422. Builder.CreateStore(
  3423. originVal,
  3424. CI->getArgOperand(HLOperandIndex::kInterlockedCmpOriginalValueOpIndex));
  3425. }
  3426. }
  3427. Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3428. DXIL::OpCode opcode,
  3429. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3430. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3431. // Get the original addr from cast.
  3432. if (CastInst *castInst = dyn_cast<CastInst>(addr))
  3433. addr = castInst->getOperand(0);
  3434. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3435. if (addressSpace == DXIL::kTGSMAddrSpace)
  3436. TranslateSharedMemAtomicCmpXChg(CI, addr);
  3437. else {
  3438. // buffer atomic translated in TranslateSubscript.
  3439. // Do nothing here.
  3440. // Mark not translated.
  3441. Translated = false;
  3442. }
  3443. return nullptr;
  3444. }
  3445. }
  3446. // Process Tess Factor.
  3447. namespace {
  3448. // Clamp to [0.0f..1.0f], NaN->0.0f.
  3449. Value *CleanupTessFactorScale(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3450. float fMin = 0;
  3451. float fMax = 1;
  3452. Type *f32Ty = input->getType()->getScalarType();
  3453. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3454. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3455. Type *Ty = input->getType();
  3456. if (Ty->isVectorTy())
  3457. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3458. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3459. if (Ty->isVectorTy())
  3460. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3461. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3462. }
  3463. // Clamp to [1.0f..Inf], NaN->1.0f.
  3464. Value *CleanupTessFactor(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder)
  3465. {
  3466. float fMin = 1.0;
  3467. Type *f32Ty = input->getType()->getScalarType();
  3468. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3469. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3470. return TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3471. }
  3472. // Do partitioning-specific clamping.
  3473. Value *ClampTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3474. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3475. const unsigned kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64;
  3476. const unsigned kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63;
  3477. const unsigned kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2;
  3478. const unsigned kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1;
  3479. const unsigned kTESSELLATOR_MAX_TESSELLATION_FACTOR = 64;
  3480. float fMin;
  3481. float fMax;
  3482. switch (partitionMode) {
  3483. case DXIL::TessellatorPartitioning::Integer:
  3484. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3485. fMax = kTESSELLATOR_MAX_TESSELLATION_FACTOR;
  3486. break;
  3487. case DXIL::TessellatorPartitioning::Pow2:
  3488. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3489. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3490. break;
  3491. case DXIL::TessellatorPartitioning::FractionalOdd:
  3492. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3493. fMax = kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
  3494. break;
  3495. case DXIL::TessellatorPartitioning::FractionalEven:
  3496. default:
  3497. DXASSERT(partitionMode == DXIL::TessellatorPartitioning::FractionalEven,
  3498. "invalid partition mode");
  3499. fMin = kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
  3500. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3501. break;
  3502. }
  3503. Type *f32Ty = input->getType()->getScalarType();
  3504. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3505. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3506. Type *Ty = input->getType();
  3507. if (Ty->isVectorTy())
  3508. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3509. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3510. if (Ty->isVectorTy())
  3511. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3512. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3513. }
  3514. // round up for integer/pow2 partitioning
  3515. // note that this code assumes the inputs should be in the range [1, inf),
  3516. // which should be enforced by the clamp above.
  3517. Value *RoundUpTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3518. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3519. switch (partitionMode) {
  3520. case DXIL::TessellatorPartitioning::Integer:
  3521. return TrivialDxilUnaryOperation(DXIL::OpCode::Round_pi, input, hlslOP, Builder);
  3522. case DXIL::TessellatorPartitioning::Pow2: {
  3523. const unsigned kExponentMask = 0x7f800000;
  3524. const unsigned kExponentLSB = 0x00800000;
  3525. const unsigned kMantissaMask = 0x007fffff;
  3526. Type *Ty = input->getType();
  3527. // (val = (asuint(val) & mantissamask) ?
  3528. // (asuint(val) & exponentmask) + exponentbump :
  3529. // asuint(val) & exponentmask;
  3530. Type *uintTy = Type::getInt32Ty(Ty->getContext());
  3531. if (Ty->isVectorTy())
  3532. uintTy = VectorType::get(uintTy, Ty->getVectorNumElements());
  3533. Value *uintVal = Builder.CreateCast(Instruction::CastOps::FPToUI, input, uintTy);
  3534. Value *mantMask = ConstantInt::get(uintTy->getScalarType(), kMantissaMask);
  3535. mantMask = SplatToVector(mantMask, uintTy, Builder);
  3536. Value *manVal = Builder.CreateAnd(uintVal, mantMask);
  3537. Value *expMask = ConstantInt::get(uintTy->getScalarType(), kExponentMask);
  3538. expMask = SplatToVector(expMask, uintTy, Builder);
  3539. Value *expVal = Builder.CreateAnd(uintVal, expMask);
  3540. Value *expLSB = ConstantInt::get(uintTy->getScalarType(), kExponentLSB);
  3541. expLSB = SplatToVector(expLSB, uintTy, Builder);
  3542. Value *newExpVal = Builder.CreateAdd(expVal, expLSB);
  3543. Value *manValNotZero = Builder.CreateICmpEQ(manVal, ConstantAggregateZero::get(uintTy));
  3544. Value *factors = Builder.CreateSelect(manValNotZero, newExpVal, expVal);
  3545. return Builder.CreateUIToFP(factors, Ty);
  3546. } break;
  3547. case DXIL::TessellatorPartitioning::FractionalEven:
  3548. case DXIL::TessellatorPartitioning::FractionalOdd:
  3549. return input;
  3550. default:
  3551. DXASSERT(0, "invalid partition mode");
  3552. return nullptr;
  3553. }
  3554. }
  3555. Value *TranslateProcessIsolineTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3556. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3557. hlsl::OP *hlslOP = &helper.hlslOP;
  3558. // Get partition mode
  3559. DXASSERT(helper.functionProps, "");
  3560. DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
  3561. DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;
  3562. IRBuilder<> Builder(CI);
  3563. Value *rawDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDetailFactor);
  3564. rawDetailFactor = Builder.CreateExtractElement(rawDetailFactor, (uint64_t)0);
  3565. Value *rawDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDensityFactor);
  3566. rawDensityFactor = Builder.CreateExtractElement(rawDensityFactor, (uint64_t)0);
  3567. Value *init = UndefValue::get(VectorType::get(helper.f32Ty, 2));
  3568. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)0);
  3569. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)1);
  3570. Value *clamped = ClampTessFactor(init, partition, hlslOP, Builder);
  3571. Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
  3572. Value *roundedDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDetailFactor);
  3573. Value *temp = UndefValue::get(VectorType::get(helper.f32Ty, 1));
  3574. Value *roundedX = Builder.CreateExtractElement(rounded, (uint64_t)0);
  3575. temp = Builder.CreateInsertElement(temp, roundedX, (uint64_t)0);
  3576. Builder.CreateStore(temp, roundedDetailFactor);
  3577. Value *roundedDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDensityFactor);
  3578. Value *roundedY = Builder.CreateExtractElement(rounded, 1);
  3579. temp = Builder.CreateInsertElement(temp, roundedY, (uint64_t)0);
  3580. Builder.CreateStore(temp, roundedDensityFactor);
  3581. return nullptr;
  3582. }
  3583. // 3 inputs, 1 result
  3584. Value *ApplyTriTessFactorOp(Value *input, DXIL::OpCode opcode, hlsl::OP *hlslOP,
  3585. IRBuilder<> &Builder) {
  3586. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3587. Value *input1 = Builder.CreateExtractElement(input, 1);
  3588. Value *input2 = Builder.CreateExtractElement(input, 2);
  3589. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3590. Value *temp =
  3591. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3592. Value *combined =
  3593. TrivialDxilBinaryOperation(opcode, temp, input2, hlslOP, Builder);
  3594. return combined;
  3595. } else {
  3596. // Avg.
  3597. Value *temp = Builder.CreateFAdd(input0, input1);
  3598. Value *combined = Builder.CreateFAdd(temp, input2);
  3599. Value *rcp = ConstantFP::get(input0->getType(), 1.0 / 3.0);
  3600. combined = Builder.CreateFMul(combined, rcp);
  3601. return combined;
  3602. }
  3603. }
  3604. // 4 inputs, 1 result
  3605. Value *ApplyQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  3606. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3607. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3608. Value *input1 = Builder.CreateExtractElement(input, 1);
  3609. Value *input2 = Builder.CreateExtractElement(input, 2);
  3610. Value *input3 = Builder.CreateExtractElement(input, 3);
  3611. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3612. Value *temp0 =
  3613. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3614. Value *temp1 =
  3615. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  3616. Value *combined =
  3617. TrivialDxilBinaryOperation(opcode, temp0, temp1, hlslOP, Builder);
  3618. return combined;
  3619. } else {
  3620. // Avg.
  3621. Value *temp0 = Builder.CreateFAdd(input0, input1);
  3622. Value *temp1 = Builder.CreateFAdd(input2, input3);
  3623. Value *combined = Builder.CreateFAdd(temp0, temp1);
  3624. Value *rcp = ConstantFP::get(input0->getType(), 0.25);
  3625. combined = Builder.CreateFMul(combined, rcp);
  3626. return combined;
  3627. }
  3628. }
  3629. // 4 inputs, 2 result
  3630. Value *Apply2DQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  3631. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3632. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3633. Value *input1 = Builder.CreateExtractElement(input, 1);
  3634. Value *input2 = Builder.CreateExtractElement(input, 2);
  3635. Value *input3 = Builder.CreateExtractElement(input, 3);
  3636. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3637. Value *temp0 =
  3638. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3639. Value *temp1 =
  3640. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  3641. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  3642. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  3643. combined = Builder.CreateInsertElement(combined, temp1, 1);
  3644. return combined;
  3645. } else {
  3646. // Avg.
  3647. Value *temp0 = Builder.CreateFAdd(input0, input1);
  3648. Value *temp1 = Builder.CreateFAdd(input2, input3);
  3649. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  3650. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  3651. combined = Builder.CreateInsertElement(combined, temp1, 1);
  3652. Constant *rcp = ConstantFP::get(input0->getType(), 0.5);
  3653. rcp = ConstantVector::getSplat(2, rcp);
  3654. combined = Builder.CreateFMul(combined, rcp);
  3655. return combined;
  3656. }
  3657. }
// Handle the "small tessellation factor" case: when the scaled, rounded factor
// falls below cutoffVal, replace both the clamped and the rounded results with
// the (clamped/capped/rounded) unscaled average instead.
//   pClampedResult  - in/out: clamped scaled factor; updated with the selection.
//   rounded         - rounded scaled factor (the default result).
//   averageUnscaled - average of the unscaled tess factors.
//   cutoffVal       - threshold below which the average is substituted.
// Returns the selected rounded factor.
Value *ResolveSmallValue(Value **pClampedResult, Value *rounded, Value *averageUnscaled,
float cutoffVal, DXIL::TessellatorPartitioning partitionMode, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
Value *clampedResult = *pClampedResult;
Value *clampedVal = clampedResult;
Value *roundedVal = rounded;
// Do partitioning-specific clamping.
Value *clampedAvg = ClampTessFactor(averageUnscaled, partitionMode, hlslOP, Builder);
Constant *cutoffVals = ConstantFP::get(Type::getFloatTy(rounded->getContext()), cutoffVal);
// Splat the scalar cutoff to the average's vector shape when needed.
if (clampedAvg->getType()->isVectorTy())
cutoffVals = ConstantVector::getSplat(clampedAvg->getType()->getVectorNumElements(), cutoffVals);
// Limit the value.
clampedAvg = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, clampedAvg, cutoffVals, hlslOP, Builder);
// Round up for integer/pow2 partitioning.
Value *roundedAvg = RoundUpTessFactor(clampedAvg, partitionMode, hlslOP, Builder);
// Match the cutoff's shape to 'rounded' for the comparison below.
// NOTE(review): this assumes a scalar-vs-vector mismatch only; getSplat from
// an already-vector constant would be invalid — confirm callers' shapes.
if (rounded->getType() != cutoffVals->getType())
cutoffVals = ConstantVector::getSplat(rounded->getType()->getVectorNumElements(), cutoffVals);
// If the scaled value is less than the cutoff, then take the unscaled average.
Value *lt = Builder.CreateFCmpOLT(rounded, cutoffVals);
if (clampedAvg->getType() != clampedVal->getType())
clampedAvg = SplatToVector(clampedAvg, clampedVal->getType(), Builder);
*pClampedResult = Builder.CreateSelect(lt, clampedAvg, clampedVal);
if (roundedAvg->getType() != roundedVal->getType())
roundedAvg = SplatToVector(roundedAvg, roundedVal->getType(), Builder);
Value *result = Builder.CreateSelect(lt, roundedAvg, roundedVal);
return result;
}
// For 2D quad tessellation: for each float2 lane that fell below cutoffVal,
// substitute the max of the two axes capped at the cutoff (raw cutoff for the
// clamped result, rounded cutoff for the final/rounded result) so one tiny
// axis does not collapse while the other axis is large.
//   pFinalResult   - in/out: rounded float2 factors.
//   pClampedResult - in/out: clamped float2 factors.
void ResolveQuadAxes( Value **pFinalResult, Value **pClampedResult,
float cutoffVal, DXIL::TessellatorPartitioning partitionMode, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
Value *finalResult = *pFinalResult;
Value *clampedResult = *pClampedResult;
Value *clampR = clampedResult;
Value *finalR = finalResult;
Type *f32Ty = Type::getFloatTy(finalR->getContext());
Constant *cutoffVals = ConstantFP::get(f32Ty, cutoffVal);
// Caps: raw cutoff for the clamped result, rounded cutoff for the final one.
Value *minValsX = cutoffVals;
Value *minValsY = RoundUpTessFactor(cutoffVals, partitionMode, hlslOP, Builder);
// Max across the two axes of the clamped result...
Value *clampRX = Builder.CreateExtractElement(clampR, (uint64_t)0);
Value *clampRY = Builder.CreateExtractElement(clampR, 1);
Value *maxValsX = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, clampRX, clampRY, hlslOP, Builder);
// ...and across the two axes of the final (rounded) result.
Value *finalRX = Builder.CreateExtractElement(finalR, (uint64_t)0);
Value *finalRY = Builder.CreateExtractElement(finalR, 1);
Value *maxValsY = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, finalRX, finalRY, hlslOP, Builder);
// Don't go over our threshold ("final" one is rounded).
Value * optionX = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsX, minValsX, hlslOP, Builder);
Value * optionY = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsY, minValsY, hlslOP, Builder);
Value *clampL = SplatToVector(optionX, clampR->getType(), Builder);
Value *finalL = SplatToVector(optionY, finalR->getType(), Builder);
// Per-lane select: only lanes below the cutoff get the substituted value.
cutoffVals = ConstantVector::getSplat(2, cutoffVals);
Value *lt = Builder.CreateFCmpOLT(clampedResult, cutoffVals);
*pClampedResult = Builder.CreateSelect(lt, clampL, clampR);
*pFinalResult = Builder.CreateSelect(lt, finalL, finalR);
}
// Lower the ProcessTriTessFactors* / ProcessQuadTessFactors* /
// Process2DQuadTessFactors* intrinsic family: clamp and round the raw edge
// factors per the hull shader's partitioning mode, compute the inside
// factor(s) from the chosen reduction (Avg/Max/Min) scaled by insideScale,
// and store the results through the intrinsic's out-parameters.
// Returns nullptr since the HL call produces no value.
Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
// Get partition mode
DXASSERT(helper.functionProps, "");
DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;
IRBuilder<> Builder(CI);
// Pick the reduction op from the intrinsic flavor; NumOpCodes means "average"
// for the Apply*TessFactorOp helpers.
DXIL::OpCode tessFactorOp = DXIL::OpCode::NumOpCodes;
switch (IOP) {
case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
tessFactorOp = DXIL::OpCode::FMax;
break;
case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
tessFactorOp = DXIL::OpCode::FMin;
break;
default:
// Default is Avg.
break;
}
Value *rawEdgeFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawEdgeFactor);
Value *insideScale = CI->getArgOperand(HLOperandIndex::kProcessTessFactorInsideScale);
// Clamp to [0.0f..1.0f], NaN->0.0f.
Value *scales = CleanupTessFactorScale(insideScale, hlslOP, Builder);
// Do partitioning-specific clamping.
Value *clamped = ClampTessFactor(rawEdgeFactor, partition, hlslOP, Builder);
// Round up for integer/pow2 partitioning.
Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
// Store the output.
Value *roundedEdgeFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedEdgeFactor);
Builder.CreateStore(rounded, roundedEdgeFactor);
// Clamp to [1.0f..Inf], NaN->1.0f.
bool isQuad = false;
Value *clean = CleanupTessFactor(rawEdgeFactor, hlslOP, Builder);
// Reduce the cleaned edge factors to the inside factor(s) for this topology.
Value *factors = nullptr;
switch (IOP) {
case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
factors = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
break;
case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
factors = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
isQuad = true;
break;
case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
factors = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
break;
default:
DXASSERT(0, "invalid opcode for ProcessTessFactor");
break;
}
// Scale the reduced factor(s) by the cleaned inside scale, splatting the
// scalar reduction to a vector when the scale is a vector.
Value *scaledI = nullptr;
if (scales->getType() == factors->getType())
scaledI = Builder.CreateFMul(factors, scales);
else {
Value *vecFactors = SplatToVector(factors, scales->getType(), Builder);
scaledI = Builder.CreateFMul(vecFactors, scales);
}
// Do partitioning-specific clamping.
Value *clampedI = ClampTessFactor(scaledI, partition, hlslOP, Builder);
// Round up for integer/pow2 partitioning.
Value *roundedI = RoundUpTessFactor(clampedI, partition, hlslOP, Builder);
Value *finalI = roundedI;
// fractional_odd needs the small-factor fixup: tiny scaled factors fall back
// to the unscaled average (or max) of the edge factors.
if (partition == DXIL::TessellatorPartitioning::FractionalOdd) {
// If not max, set to AVG.
if (tessFactorOp != DXIL::OpCode::FMax)
tessFactorOp = DXIL::OpCode::NumOpCodes;
bool b2D = false;
Value *avgFactorsI = nullptr;
switch (IOP) {
case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
avgFactorsI = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
b2D = true;
break;
case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
avgFactorsI = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
break;
case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
avgFactorsI = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
break;
default:
DXASSERT(0, "invalid opcode for ProcessTessFactor");
break;
}
finalI =
ResolveSmallValue(/*inout*/&clampedI, roundedI, avgFactorsI, /*cutoff*/ 3.0,
partition, hlslOP, Builder);
if (b2D)
ResolveQuadAxes(/*inout*/&finalI, /*inout*/&clampedI, /*cutoff*/3.0, partition, hlslOP, Builder);
}
// Store unrounded and rounded inside factors, widening a scalar result to the
// out-param's vector type when they differ (quad path).
Value *unroundedInsideFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorUnRoundedInsideFactor);
Type *outFactorTy = unroundedInsideFactor->getType()->getPointerElementType();
if (outFactorTy != clampedI->getType()) {
DXASSERT(isQuad, "quad only write one channel of out factor");
clampedI = Builder.CreateExtractElement(clampedI, (uint64_t)0);
// Splat clampedI to float2.
clampedI = SplatToVector(clampedI, outFactorTy, Builder);
}
Builder.CreateStore(clampedI, unroundedInsideFactor);
Value *roundedInsideFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedInsideFactor);
if (outFactorTy != finalI->getType()) {
DXASSERT(isQuad, "quad only write one channel of out factor");
finalI = Builder.CreateExtractElement(finalI, (uint64_t)0);
// Splat finalI to float2.
finalI = SplatToVector(finalI, outFactorTy, Builder);
}
Builder.CreateStore(finalI, roundedInsideFactor);
return nullptr;
}
  3834. }
  3835. // Ray Tracing.
  3836. namespace {
  3837. Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP,
  3838. OP::OpCode opcode,
  3839. HLOperationLowerHelper &helper,
  3840. HLObjectOperationLowerHelper *pObjHelper,
  3841. bool &Translated) {
  3842. hlsl::OP *hlslOP = &helper.hlslOP;
  3843. Value *THit = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  3844. Value *HitKind = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  3845. Value *Attr = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  3846. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  3847. Type *Ty = Attr->getType();
  3848. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  3849. IRBuilder<> Builder(CI);
  3850. return Builder.CreateCall(F, {opArg, THit, HitKind, Attr});
  3851. }
  3852. Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP,
  3853. OP::OpCode opcode,
  3854. HLOperationLowerHelper &helper,
  3855. HLObjectOperationLowerHelper *pObjHelper,
  3856. bool &Translated) {
  3857. hlsl::OP *hlslOP = &helper.hlslOP;
  3858. Value *ShaderIndex = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  3859. Value *Parameter = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  3860. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  3861. Type *Ty = Parameter->getType();
  3862. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  3863. IRBuilder<> Builder(CI);
  3864. return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter});
  3865. }
// Lower TraceRay(accel, flags, mask, ..., RayDesc, payload) to the DXIL
// TraceRay op. The leading scalar HL arguments are forwarded as-is; the
// RayDesc struct argument is loaded field-by-field and flattened into eight
// scalar operands (origin.xyz, tMin, direction.xyz, tMax); the payload pointer
// becomes the final operand and selects the op's overload type.
Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
HLOperationLowerHelper &helper,
HLObjectOperationLowerHelper *pObjHelper,
bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx);
Value *payLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx);
Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
Value *Args[DXIL::OperandIndex::kTraceRayNumOp];
Args[0] = opArg;
// Forward the scalar args that precede the RayDesc unchanged (HL arg index i
// maps to DXIL operand index i here).
for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) {
Args[i] = CI->getArgOperand(i);
}
IRBuilder<> Builder(CI);
// struct RayDesc
//{
// float3 Origin;
// float TMin;
// float3 Direction;
// float TMax;
//};
Value *zeroIdx = hlslOP->GetU32Const(0);
// Field 0: Origin (float3) -> three scalar operands.
Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx});
origin = Builder.CreateLoad(origin);
unsigned index = DXIL::OperandIndex::kTraceRayRayDescOpIdx;
Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
Args[index++] = Builder.CreateExtractElement(origin, 1);
Args[index++] = Builder.CreateExtractElement(origin, 2);
// Field 1: TMin.
Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)});
tmin = Builder.CreateLoad(tmin);
Args[index++] = tmin;
// Field 2: Direction (float3) -> three scalar operands.
Value *direction = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)});
direction = Builder.CreateLoad(direction);
Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
Args[index++] = Builder.CreateExtractElement(direction, 1);
Args[index++] = Builder.CreateExtractElement(direction, 2);
// Field 3: TMax.
Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)});
tmax = Builder.CreateLoad(tmax);
Args[index++] = tmax;
// Payload goes last; its type also selects the TraceRay overload.
Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = payLoad;
Type *Ty = payLoad->getType();
Function *F = hlslOP->GetOpFunc(opcode, Ty);
return Builder.CreateCall(F, Args);
}
  3910. Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3911. HLOperationLowerHelper &helper,
  3912. HLObjectOperationLowerHelper *pObjHelper,
  3913. bool &Translated) {
  3914. hlsl::OP *hlslOP = &helper.hlslOP;
  3915. VectorType *Ty = cast<VectorType>(CI->getType());
  3916. uint8_t vals[] = {0,1,2,3};
  3917. Constant *src = ConstantDataVector::get(CI->getContext(), vals);
  3918. Value *retVal = TrivialDxilOperation(opcode, {nullptr, src}, Ty, CI, hlslOP);
  3919. return retVal;
  3920. }
// Lower no-argument matrix system values (ObjectToWorld / WorldToObject): the
// DXIL op takes (row, col) indices, so pass constant row/column index vectors
// covering the 3x4 matrix in row-major order and let TrivialDxilOperation
// build one call per element.
Value *TranslateNoArgMatrixOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
HLOperationLowerHelper &helper,
HLObjectOperationLowerHelper *pObjHelper,
bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
VectorType *Ty = cast<VectorType>(CI->getType());
// NOTE(review): rows deliberately use uint32_t (lowering to i32 constants)
// while cols use uint8_t (i8) — presumably matching the DXIL matrix op's
// (i32 row, i8 col) operand types; confirm against the op signature before
// "unifying" the element types.
uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
// First operand slot (the opcode constant) is filled in by the helper.
Value *retVal =
TrivialDxilOperation(opcode, {nullptr, rows, cols}, Ty, CI, hlslOP);
return retVal;
}
  3935. } // namespace
  3936. // Lower table.
  3937. namespace {
// Fallback lowering entry for intrinsics that have no DXIL translation on this
// path; reaching it indicates a front-end/lowering-table mismatch.
Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
DXASSERT(0, "unsupported intrinsic");
return nullptr;
}
// SPIRV change starts
#ifdef ENABLE_SPIRV_CODEGEN
// Lowering entry for Vulkan-only intrinsics: they have no DXIL mapping, so
// hitting this during DXIL codegen is an error.
Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP,
DXIL::OpCode opcode,
HLOperationLowerHelper &helper,
HLObjectOperationLowerHelper *pObjHelper,
bool &Translated) {
DXASSERT(0, "unsupported Vulkan intrinsic");
return nullptr;
}
#endif // ENABLE_SPIRV_CODEGEN
// SPIRV change ends
// Stream-output intrinsics are deferred: leave the HL call in place and clear
// 'Translated' so a later pass handles it.
Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
// Translated in DxilGenerationPass::GenerateStreamOutputOperation.
// Do nothing here.
// Mark not translated.
Translated = false;
return nullptr;
}
  3963. // This table has to match IntrinsicOp orders
  3964. IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] = {
  3965. {IntrinsicOp::IOP_AcceptHitAndEndSearch, TrivialNoArgOperation, DXIL::OpCode::AcceptHitAndEndSearch},
  3966. {IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc},
  3967. {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  3968. {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  3969. {IntrinsicOp::IOP_CallShader, TranslateCallShader, DXIL::OpCode::CallShader},
  3970. {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped},
  3971. {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, DXIL::OpCode::NumOpCodes},
  3972. {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  3973. {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  3974. {IntrinsicOp::IOP_DispatchRaysDimensions, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysDimensions},
  3975. {IntrinsicOp::IOP_DispatchRaysIndex, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysIndex},
  3976. {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, DXIL::OpCode::NumOpCodes},
  3977. {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, DXIL::OpCode::EvalCentroid},
  3978. {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, DXIL::OpCode::NumOpCodes},
  3979. {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex, DXIL::OpCode::AttributeAtVertex},
  3980. {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, DXIL::OpCode::RenderTargetGetSampleCount},
  3981. {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, DXIL::OpCode::NumOpCodes},
  3982. {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  3983. {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  3984. {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation, DXIL::OpCode::HitKind},
  3985. {IntrinsicOp::IOP_IgnoreHit, TrivialNoArgOperation, DXIL::OpCode::IgnoreHit},
  3986. {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceID},
  3987. {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceIndex},
  3988. {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  3989. {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  3990. {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  3991. {IntrinsicOp::IOP_InterlockedCompareStore, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  3992. {IntrinsicOp::IOP_InterlockedExchange, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  3993. {IntrinsicOp::IOP_InterlockedMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  3994. {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  3995. {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  3996. {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  3997. {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, DXIL::OpCode::NumOpCodes},
  3998. {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayDirection},
  3999. {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayOrigin},
  4000. {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrixOperation, DXIL::OpCode::ObjectToWorld},
  4001. {IntrinsicOp::IOP_PrimitiveIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::PrimitiveID},
  4002. {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4003. {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4004. {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4005. {IntrinsicOp::IOP_ProcessIsolineTessFactors, TranslateProcessIsolineTessFactors, DXIL::OpCode::NumOpCodes},
  4006. {IntrinsicOp::IOP_ProcessQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4007. {IntrinsicOp::IOP_ProcessQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4008. {IntrinsicOp::IOP_ProcessQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4009. {IntrinsicOp::IOP_ProcessTriTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4010. {IntrinsicOp::IOP_ProcessTriTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4011. {IntrinsicOp::IOP_ProcessTriTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4012. {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4013. {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4014. {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4015. {IntrinsicOp::IOP_QuadReadLaneAt, TranslateQuadReadLaneAt, DXIL::OpCode::NumOpCodes},
  4016. {IntrinsicOp::IOP_RayFlags, TrivialNoArgWithRetOperation, DXIL::OpCode::RayFlags},
  4017. {IntrinsicOp::IOP_RayTCurrent, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTCurrent},
  4018. {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTMin},
  4019. {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection, DXIL::OpCode::ReportHit},
  4020. {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay},
  4021. {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual, DXIL::OpCode::WaveActiveAllEqual},
  4022. {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B, DXIL::OpCode::WaveAllTrue},
  4023. {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B, DXIL::OpCode::WaveAnyTrue},
  4024. {IntrinsicOp::IOP_WaveActiveBallot, TranslateWaveBallot, DXIL::OpCode::WaveActiveBallot},
  4025. {IntrinsicOp::IOP_WaveActiveBitAnd, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4026. {IntrinsicOp::IOP_WaveActiveBitOr, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4027. {IntrinsicOp::IOP_WaveActiveBitXor, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4028. {IntrinsicOp::IOP_WaveActiveCountBits, TranslateWaveA2B, DXIL::OpCode::WaveAllBitCount},
  4029. {IntrinsicOp::IOP_WaveActiveMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4030. {IntrinsicOp::IOP_WaveActiveMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4031. {IntrinsicOp::IOP_WaveActiveProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4032. {IntrinsicOp::IOP_WaveActiveSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4033. {IntrinsicOp::IOP_WaveGetLaneCount, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneCount},
  4034. {IntrinsicOp::IOP_WaveGetLaneIndex, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneIndex},
  4035. {IntrinsicOp::IOP_WaveIsFirstLane, TranslateWaveToVal, DXIL::OpCode::WaveIsFirstLane},
  4036. {IntrinsicOp::IOP_WavePrefixCountBits, TranslateWaveA2B, DXIL::OpCode::WavePrefixBitCount},
  4037. {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
  4038. {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
  4039. {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt, DXIL::OpCode::WaveReadLaneAt},
  4040. {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, DXIL::OpCode::WaveReadLaneFirst},
  4041. {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayDirection},
  4042. {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayOrigin},
  4043. {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrixOperation, DXIL::OpCode::WorldToObject},
  4044. {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
  4045. {IntrinsicOp::IOP_abs, TransalteAbs, DXIL::OpCode::NumOpCodes},
  4046. {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
  4047. {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes},
  4048. {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes},
  4049. {IntrinsicOp::IOP_asdouble, TranslateAsDouble, DXIL::OpCode::MakeDouble},
  4050. {IntrinsicOp::IOP_asfloat, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4051. {IntrinsicOp::IOP_asfloat16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4052. {IntrinsicOp::IOP_asin, TrivialUnaryOperation, DXIL::OpCode::Asin},
  4053. {IntrinsicOp::IOP_asint, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4054. {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4055. {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble},
  4056. {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes},
  4057. {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan},
  4058. {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes},
  4059. {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi},
  4060. {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
  4061. {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes},
  4062. {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos},
  4063. {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos},
  4064. {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, DXIL::OpCode::Countbits},
  4065. {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes},
  4066. {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
  4067. {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
  4068. {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineX},
  4069. {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
  4070. {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
  4071. {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineY},
  4072. {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes},
  4073. {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes},
  4074. {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes},
  4075. {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes},
  4076. {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes},
  4077. {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes},
  4078. {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp},
  4079. {IntrinsicOp::IOP_f16tof32, TranslateF16ToF32, DXIL::OpCode::LegacyF16ToF32},
  4080. {IntrinsicOp::IOP_f32tof16, TranslateF32ToF16, DXIL::OpCode::LegacyF32ToF16},
  4081. {IntrinsicOp::IOP_faceforward, TranslateFaceforward, DXIL::OpCode::NumOpCodes},
  4082. {IntrinsicOp::IOP_firstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitSHi},
  4083. {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo, DXIL::OpCode::FirstbitLo},
  4084. {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni},
  4085. {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma},
  4086. {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes},
  4087. {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc},
  4088. {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes},
  4089. {IntrinsicOp::IOP_fwidth, TranslateFWidth, DXIL::OpCode::NumOpCodes},
  4090. {IntrinsicOp::IOP_isfinite, TrivialIsSpecialFloat, DXIL::OpCode::IsFinite},
  4091. {IntrinsicOp::IOP_isinf, TrivialIsSpecialFloat, DXIL::OpCode::IsInf},
  4092. {IntrinsicOp::IOP_isnan, TrivialIsSpecialFloat, DXIL::OpCode::IsNaN},
  4093. {IntrinsicOp::IOP_ldexp, TranslateLdExp, DXIL::OpCode::NumOpCodes},
  4094. {IntrinsicOp::IOP_length, TranslateLength, DXIL::OpCode::NumOpCodes},
  4095. {IntrinsicOp::IOP_lerp, TranslateLerp, DXIL::OpCode::NumOpCodes},
  4096. {IntrinsicOp::IOP_lit, TranslateLit, DXIL::OpCode::NumOpCodes},
  4097. {IntrinsicOp::IOP_log, TranslateLog, DXIL::OpCode::NumOpCodes},
  4098. {IntrinsicOp::IOP_log10, TranslateLog10, DXIL::OpCode::NumOpCodes},
  4099. {IntrinsicOp::IOP_log2, TrivialUnaryOperation, DXIL::OpCode::Log},
  4100. {IntrinsicOp::IOP_mad, TranslateFUITrinary, DXIL::OpCode::IMad},
  4101. {IntrinsicOp::IOP_max, TranslateFUIBinary, DXIL::OpCode::IMax},
  4102. {IntrinsicOp::IOP_min, TranslateFUIBinary, DXIL::OpCode::IMin},
  4103. {IntrinsicOp::IOP_modf, TranslateModF, DXIL::OpCode::NumOpCodes},
  4104. {IntrinsicOp::IOP_msad4, TranslateMSad4, DXIL::OpCode::NumOpCodes},
  4105. {IntrinsicOp::IOP_mul, EmptyLower, DXIL::OpCode::NumOpCodes},
  4106. {IntrinsicOp::IOP_normalize, TranslateNormalize, DXIL::OpCode::NumOpCodes},
  4107. {IntrinsicOp::IOP_pow, TranslatePow, DXIL::OpCode::NumOpCodes},
  4108. {IntrinsicOp::IOP_radians, TranslateRadians, DXIL::OpCode::NumOpCodes},
  4109. {IntrinsicOp::IOP_rcp, TranslateRCP, DXIL::OpCode::NumOpCodes},
  4110. {IntrinsicOp::IOP_reflect, TranslateReflect, DXIL::OpCode::NumOpCodes},
  4111. {IntrinsicOp::IOP_refract, TranslateRefract, DXIL::OpCode::NumOpCodes},
  4112. {IntrinsicOp::IOP_reversebits, TrivialUnaryOperation, DXIL::OpCode::Bfrev},
  4113. {IntrinsicOp::IOP_round, TrivialUnaryOperation, DXIL::OpCode::Round_ne},
  4114. {IntrinsicOp::IOP_rsqrt, TrivialUnaryOperation, DXIL::OpCode::Rsqrt},
  4115. {IntrinsicOp::IOP_saturate, TrivialUnaryOperation, DXIL::OpCode::Saturate},
  4116. {IntrinsicOp::IOP_sign, TranslateSign, DXIL::OpCode::NumOpCodes},
  4117. {IntrinsicOp::IOP_sin, TrivialUnaryOperation, DXIL::OpCode::Sin},
  4118. {IntrinsicOp::IOP_sincos, EmptyLower, DXIL::OpCode::NumOpCodes},
  4119. {IntrinsicOp::IOP_sinh, TrivialUnaryOperation, DXIL::OpCode::Hsin},
  4120. {IntrinsicOp::IOP_smoothstep, TranslateSmoothStep, DXIL::OpCode::NumOpCodes},
  4121. {IntrinsicOp::IOP_source_mark, EmptyLower, DXIL::OpCode::NumOpCodes},
  4122. {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt},
  4123. {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes},
  4124. {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan},
  4125. {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan},
  4126. {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4127. {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4128. {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4129. {IntrinsicOp::IOP_tex1Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4130. {IntrinsicOp::IOP_tex1Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4131. {IntrinsicOp::IOP_tex2D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4132. {IntrinsicOp::IOP_tex2Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4133. {IntrinsicOp::IOP_tex2Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4134. {IntrinsicOp::IOP_tex2Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4135. {IntrinsicOp::IOP_tex2Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4136. {IntrinsicOp::IOP_tex3D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4137. {IntrinsicOp::IOP_tex3Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4138. {IntrinsicOp::IOP_tex3Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4139. {IntrinsicOp::IOP_tex3Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4140. {IntrinsicOp::IOP_tex3Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4141. {IntrinsicOp::IOP_texCUBE, EmptyLower, DXIL::OpCode::NumOpCodes},
  4142. {IntrinsicOp::IOP_texCUBEbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4143. {IntrinsicOp::IOP_texCUBEgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4144. {IntrinsicOp::IOP_texCUBElod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4145. {IntrinsicOp::IOP_texCUBEproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4146. {IntrinsicOp::IOP_transpose, EmptyLower, DXIL::OpCode::NumOpCodes},
  4147. {IntrinsicOp::IOP_trunc, TrivialUnaryOperation, DXIL::OpCode::Round_z},
  4148. {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream},
  4149. {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream},
  4150. {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
  4151. {IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
  4152. {IntrinsicOp::MOP_GetDimensions, TranslateGetDimensions, DXIL::OpCode::NumOpCodes},
  4153. {IntrinsicOp::MOP_Load, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4154. {IntrinsicOp::MOP_Sample, TranslateSample, DXIL::OpCode::Sample},
  4155. {IntrinsicOp::MOP_SampleBias, TranslateSample, DXIL::OpCode::SampleBias},
  4156. {IntrinsicOp::MOP_SampleCmp, TranslateSample, DXIL::OpCode::SampleCmp},
  4157. {IntrinsicOp::MOP_SampleCmpLevelZero, TranslateSample, DXIL::OpCode::SampleCmpLevelZero},
  4158. {IntrinsicOp::MOP_SampleGrad, TranslateSample, DXIL::OpCode::SampleGrad},
  4159. {IntrinsicOp::MOP_SampleLevel, TranslateSample, DXIL::OpCode::SampleLevel},
  4160. {IntrinsicOp::MOP_Gather, TranslateGather, DXIL::OpCode::TextureGather},
  4161. {IntrinsicOp::MOP_GatherAlpha, TranslateGather, DXIL::OpCode::TextureGather},
  4162. {IntrinsicOp::MOP_GatherBlue, TranslateGather, DXIL::OpCode::TextureGather},
  4163. {IntrinsicOp::MOP_GatherCmp, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4164. {IntrinsicOp::MOP_GatherCmpAlpha, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4165. {IntrinsicOp::MOP_GatherCmpBlue, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4166. {IntrinsicOp::MOP_GatherCmpGreen, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4167. {IntrinsicOp::MOP_GatherCmpRed, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4168. {IntrinsicOp::MOP_GatherGreen, TranslateGather, DXIL::OpCode::TextureGather},
  4169. {IntrinsicOp::MOP_GatherRed, TranslateGather, DXIL::OpCode::TextureGather},
  4170. {IntrinsicOp::MOP_GetSamplePosition, TranslateGetSamplePosition, DXIL::OpCode::NumOpCodes},
  4171. {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4172. {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4173. {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4174. {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4175. {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4176. {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4177. {IntrinsicOp::MOP_InterlockedCompareStore, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4178. {IntrinsicOp::MOP_InterlockedExchange, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4179. {IntrinsicOp::MOP_InterlockedMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4180. {IntrinsicOp::MOP_InterlockedMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4181. {IntrinsicOp::MOP_InterlockedOr, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4182. {IntrinsicOp::MOP_InterlockedXor, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4183. {IntrinsicOp::MOP_Store, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4184. {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4185. {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4186. {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4187. {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
  4188. {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
  4189. {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes},
  4190. // SPIRV change starts
  4191. #ifdef ENABLE_SPIRV_CODEGEN
  4192. {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes},
  4193. #endif // ENABLE_SPIRV_CODEGEN
  4194. // SPIRV change ends
// Manually added part.
  4196. { IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4197. { IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4198. { IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4199. { IntrinsicOp::IOP_WaveActiveUMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4200. { IntrinsicOp::IOP_WaveActiveUProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4201. { IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4202. { IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
  4203. { IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
  4204. { IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes },
  4205. { IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitHi },
  4206. { IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},
  4207. { IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax},
  4208. { IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin },
  4209. { IntrinsicOp::IOP_umul, TranslateFUIBinary, DXIL::OpCode::UMul },
  4210. { IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4211. { IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4212. };
  4213. }
  4214. static void TranslateBuiltinIntrinsic(CallInst *CI,
  4215. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4216. unsigned opcode = hlsl::GetHLOpcode(CI);
  4217. const IntrinsicLower &lower = gLowerTable[opcode];
  4218. Value *Result =
  4219. lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode, helper, pObjHelper, Translated);
  4220. if (Result)
  4221. CI->replaceAllUsesWith(Result);
  4222. }
  4223. // SharedMem.
  4224. namespace {
  4225. bool IsSharedMemPtr(Value *Ptr) {
  4226. return Ptr->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace;
  4227. }
  4228. bool IsLocalVariablePtr(Value *Ptr) {
  4229. while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
  4230. Ptr = GEP->getPointerOperand();
  4231. }
  4232. bool isAlloca = isa<AllocaInst>(Ptr);
  4233. if (isAlloca) return true;
  4234. GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
  4235. if (!GV) return false;
  4236. return GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage;
  4237. }
  4238. }
  4239. // Constant buffer.
  4240. namespace {
  4241. unsigned GetEltTypeByteSizeForConstBuf(Type *EltType, const DataLayout &DL) {
  4242. DXASSERT(EltType->isIntegerTy() || EltType->isFloatingPointTy(),
  4243. "not an element type");
  4244. // TODO: Use real size after change constant buffer into linear layout.
  4245. if (DL.getTypeSizeInBits(EltType) <= 32) {
  4246. // Constant buffer is 4 bytes align.
  4247. return 4;
  4248. } else
  4249. return 8;
  4250. }
  4251. Value *GenerateCBLoad(Value *handle, Value *offset, Type *EltTy, OP *hlslOP,
  4252. IRBuilder<> &Builder) {
  4253. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoad);
  4254. // Align to 8 bytes for now.
  4255. Constant *align = hlslOP->GetU32Const(8);
  4256. Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
  4257. if (EltTy != i1Ty) {
  4258. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, EltTy);
  4259. return Builder.CreateCall(CBLoad, {OpArg, handle, offset, align});
  4260. } else {
  4261. Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
  4262. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, i32Ty);
  4263. Value *Result = Builder.CreateCall(CBLoad, {OpArg, handle, offset, align});
  4264. return Builder.CreateICmpEQ(Result, hlslOP->GetU32Const(0));
  4265. }
  4266. }
  4267. Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset,
  4268. bool colMajor, OP *OP, const DataLayout &DL,
  4269. IRBuilder<> &Builder) {
  4270. unsigned col, row;
  4271. Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
  4272. unsigned matSize = col * row;
  4273. std::vector<Value *> elts(matSize);
  4274. Value *EltByteSize = ConstantInt::get(
  4275. offset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  4276. // TODO: use real size after change constant buffer into linear layout.
  4277. Value *baseOffset = offset;
  4278. for (unsigned i = 0; i < matSize; i++) {
  4279. elts[i] = GenerateCBLoad(handle, baseOffset, EltTy, OP, Builder);
  4280. baseOffset = Builder.CreateAdd(baseOffset, EltByteSize);
  4281. }
  4282. return HLMatrixLower::BuildVector(EltTy, col * row, elts, Builder);
  4283. }
  4284. void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
  4285. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  4286. DxilFieldAnnotation *prevFieldAnnotation,
  4287. const DataLayout &DL, DxilTypeSystem &dxilTypeSys);
  4288. Value *GenerateVecEltFromGEP(Value *ldData, GetElementPtrInst *GEP,
  4289. IRBuilder<> &Builder) {
  4290. DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
  4291. Value *baseIdx = (GEP->idx_begin())->get();
  4292. Value *zeroIdx = Builder.getInt32(0);
  4293. DXASSERT_LOCALVAR(baseIdx && zeroIdx, baseIdx == zeroIdx,
  4294. "base index must be 0");
  4295. Value *idx = (GEP->idx_begin() + 1)->get();
  4296. if (ConstantInt *cidx = dyn_cast<ConstantInt>(idx)) {
  4297. return Builder.CreateExtractElement(ldData, idx);
  4298. } else {
  4299. // Dynamic indexing.
  4300. // Copy vec to array.
  4301. Type *Ty = ldData->getType();
  4302. Type *EltTy = Ty->getVectorElementType();
  4303. unsigned vecSize = Ty->getVectorNumElements();
  4304. ArrayType *AT = ArrayType::get(EltTy, vecSize);
  4305. IRBuilder<> AllocaBuilder(
  4306. GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
  4307. Value *tempArray = AllocaBuilder.CreateAlloca(AT);
  4308. Value *zero = Builder.getInt32(0);
  4309. for (unsigned int i = 0; i < vecSize; i++) {
  4310. Value *Elt = Builder.CreateExtractElement(ldData, Builder.getInt32(i));
  4311. Value *Ptr =
  4312. Builder.CreateInBoundsGEP(tempArray, {zero, Builder.getInt32(i)});
  4313. Builder.CreateStore(Elt, Ptr);
  4314. }
  4315. // Load from temp array.
  4316. Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
  4317. return Builder.CreateLoad(EltGEP);
  4318. }
  4319. }
// Lower one user of a constant-buffer address into dxil CBufferLoad ops.
//
// user       - instruction consuming the cbuffer pointer: an HL matrix load,
//              an HL matrix subscript, a plain LoadInst, or a GEP.
// handle     - the cbuffer resource handle.
// baseOffset - byte offset of the addressed value within the buffer.
// prevFieldAnnotation - layout annotation of the enclosing field (null at the
//              top level); threaded through nested GEP translation.
//
// The handled instruction (and any intermediate GEP/load users it feeds) is
// erased once its uses have been rewritten.
void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset,
                            hlsl::OP *hlslOP,
                            DxilFieldAnnotation *prevFieldAnnotation,
                            DxilTypeSystem &dxilTypeSys, const DataLayout &DL) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Matrix load: cbuffers are read-only, so only Col/RowMatLoad occur.
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
                          ->getType()
                          ->getPointerElementType();
      Value *newLd = TranslateConstBufMatLd(matType, handle, baseOffset,
                                            colMajor, hlslOP, DL, Builder);
      CI->replaceAllUsesWith(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      // Matrix subscript: compute the byte offset of every selected element,
      // load them, then rewrite all users of the subscript pointer.
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      Type *matType = basePtr->getType()->getPointerElementType();
      unsigned col, row;
      Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
      Value *EltByteSize = ConstantInt::get(
          baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      Value *idxList[16];
      switch (subOp) {
      case HLSubscriptOpcode::ColMatSubscript:
      case HLSubscriptOpcode::RowMatSubscript: {
        // Dynamic subscript: one index operand per result element.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *idx =
              CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          Value *offset = Builder.CreateMul(idx, EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      case HLSubscriptOpcode::RowMatElement:
      case HLSubscriptOpcode::ColMatElement: {
        // Element access (e.g. _m00 swizzles): the indices come packed in a
        // constant aggregate.
        Constant *EltIdxs = cast<Constant>(idx);
        for (unsigned i = 0; i < resultSize; i++) {
          Value *offset =
              Builder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      default:
        DXASSERT(0, "invalid operation on const buffer");
        break;
      }
      // Load each selected element and assemble the scalar/vector result.
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *eltData =
              GenerateCBLoad(handle, idxList[i], EltTy, hlslOP, Builder);
          ldData = Builder.CreateInsertElement(ldData, eltData, i);
        }
      } else {
        ldData = GenerateCBLoad(handle, idxList[0], EltTy, hlslOP, Builder);
      }
      // Rewrite the subscript's users: either a GEP selecting one element of
      // the loaded vector (its loads get the extracted element) or a direct
      // load of the whole value.  Iterators are advanced before erasing.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be a load here.
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be a load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    // Plain scalar/vector load: one CBufferLoad per component, bumping the
    // byte offset by the element size each time.
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
    Value *newLd = GenerateCBLoad(handle, baseOffset, EltTy, hlslOP, Builder);
    if (Ty->isVectorTy()) {
      Value *result = UndefValue::get(Ty);
      result = Builder.CreateInsertElement(result, newLd, (uint64_t)0);
      // Advance offset by one element (4 or 8 bytes).
      Value *offset =
          Builder.CreateAdd(baseOffset, hlslOP->GetU32Const(EltByteSize));
      for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
        Value *elt = GenerateCBLoad(handle, offset, EltTy, hlslOP, Builder);
        result = Builder.CreateInsertElement(result, elt, i);
        // Advance offset by one element.
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(EltByteSize));
      }
      newLd = result;
    }
    ldInst->replaceAllUsesWith(newLd);
    ldInst->eraseFromParent();
  } else {
    // Must be a GEP here: fold its indices into the byte offset and recurse
    // into its users.
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGep(GEP, handle, baseOffset, hlslOP, Builder,
                   prevFieldAnnotation, DL, dxilTypeSys);
    GEP->eraseFromParent();
  }
}
// Fold a GEP over a constant-buffer pointer into a byte offset, then
// translate every user of the GEP against the accumulated offset.
//
// The GEP indices are walked level by level:
//  - pointer level: stride is the pointee size (struct cbuffer size, or
//    legacy-aligned array element size, or alloc size), aligned to 16 bytes;
//  - struct level: add the field's cbuffer offset from its annotation, and
//    remember that field's annotation for deeper levels;
//  - array level: stride is the legacy-aligned element size (collapsing
//    nested arrays), times the index;
//  - vector level: stride is the element's alloc size.
// Constant indices fold into an immediate add; dynamic indices emit
// mul+add IR.
void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
                    hlsl::OP *hlslOP, IRBuilder<> &Builder,
                    DxilFieldAnnotation *prevFieldAnnotation,
                    const DataLayout &DL, DxilTypeSystem &dxilTypeSys) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *offset = baseOffset;
  // update offset
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; GEPIt++) {
    Value *idx = GEPIt.getOperand();
    unsigned immIdx = 0;
    bool bImmIdx = false;
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
      bImmIdx = true;
    }
    if (GEPIt->isPointerTy()) {
      // First (pointer) level: stride over whole objects of the pointee type.
      Type *EltTy = GEPIt->getPointerElementType();
      unsigned size = 0;
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
        size = annotation->GetCBufferSize();
      } else {
        DXASSERT(fieldAnnotation, "must be a field");
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
              *fieldAnnotation, EltTy, dxilTypeSys);
          // Decide the nested array size.
          unsigned nestedArraySize = 1;
          Type *EltTy = AT->getArrayElementType();
          // support multi level of array
          while (EltTy->isArrayTy()) {
            ArrayType *EltAT = cast<ArrayType>(EltTy);
            nestedArraySize *= EltAT->getNumElements();
            EltTy = EltAT->getElementType();
          }
          // Align to 4 * 4 bytes (one legacy cbuffer register).
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
          size = nestedArraySize * alignedSize;
        } else {
          size = DL.getTypeAllocSize(EltTy);
        }
      }
      // Align to 4 * 4 bytes.
      size = (size + 15) & 0xfffffff0;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else if (GEPIt->isStructTy()) {
      // Struct field: the annotation records the field's cbuffer offset.
      // Struct indices are always immediates, so immIdx is valid here.
      StructType *ST = cast<StructType>(*GEPIt);
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
      unsigned structOffset = fieldAnnotation->GetCBufferOffset();
      offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(structOffset));
    } else if (GEPIt->isArrayTy()) {
      DXASSERT(fieldAnnotation != nullptr, "must a field");
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
          *fieldAnnotation, *GEPIt, dxilTypeSys);
      // Decide the nested array size.
      unsigned nestedArraySize = 1;
      Type *EltTy = GEPIt->getArrayElementType();
      // support multi level of array
      while (EltTy->isArrayTy()) {
        ArrayType *EltAT = cast<ArrayType>(EltTy);
        nestedArraySize *= EltAT->getNumElements();
        EltTy = EltAT->getElementType();
      }
      // Align to 4 * 4 bytes (each array element starts a new register).
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
      unsigned size = nestedArraySize * alignedSize;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else if (GEPIt->isVectorTy()) {
      // Vector element: components are tightly packed.
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else {
      // Scalar leaf contributes no offset and must terminate the GEP.
      gep_type_iterator temp = GEPIt;
      temp++;
      DXASSERT(temp == E, "scalar type must be the last");
    }
  }
  // Translate each user of this GEP against the folded offset; advance the
  // iterator before translating since users may be erased.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
    Instruction *user = cast<Instruction>(*(U++));
    TranslateCBAddressUser(user, handle, offset, hlslOP, fieldAnnotation,
                           dxilTypeSys, DL);
  }
}
  4546. void TranslateCBOperations(Value *handle, Value *ptr, Value *offset, OP *hlslOP,
  4547. DxilTypeSystem &dxilTypeSys, const DataLayout &DL) {
  4548. auto User = ptr->user_begin();
  4549. auto UserE = ptr->user_end();
  4550. for (; User != UserE;) {
  4551. // Must be Instruction.
  4552. Instruction *I = cast<Instruction>(*(User++));
  4553. TranslateCBAddressUser(I, handle, offset, hlslOP,
  4554. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL);
  4555. }
  4556. }
  4557. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  4558. unsigned channelOffset, Type *EltTy, OP *hlslOP,
  4559. IRBuilder<> &Builder) {
  4560. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  4561. Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
  4562. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  4563. Type *halfTy = Type::getHalfTy(EltTy->getContext());
  4564. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  4565. Type *i16Ty = Type::getInt16Ty(EltTy->getContext());
  4566. bool isBool = EltTy == i1Ty;
  4567. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  4568. bool is16 = (EltTy == halfTy || EltTy == i16Ty) && !hlslOP->UseMinPrecision();
  4569. bool isNormal = !isBool && !is64;
  4570. DXASSERT_LOCALVAR(is16, (is16 && channelOffset < 8) || channelOffset < 4,
  4571. "legacy cbuffer don't across 16 bytes register.");
  4572. if (isNormal) {
  4573. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4574. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4575. return Builder.CreateExtractValue(loadLegacy, channelOffset);
  4576. } else if (is64) {
  4577. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4578. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4579. DXASSERT((channelOffset&1)==0,"channel offset must be even for double");
  4580. unsigned eltIdx = channelOffset>>1;
  4581. Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx);
  4582. return Result;
  4583. } else {
  4584. DXASSERT(isBool, "bool should be i1");
  4585. Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
  4586. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, i32Ty);
  4587. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4588. Value *Result = Builder.CreateExtractValue(loadLegacy, channelOffset);
  4589. return Builder.CreateICmpEQ(Result, hlslOP->GetU32Const(0));
  4590. }
  4591. }
// Load a `vecSize`-element vector from a legacy (16-byte register) constant
// buffer, starting at component `channelOffset` of register `legacyIdx`.
// Normal 32-bit and native 16-bit elements come from one register; 64-bit
// vectors larger than 2 spill into the next register (two 64-bit values per
// register); bools load as i32 components and convert to a vector of i1.
Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
                            unsigned channelOffset, Type *EltTy,
                            unsigned vecSize, OP *hlslOP,
                            IRBuilder<> &Builder) {
  Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
  Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  Type *halfTy = Type::getHalfTy(EltTy->getContext());
  Type *shortTy = Type::getInt16Ty(EltTy->getContext());
  bool isBool = EltTy == i1Ty;
  bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  // Native (non-min-precision) 16-bit channels: 8 fit in one register.
  bool is16 = (EltTy == shortTy || EltTy == halfTy) && !hlslOP->UseMinPrecision();
  bool isNormal = !isBool && !is64 && !is16;
  DXASSERT((is16 && channelOffset + vecSize <= 8) ||
               (channelOffset + vecSize) <= 4,
           "legacy cbuffer don't across 16 bytes register.");
  if (isNormal) {
    // One register holds the whole vector: extract each channel.
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
    Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
    for (unsigned i = 0; i < vecSize; ++i) {
      Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
      Result = Builder.CreateInsertElement(Result, NewElt, i);
    }
    return Result;
  } else if (is16) {
    // Same shape as the normal path; the 16-bit overload of the load
    // returns 16-bit channels.
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
    Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
    for (unsigned i = 0; i < vecSize; ++i) {
      Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
      Result = Builder.CreateInsertElement(Result, NewElt, i);
    }
    return Result;
  } else if (is64) {
    // A register holds at most two 64-bit values: take up to two from the
    // first register, then (for vec3/vec4) the rest from the next register.
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
    Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
    Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
    unsigned smallVecSize = 2;
    if (vecSize < smallVecSize)
      smallVecSize = vecSize;
    for (unsigned i = 0; i < smallVecSize; ++i) {
      Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
      Result = Builder.CreateInsertElement(Result, NewElt, i);
    }
    if (vecSize > 2) {
      // Got to next cb register.
      legacyIdx = Builder.CreateAdd(legacyIdx, hlslOP->GetU32Const(1));
      Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
      for (unsigned i = 2; i < vecSize; ++i) {
        Value *NewElt =
            Builder.CreateExtractValue(loadLegacy, i-2);
        Result = Builder.CreateInsertElement(Result, NewElt, i);
      }
    }
    return Result;
  } else {
    // Bools: load the raw i32 words and convert the vector back to i1.
    // NOTE(review): result is (word == 0) per component; confirm this
    // polarity against how bools are written into the constant buffer.
    DXASSERT(isBool, "bool should be i1");
    Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, i32Ty);
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
    Value *Result = UndefValue::get(VectorType::get(i32Ty, vecSize));
    for (unsigned i = 0; i < vecSize; ++i) {
      Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
      Result = Builder.CreateInsertElement(Result, NewElt, i);
    }
    return Builder.CreateICmpEQ(Result, ConstantAggregateZero::get(Result->getType()));
  }
}
  4662. Value *TranslateConstBufMatLdLegacy(Type *matType, Value *handle,
  4663. Value *legacyIdx, bool colMajor, OP *OP,
  4664. const DataLayout &DL,
  4665. IRBuilder<> &Builder) {
  4666. unsigned col, row;
  4667. Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
  4668. unsigned matSize = col * row;
  4669. std::vector<Value *> elts(matSize);
  4670. unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
  4671. if (colMajor) {
  4672. unsigned colByteSize = 4 * EltByteSize;
  4673. unsigned colRegSize = (colByteSize + 15) >> 4;
  4674. for (unsigned c = 0; c < col; c++) {
  4675. Value *col = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
  4676. EltTy, row, OP, Builder);
  4677. for (unsigned r = 0; r < row; r++) {
  4678. unsigned matIdx = HLMatrixLower::GetColMajorIdx(r, c, row);
  4679. elts[matIdx] = Builder.CreateExtractElement(col, r);
  4680. }
  4681. // Update offset for a column.
  4682. legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(colRegSize));
  4683. }
  4684. } else {
  4685. unsigned rowByteSize = 4 * EltByteSize;
  4686. unsigned rowRegSize = (rowByteSize + 15) >> 4;
  4687. for (unsigned r = 0; r < row; r++) {
  4688. Value *row = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
  4689. EltTy, col, OP, Builder);
  4690. for (unsigned c = 0; c < col; c++) {
  4691. unsigned matIdx = HLMatrixLower::GetRowMajorIdx(r, c, col);
  4692. elts[matIdx] = Builder.CreateExtractElement(row, c);
  4693. }
  4694. // Update offset for a row.
  4695. legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(rowRegSize));
  4696. }
  4697. }
  4698. return HLMatrixLower::BuildVector(EltTy, col * row, elts, Builder);
  4699. }
// Forward declaration: lowers a GEP into a legacy-layout cbuffer.
// Defined below; mutually recursive with TranslateCBAddressUserLegacy.
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
                          Value *legacyIdx, unsigned channelOffset,
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
                          DxilFieldAnnotation *prevFieldAnnotation,
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
                          HLObjectOperationLowerHelper *pObjHelper);
  4706. void TranslateResourceInCB(LoadInst *LI,
  4707. HLObjectOperationLowerHelper *pObjHelper,
  4708. GlobalVariable *CbGV) {
  4709. if (LI->user_empty()) {
  4710. LI->eraseFromParent();
  4711. return;
  4712. }
  4713. GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand());
  4714. CallInst *CI = cast<CallInst>(LI->user_back());
  4715. MDNode *MD = HLModule::GetDxilResourceAttrib(CI->getCalledFunction());
  4716. Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, MD);
  4717. // Lower Ptr to GV base Ptr.
  4718. Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr);
  4719. IRBuilder<> Builder(LI);
  4720. Value *GvLd = Builder.CreateLoad(GvPtr);
  4721. LI->replaceAllUsesWith(GvLd);
  4722. LI->eraseFromParent();
  4723. }
// Lower one user of a legacy-layout cbuffer address to DXIL
// cbufferLoadLegacy operations, erasing the user afterwards.
//
// user               - instruction that consumes the cbuffer address
//                      (HL intrinsic call, load, bitcast or GEP).
// handle             - cbuffer resource handle.
// legacyIdx          - 16-byte register index the address resolves to.
// channelOffset      - starting channel (x/y/z/w) within that register.
// prevFieldAnnotation- annotation of the field addressed so far (may be null).
// Recurses through bitcasts and GEPs (via TranslateCBGepLegacy).
void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
                                  Value *legacyIdx, unsigned channelOffset,
                                  hlsl::OP *hlslOP,
                                  DxilFieldAnnotation *prevFieldAnnotation,
                                  DxilTypeSystem &dxilTypeSys,
                                  const DataLayout &DL,
                                  HLObjectOperationLowerHelper *pObjHelper) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Matrix load directly from the cbuffer (stores are illegal).
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
               matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
                          ->getType()
                          ->getPointerElementType();
      Value *newLd = TranslateConstBufMatLdLegacy(
          matType, handle, legacyIdx, colMajor, hlslOP, DL, Builder);
      CI->replaceAllUsesWith(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      // Matrix subscript/element access on a cbuffer matrix.
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      Type *matType = basePtr->getType()->getPointerElementType();
      unsigned col, row;
      Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      Value *idxList[16];
      bool colMajor = subOp == HLSubscriptOpcode::ColMatSubscript ||
                      subOp == HLSubscriptOpcode::ColMatElement;
      // Element/subscript indices that are not compile-time constants need
      // the dynamic-indexing path below.
      bool dynamicIndexing = !isa<ConstantInt>(idx) &&
                             !isa<ConstantAggregateZero>(idx) &&
                             !isa<ConstantDataSequential>(idx);
      Value *ldData = UndefValue::get(resultType);
      if (!dynamicIndexing) {
        // Constant indices: load the whole matrix once, then pick elements.
        Value *matLd = TranslateConstBufMatLdLegacy(
            matType, handle, legacyIdx, colMajor, hlslOP, DL, Builder);
        // The matLd is keep original layout, just use the idx calc in
        // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript.
        switch (subOp) {
        case HLSubscriptOpcode::RowMatSubscript:
        case HLSubscriptOpcode::ColMatSubscript: {
          // One index operand per result element.
          for (unsigned i = 0; i < resultSize; i++) {
            idxList[i] =
                CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          }
        } break;
        case HLSubscriptOpcode::RowMatElement:
        case HLSubscriptOpcode::ColMatElement: {
          // Indices packed into a single constant aggregate.
          Constant *EltIdxs = cast<Constant>(idx);
          for (unsigned i = 0; i < resultSize; i++) {
            idxList[i] = EltIdxs->getAggregateElement(i);
          }
        } break;
        default:
          DXASSERT(0, "invalid operation on const buffer");
          break;
        }
        if (resultType->isVectorTy()) {
          for (unsigned i = 0; i < resultSize; i++) {
            Value *eltData = Builder.CreateExtractElement(matLd, idxList[i]);
            ldData = Builder.CreateInsertElement(ldData, eltData, i);
          }
        } else {
          Value *eltData = Builder.CreateExtractElement(matLd, idxList[0]);
          ldData = eltData;
        }
      } else {
        // Must be matSub here.
        Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
        if (colMajor) {
          // idx is c * row + r.
          // For first col, c is 0, so idx is r.
          Value *one = Builder.getInt32(1);
          // row.x = c[0].[idx]
          // row.y = c[1].[idx]
          // row.z = c[2].[idx]
          // row.w = c[3].[idx]
          Value *Elts[4];
          ArrayType *AT = ArrayType::get(EltTy, col);
          // Alloca must go in the entry block so it stays a static alloca.
          IRBuilder<> AllocaBuilder(user->getParent()
                                        ->getParent()
                                        ->getEntryBlock()
                                        .getFirstInsertionPt());
          Value *tempArray = AllocaBuilder.CreateAlloca(AT);
          Value *zero = AllocaBuilder.getInt32(0);
          Value *cbufIdx = legacyIdx;
          for (unsigned int c = 0; c < col; c++) {
            Value *ColVal =
                GenerateCBLoadLegacy(handle, cbufIdx, /*channelOffset*/ 0,
                                     EltTy, row, hlslOP, Builder);
            // Convert ColVal to array for indexing.
            for (unsigned int r = 0; r < row; r++) {
              Value *Elt =
                  Builder.CreateExtractElement(ColVal, Builder.getInt32(r));
              Value *Ptr = Builder.CreateInBoundsGEP(
                  tempArray, {zero, Builder.getInt32(r)});
              Builder.CreateStore(Elt, Ptr);
            }
            // Dynamic pick of one element of this column via the array.
            Value *Ptr = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
            Elts[c] = Builder.CreateLoad(Ptr);
            // Update cbufIdx.
            cbufIdx = Builder.CreateAdd(cbufIdx, one);
          }
          if (resultType->isVectorTy()) {
            for (unsigned int c = 0; c < col; c++) {
              ldData = Builder.CreateInsertElement(ldData, Elts[c], c);
            }
          } else {
            ldData = Elts[0];
          }
        } else {
          // idx is r * col + c;
          // r = idx / col;
          Value *cCol = ConstantInt::get(idx->getType(), col);
          idx = Builder.CreateUDiv(idx, cCol);
          idx = Builder.CreateAdd(idx, legacyIdx);
          // Just return a row.
          ldData = GenerateCBLoadLegacy(handle, idx, /*channelOffset*/ 0, EltTy,
                                        row, hlslOP, Builder);
        }
        if (!resultType->isVectorTy()) {
          ldData = Builder.CreateExtractElement(ldData, Builder.getInt32(0));
        }
      }
      // Replace every use of the subscript result (GEPs into it and direct
      // loads) with the computed data, erasing the old instructions.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be load here;
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
    if (HLModule::IsHLSLObjectType(Ty)) {
      CallInst *CI = cast<CallInst>(handle);
      GlobalVariable *CbGV = cast<GlobalVariable>(
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
      return;
    }
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    // Scalar or vector load straight from the cbuffer register.
    Value *newLd = nullptr;
    if (Ty->isVectorTy())
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
                                   Ty->getVectorNumElements(), hlslOP, Builder);
    else
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
                                   hlslOP, Builder);
    ldInst->replaceAllUsesWith(newLd);
    ldInst->eraseFromParent();
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
    // Look through bitcasts: translate each of their users at the same
    // register/channel position.
    for (auto it = BCI->user_begin(); it != BCI->user_end(); ) {
      Instruction *I = cast<Instruction>(*it++);
      TranslateCBAddressUserLegacy(I,
                                   handle, legacyIdx, channelOffset, hlslOP,
                                   prevFieldAnnotation, dxilTypeSys,
                                   DL, pObjHelper);
    }
    BCI->eraseFromParent();
  } else {
    // Must be GEP here
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGepLegacy(GEP, handle, legacyIdx, channelOffset, hlslOP, Builder,
                         prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
    GEP->eraseFromParent();
  }
}
// Walk a GEP into a legacy-layout cbuffer, accumulating the 16-byte
// register index (legacyIndex) and channel within the register, then
// lower every user of the GEP at the resulting position.
// Legacy layout: every struct/array element is aligned to a 16-byte
// register; 16-bit fields (native, not min-precision) pack 8 per register,
// 32-bit fields pack 4.
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
                          Value *legacyIndex, unsigned channel,
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
                          DxilFieldAnnotation *prevFieldAnnotation,
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
                          HLObjectOperationLowerHelper *pObjHelper) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  // update offset
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; GEPIt++) {
    Value *idx = GEPIt.getOperand();
    unsigned immIdx = 0;
    bool bImmIdx = false;
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
      bImmIdx = true;
    }
    if (GEPIt->isPointerTy()) {
      // First (pointer) level: element size in cbuffer layout, which may
      // come from the struct annotation, an array computation, or the DL.
      Type *EltTy = GEPIt->getPointerElementType();
      unsigned size = 0;
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
        size = annotation->GetCBufferSize();
      } else {
        DXASSERT(fieldAnnotation, "must be a field");
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
              *fieldAnnotation, EltTy, dxilTypeSys);
          // Decide the nested array size.
          unsigned nestedArraySize = 1;
          Type *EltTy = AT->getArrayElementType();
          // support multi level of array
          while (EltTy->isArrayTy()) {
            ArrayType *EltAT = cast<ArrayType>(EltTy);
            nestedArraySize *= EltAT->getNumElements();
            EltTy = EltAT->getElementType();
          }
          // Align to 4 * 4 bytes.
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
          size = nestedArraySize * alignedSize;
        } else {
          size = DL.getTypeAllocSize(EltTy);
        }
      }
      // Skip 0 idx.
      if (bImmIdx && immIdx == 0)
        continue;
      // Align to 4 * 4 bytes.
      size = (size + 15) & 0xfffffff0;
      // Take this as array idxing.
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        unsigned idxInc = tempOffset >> 4; // bytes -> registers
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
      } else {
        // Dynamic index: scale by registers-per-element at runtime.
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size>>4));
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
      }
      // Array always start from x channel.
      channel = 0;
    } else if (GEPIt->isStructTy()) {
      // Struct field: use the annotated cbuffer offset; offsets are kept
      // in channel units (2-byte channels for native 16-bit, else 4-byte).
      StructType *ST = cast<StructType>(*GEPIt);
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
      unsigned idxInc = 0;
      unsigned structOffset = 0;
      if (fieldAnnotation->GetCompType().Is16Bit() &&
          !hlslOP->UseMinPrecision()) {
        // 8 two-byte channels per 16-byte register.
        structOffset = fieldAnnotation->GetCBufferOffset() >> 1;
        channel += structOffset;
        idxInc = channel >> 3;
        channel = channel & 0x7;
      }
      else {
        // 4 four-byte channels per 16-byte register.
        structOffset = fieldAnnotation->GetCBufferOffset() >> 2;
        channel += structOffset;
        idxInc = channel >> 2;
        channel = channel & 0x3;
      }
      if (idxInc)
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
    } else if (GEPIt->isArrayTy()) {
      // Array level past the pointer: element stride is the register-
      // aligned annotated element size times any nested array count.
      DXASSERT(fieldAnnotation != nullptr, "must a field");
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
          *fieldAnnotation, *GEPIt, dxilTypeSys);
      // Decide the nested array size.
      unsigned nestedArraySize = 1;
      Type *EltTy = GEPIt->getArrayElementType();
      // support multi level of array
      while (EltTy->isArrayTy()) {
        ArrayType *EltAT = cast<ArrayType>(EltTy);
        nestedArraySize *= EltAT->getNumElements();
        EltTy = EltAT->getElementType();
      }
      // Align to 4 * 4 bytes.
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
      unsigned size = nestedArraySize * alignedSize;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        unsigned idxInc = tempOffset >> 4;
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
      } else {
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size>>4));
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
      }
      // Array always start from x channel.
      channel = 0;
    } else if (GEPIt->isVectorTy()) {
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
      // Indexing on vector.
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        unsigned channelInc = tempOffset >> 2;
        DXASSERT((channel + channelInc)<=4, "vector should not cross cb register");
        channel += channelInc;
        if (channel == 4) {
          // Get to another row.
          // Update index and channel.
          channel = 0;
          legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
        }
      } else {
        // Dynamic vector index: spill the whole register to a temp array
        // and redirect the GEP to index that array instead.
        Type *EltTy = GEPIt->getVectorElementType();
        // Load the whole register.
        Value *newLd = GenerateCBLoadLegacy(handle, legacyIndex,
                                            /*channelOffset*/ 0, EltTy,
                                            /*vecSize*/ 4, hlslOP, Builder);
        // Copy to array.
        IRBuilder<> AllocaBuilder(GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
        Value *tempArray = AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, 4));
        Value *zeroIdx = hlslOP->GetU32Const(0);
        for (unsigned i = 0; i < 4; i++) {
          Value *Elt = Builder.CreateExtractElement(newLd, i);
          Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, hlslOP->GetU32Const(i)});
          Builder.CreateStore(Elt, EltGEP);
        }
        // Make sure this is the end of GEP.
        gep_type_iterator temp = GEPIt;
        temp++;
        DXASSERT(temp == E, "scalar type must be the last");
        // Replace the GEP with array GEP.
        Value *ArrayGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, idx});
        GEP->replaceAllUsesWith(ArrayGEP);
        return;
      }
    } else {
      gep_type_iterator temp = GEPIt;
      temp++;
      DXASSERT(temp == E, "scalar type must be the last");
    }
  }
  // Lower every user of this GEP at the final register/channel position.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
    Instruction *user = cast<Instruction>(*(U++));
    TranslateCBAddressUserLegacy(user, handle, legacyIndex, channel, hlslOP, fieldAnnotation,
                                 dxilTypeSys, DL, pObjHelper);
  }
}
  5078. void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP,
  5079. DxilTypeSystem &dxilTypeSys,
  5080. const DataLayout &DL,
  5081. HLObjectOperationLowerHelper *pObjHelper) {
  5082. auto User = ptr->user_begin();
  5083. auto UserE = ptr->user_end();
  5084. Value *zeroIdx = hlslOP->GetU32Const(0);
  5085. for (; User != UserE;) {
  5086. // Must be Instruction.
  5087. Instruction *I = cast<Instruction>(*(User++));
  5088. TranslateCBAddressUserLegacy(
  5089. I, handle, zeroIdx, /*channelOffset*/ 0, hlslOP,
  5090. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL, pObjHelper);
  5091. }
  5092. }
  5093. }
  5094. // Structured buffer.
  5095. namespace {
  5096. // Calculate offset.
  5097. Value *GEPIdxToOffset(GetElementPtrInst *GEP, IRBuilder<> &Builder,
  5098. hlsl::OP *OP, const DataLayout &DL) {
  5099. SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  5100. Value *addr = nullptr;
  5101. // update offset
  5102. if (GEP->hasAllConstantIndices()) {
  5103. unsigned gepOffset =
  5104. DL.getIndexedOffset(GEP->getPointerOperandType(), Indices);
  5105. addr = OP->GetU32Const(gepOffset);
  5106. } else {
  5107. Value *offset = OP->GetU32Const(0);
  5108. gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  5109. for (; GEPIt != E; GEPIt++) {
  5110. Value *idx = GEPIt.getOperand();
  5111. unsigned immIdx = 0;
  5112. if (llvm::Constant *constIdx = dyn_cast<llvm::Constant>(idx)) {
  5113. immIdx = constIdx->getUniqueInteger().getLimitedValue();
  5114. if (immIdx == 0) {
  5115. continue;
  5116. }
  5117. }
  5118. if (GEPIt->isPointerTy()) {
  5119. unsigned size = DL.getTypeAllocSize(GEPIt->getPointerElementType());
  5120. if (immIdx) {
  5121. unsigned tempOffset = size * immIdx;
  5122. offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset));
  5123. } else {
  5124. Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size));
  5125. offset = Builder.CreateAdd(offset, tempOffset);
  5126. }
  5127. } else if (GEPIt->isStructTy()) {
  5128. unsigned structOffset = 0;
  5129. for (unsigned i = 0; i < immIdx; i++) {
  5130. structOffset += DL.getTypeAllocSize(GEPIt->getStructElementType(i));
  5131. }
  5132. offset = Builder.CreateAdd(offset, OP->GetU32Const(structOffset));
  5133. } else if (GEPIt->isArrayTy()) {
  5134. unsigned size = DL.getTypeAllocSize(GEPIt->getArrayElementType());
  5135. if (immIdx) {
  5136. unsigned tempOffset = size * immIdx;
  5137. offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset));
  5138. } else {
  5139. Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size));
  5140. offset = Builder.CreateAdd(offset, tempOffset);
  5141. }
  5142. } else if (GEPIt->isVectorTy()) {
  5143. unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
  5144. if (immIdx) {
  5145. unsigned tempOffset = size * immIdx;
  5146. offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset));
  5147. } else {
  5148. Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size));
  5149. offset = Builder.CreateAdd(offset, tempOffset);
  5150. }
  5151. } else {
  5152. gep_type_iterator temp = GEPIt;
  5153. temp++;
  5154. DXASSERT(temp == E, "scalar type must be the last");
  5155. }
  5156. };
  5157. addr = offset;
  5158. }
  5159. // TODO: x4 for byte address
  5160. return addr;
  5161. }
// Emit a dx.op.rawBufferLoad for up to 4 elements of type EltTy at
// (bufIdx, offset), writing the loaded scalars into resultElts and the
// optional CheckAccessFullyMapped status into *status.
// 64-bit element types (i64/double) are loaded as pairs of i32 and
// reassembled; more than 2 such elements require a second load 16 bytes
// further on.
void GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
                         Value *status, Type *EltTy,
                         MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
                         IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment) {
  OP::OpCode opcode = OP::OpCode::RawBufferLoad;
  DXASSERT(resultElts.size() <= 4,
           "buffer load cannot load more than 4 values");
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  if (!is64) {
    // One native-width load covers all requested components.
    Function *dxilF = OP->GetOpFunc(opcode, EltTy);
    Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents, OP);
    Value *Args[] = {OP->GetU32Const((unsigned)opcode), handle, bufIdx, offset, mask, alignment};
    Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
    for (unsigned i = 0; i < resultElts.size(); i++) {
      resultElts[i] = Builder.CreateExtractValue(Ld, i);
    }
    // status
    UpdateStatus(Ld, status, Builder, OP);
    return;
  } else {
    // 64 bit.
    // Load as i32 pairs; the first call covers at most two 64-bit values.
    Function *dxilF = OP->GetOpFunc(opcode, Builder.getInt32Ty());
    Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents < 2 ? NumComponents : 2, OP);
    Value *Args[] = {OP->GetU32Const((unsigned)opcode), handle, bufIdx, offset, mask, alignment};
    Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
    Value *resultElts32[8];
    unsigned size = resultElts.size();
    unsigned eltBase = 0;
    for (unsigned i = 0; i < size; i++) {
      if (i == 2) {
        // Third 64-bit element: issue the second load, 16 bytes further.
        // Update offset 4 by 4 bytes.
        Args[DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx] =
            Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
        // Update Mask
        Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
            GetRawBufferMaskForETy(EltTy, NumComponents < 3 ? 0 : NumComponents - 2, OP);
        Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
        eltBase = 4;
      }
      // Each 64-bit element consumes two consecutive i32 results;
      // eltBase rebases extraction indices onto the second load.
      unsigned resBase = 2 * i;
      resultElts32[resBase] = Builder.CreateExtractValue(Ld, resBase - eltBase);
      resultElts32[resBase + 1] =
          Builder.CreateExtractValue(Ld, resBase + 1 - eltBase);
    }
    // Recombine the i32 halves into i64/double values.
    Make64bitResultForLoad(EltTy, resultElts32, size, resultElts, OP, Builder);
    // status
    UpdateStatus(Ld, status, Builder, OP);
    return;
  }
}
// Emit a dx.op.rawBufferStore of up to 4 values of type EltTy at
// (bufIdx, offset). `mask` selects which of the 4 vals are written and
// must be a contiguous prefix (1, 3, 7 or 15).
// 64-bit element types are split into i32 pairs; writing 3 or 4 such
// values requires a second store 16 bytes further on.
void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
                         Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder,
                         ArrayRef<Value *> vals, uint8_t mask, Constant *alignment) {
  OP::OpCode opcode = OP::OpCode::RawBufferStore;
  DXASSERT(vals.size() == 4, "buffer store need 4 values");
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  if (!is64) {
    // Native-width store: single call with the caller's mask.
    Value *Args[] = {OP->GetU32Const((unsigned)opcode),
                     handle,
                     bufIdx,
                     offset,
                     vals[0],
                     vals[1],
                     vals[2],
                     vals[3],
                     OP->GetU8Const(mask),
                     alignment};
    Function *dxilF = OP->GetOpFunc(opcode, EltTy);
    Builder.CreateCall(dxilF, Args);
  } else {
    // 64-bit: store as i32 pairs. Translate the element mask into i32
    // masks for the low (first 2 elements) and high (last 2) stores.
    Type *i32Ty = Builder.getInt32Ty();
    Function *dxilF = OP->GetOpFunc(opcode, i32Ty);
    Value *undefI32 = UndefValue::get(i32Ty);
    Value *vals32[8] = {undefI32, undefI32, undefI32, undefI32,
                        undefI32, undefI32, undefI32, undefI32};
    unsigned maskLo = 0;
    unsigned maskHi = 0;
    unsigned size = 0;
    switch (mask) {
    case 1:
      maskLo = 3;
      size = 1;
      break;
    case 3:
      maskLo = 15;
      size = 2;
      break;
    case 7:
      maskLo = 15;
      maskHi = 3;
      size = 3;
      break;
    case 15:
      maskLo = 15;
      maskHi = 15;
      size = 4;
      break;
    default:
      // Non-prefix masks are not representable once split into pairs.
      DXASSERT(0, "invalid mask");
    }
    Split64bitValForStore(EltTy, vals, size, vals32, OP, Builder);
    Value *Args[] = {OP->GetU32Const((unsigned)opcode),
                     handle,
                     bufIdx,
                     offset,
                     vals32[0],
                     vals32[1],
                     vals32[2],
                     vals32[3],
                     OP->GetU8Const(maskLo),
                     alignment};
    Builder.CreateCall(dxilF, Args);
    if (maskHi) {
      // Second store for the high pair, 16 bytes further on.
      // Update offset 4 by 4 bytes.
      offset = Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
      Value *Args[] = {OP->GetU32Const((unsigned)opcode),
                       handle,
                       bufIdx,
                       offset,
                       vals32[4],
                       vals32[5],
                       vals32[6],
                       vals32[7],
                       OP->GetU8Const(maskHi),
                       alignment};
      Builder.CreateCall(dxilF, Args);
    }
  }
}
  5295. Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
  5296. Value *handle, hlsl::OP *OP, Value *status,
  5297. Value *bufIdx, Value *baseOffset,
  5298. bool colMajor, const DataLayout &DL) {
  5299. unsigned col, row;
  5300. Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
  5301. Constant* alignment = OP->GetI32Const(DL.getTypeAllocSize(EltTy));
  5302. Value *offset = baseOffset;
  5303. if (baseOffset == nullptr)
  5304. offset = OP->GetU32Const(0);
  5305. unsigned matSize = col * row;
  5306. std::vector<Value *> elts(matSize);
  5307. unsigned rest = (matSize % 4);
  5308. if (rest) {
  5309. Value *ResultElts[4];
  5310. GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 3, alignment);
  5311. for (unsigned i = 0; i < rest; i++)
  5312. elts[i] = ResultElts[i];
  5313. offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * rest));
  5314. }
  5315. for (unsigned i = rest; i < matSize; i += 4) {
  5316. Value *ResultElts[4];
  5317. GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 4, alignment);
  5318. elts[i] = ResultElts[0];
  5319. elts[i + 1] = ResultElts[1];
  5320. elts[i + 2] = ResultElts[2];
  5321. elts[i + 3] = ResultElts[3];
  5322. // Update offset by 4*4bytes.
  5323. offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * 4));
  5324. }
  5325. return HLMatrixLower::BuildVector(EltTy, col * row, elts, Builder);
  5326. }
  5327. void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
  5328. hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
  5329. Value *val, bool colMajor, const DataLayout &DL) {
  5330. unsigned col, row;
  5331. Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
  5332. Constant *Alignment = OP->GetI32Const(DL.getTypeAllocSize(EltTy));
  5333. Value *offset = baseOffset;
  5334. if (baseOffset == nullptr)
  5335. offset = OP->GetU32Const(0);
  5336. unsigned matSize = col * row;
  5337. Value *undefElt = UndefValue::get(EltTy);
  5338. unsigned storeSize = matSize;
  5339. if (matSize % 4) {
  5340. storeSize = matSize + 4 - (matSize & 3);
  5341. }
  5342. std::vector<Value *> elts(storeSize, undefElt);
  5343. if (colMajor) {
  5344. for (unsigned i = 0; i < matSize; i++)
  5345. elts[i] = Builder.CreateExtractElement(val, i);
  5346. } else {
  5347. for (unsigned r = 0; r < row; r++)
  5348. for (unsigned c = 0; c < col; c++) {
  5349. unsigned rowMajorIdx = r * col + c;
  5350. unsigned colMajorIdx = c * row + r;
  5351. elts[rowMajorIdx] = Builder.CreateExtractElement(val, colMajorIdx);
  5352. }
  5353. }
  5354. for (unsigned i = 0; i < matSize; i += 4) {
  5355. uint8_t mask = 0;
  5356. for (unsigned j = 0; j < 4 && (i+j) < matSize; j++) {
  5357. if (elts[i+j] != undefElt)
  5358. mask |= (1<<j);
  5359. }
  5360. GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder,
  5361. {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask,
  5362. Alignment);
  5363. // Update offset by 4*4bytes.
  5364. offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * 4));
  5365. }
  5366. }
  5367. void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP,
  5368. Value *status, Value *bufIdx,
  5369. Value *baseOffset, const DataLayout &DL) {
  5370. IRBuilder<> Builder(CI);
  5371. HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
  5372. unsigned opcode = GetHLOpcode(CI);
  5373. DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore,
  5374. "only translate matrix loadStore here.");
  5375. HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
  5376. switch (matOp) {
  5377. case HLMatLoadStoreOpcode::ColMatLoad: {
  5378. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
  5379. Value *NewLd = TranslateStructBufMatLd(
  5380. ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
  5381. bufIdx, baseOffset, /*colMajor*/ true, DL);
  5382. CI->replaceAllUsesWith(NewLd);
  5383. } break;
  5384. case HLMatLoadStoreOpcode::RowMatLoad: {
  5385. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
  5386. Value *NewLd = TranslateStructBufMatLd(
  5387. ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
  5388. bufIdx, baseOffset, /*colMajor*/ false, DL);
  5389. CI->replaceAllUsesWith(NewLd);
  5390. } break;
  5391. case HLMatLoadStoreOpcode::ColMatStore: {
  5392. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
  5393. Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
  5394. TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
  5395. handle, OP, bufIdx, baseOffset, val,
  5396. /*colMajor*/ true, DL);
  5397. } break;
  5398. case HLMatLoadStoreOpcode::RowMatStore: {
  5399. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
  5400. Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
  5401. TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
  5402. handle, OP, bufIdx, baseOffset, val,
  5403. /*colMajor*/ false, DL);
  5404. } break;
  5405. }
  5406. CI->eraseFromParent();
  5407. }
// Forward declaration: lowers one user of a struct-buffer subscript
// (defined later in this file).
void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
                                     Value *bufIdx, Value *baseOffset,
                                     Value *status, hlsl::OP *OP, const DataLayout &DL);
// Subscript operator for a matrix member of a structured-buffer element.
// Computes per-component byte offsets for the addressed matrix elements and
// lowers every user (GEP / load / store) of the subscript into raw buffer
// load/store DXIL operations.
void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
                                    hlsl::OP *hlslOP, Value *bufIdx,
                                    Value *baseOffset, Value *status,
                                    const DataLayout &DL) {
  Value *zeroIdx = hlslOP->GetU32Const(0);
  // A null base offset means the matrix starts at the beginning of the
  // buffer element.
  if (baseOffset == nullptr)
    baseOffset = zeroIdx;
  unsigned opcode = GetHLOpcode(CI);
  IRBuilder<> subBuilder(CI);
  HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  Type *matType = basePtr->getType()->getPointerElementType();
  unsigned col, row;
  Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
  Constant *alignment = hlslOP->GetI32Const(DL.getTypeAllocSize(EltTy));
  Value *EltByteSize = ConstantInt::get(
      baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
  // The subscript yields either one matrix component or a vector of them.
  Type *resultType = CI->getType()->getPointerElementType();
  unsigned resultSize = 1;
  if (resultType->isVectorTy())
    resultSize = resultType->getVectorNumElements();
  DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
  _Analysis_assume_(resultSize <= 16);
  // idxList[i] holds the byte offset of result component i within the
  // buffer element.
  std::vector<Value *> idxList(resultSize);
  switch (subOp) {
  case HLSubscriptOpcode::ColMatSubscript:
  case HLSubscriptOpcode::RowMatSubscript: {
    // Row/column subscript: each component index is a separate operand
    // starting at kMatSubscriptSubOpIdx.
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
      offset = subBuilder.CreateMul(offset, EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  case HLSubscriptOpcode::RowMatElement:
  case HLSubscriptOpcode::ColMatElement: {
    // Element access (e.g. _m00_m11): indices come packed in one constant
    // aggregate.
    Constant *EltIdxs = cast<Constant>(idx);
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  default:
    DXASSERT(0, "invalid operation on const buffer");
    break;
  }
  Value *undefElt = UndefValue::get(EltTy);
  // Lower every user of the subscript. Advance the iterator before lowering
  // because users are erased as they are translated.
  for (auto U = CI->user_begin(); U != CI->user_end();) {
    Value *subsUser = *(U++);
    if (resultSize == 1) {
      // Scalar result: hand off to the generic struct-buffer user lowering.
      TranslateStructBufSubscriptUser(cast<Instruction>(subsUser), handle,
                                      bufIdx, idxList[0], status, hlslOP, DL);
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
      // GEP selects one component of the result: fold the index list into a
      // single offset, then lower each of the GEP's users with it.
      Value *GEPOffset =
          HLMatrixLower::LowerGEPOnMatIndexListToIndex(GEP, idxList);
      for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
        Instruction *gepUserInst = cast<Instruction>(*(gepU++));
        TranslateStructBufSubscriptUser(gepUserInst, handle, bufIdx, GEPOffset,
                                        status, hlslOP, DL);
      }
      GEP->eraseFromParent();
    } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
      IRBuilder<> stBuilder(stUser);
      Value *Val = stUser->getValueOperand();
      if (Val->getType()->isVectorTy()) {
        // Components of the subscript result are not contiguous, so each
        // vector element is stored individually at its own byte offset.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *EltVal = stBuilder.CreateExtractElement(Val, i);
          uint8_t mask = DXIL::kCompMask_X;
          GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
                              stBuilder, {EltVal, undefElt, undefElt, undefElt},
                              mask, alignment);
        }
      } else {
        uint8_t mask = DXIL::kCompMask_X;
        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
                            stBuilder, {Val, undefElt, undefElt, undefElt},
                            mask, alignment);
      }
      stUser->eraseFromParent();
    } else {
      // Must be load here.
      LoadInst *ldUser = cast<LoadInst>(subsUser);
      IRBuilder<> ldBuilder(ldUser);
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        // One single-component raw load per result element.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *ResultElt;
          // TODO: This can be inefficient for row major matrix load
          GenerateStructBufLd(handle, bufIdx, idxList[i],
                              /*status*/ nullptr, EltTy, ResultElt, hlslOP,
                              ldBuilder, 1, alignment);
          ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
        }
      } else {
        GenerateStructBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
                            EltTy, ldData, hlslOP, ldBuilder, 4, alignment);
      }
      ldUser->replaceAllUsesWith(ldData);
      ldUser->eraseFromParent();
    }
  }
  CI->eraseFromParent();
}
// Lower one user of a structured-buffer subscript (or of a GEP derived from
// one). baseOffset is the byte offset inside the buffer element; a null
// baseOffset means offset zero. Handles HL intrinsic calls (atomics), matrix
// load/store, nested matrix subscripts, plain loads/stores (including arrays
// of scalars/vectors), and GEPs, which recurse with an accumulated offset.
void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
                                     Value *bufIdx, Value *baseOffset,
                                     Value *status, hlsl::OP *OP,
                                     const DataLayout &DL) {
  IRBuilder<> Builder(user);
  if (CallInst *userCall = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = // user call?
        hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
    unsigned opcode = GetHLOpcode(userCall);
    // For case element type of structure buffer is not structure type.
    if (baseOffset == nullptr)
      baseOffset = OP->GetU32Const(0);
    if (group == HLOpcodeGroup::HLIntrinsic) {
      IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
      switch (IOP) {
      case IntrinsicOp::MOP_Load: {
        if (userCall->getType()->isPointerTy()) {
          // Struct will return pointers which like []
        } else {
          // Use builtin types on structuredBuffer.
        }
        DXASSERT(0, "not implement yet");
      } break;
      // Each Interlocked* intrinsic lowers to a DXIL AtomicBinOp with the
      // matching sub-opcode; the buffer index and byte offset address the
      // atomic target.
      case IntrinsicOp::IOP_InterlockedAdd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedAnd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedExchange: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedOr: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedXor: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor,
                                       Builder, OP);
      } break;
      // Both compare variants lower to the same DXIL op; they differ only in
      // whether the original value is returned to the caller.
      case IntrinsicOp::IOP_InterlockedCompareStore:
      case IntrinsicOp::IOP_InterlockedCompareExchange: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
                            handle, bufIdx, baseOffset);
        TranslateAtomicCmpXChg(helper, Builder, OP);
      } break;
      default:
        DXASSERT(0, "invalid opcode");
        break;
      }
      userCall->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLMatLoadStore)
      // TODO: support 64 bit.
      TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx,
                                baseOffset, DL);
    else if (group == HLOpcodeGroup::HLSubscript) {
      TranslateStructBufMatSubscript(userCall, handle, OP, bufIdx, baseOffset,
                                     status, DL);
    }
  } else if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
    LoadInst *ldInst = dyn_cast<LoadInst>(user);
    StoreInst *stInst = dyn_cast<StoreInst>(user);
    Type *Ty = isa<LoadInst>(user) ? ldInst->getType()
                                   : stInst->getValueOperand()->getType();
    Type *pOverloadTy = Ty->getScalarType();
    Value *offset = baseOffset;
    if (baseOffset == nullptr)
      offset = OP->GetU32Const(0);
    // Arrays of scalars/vectors are handled element by element, stepping the
    // offset by the allocation size of one array element.
    unsigned arraySize = 1;
    Value *eltSize = nullptr;
    if (pOverloadTy->isArrayTy()) {
      arraySize = pOverloadTy->getArrayNumElements();
      eltSize = OP->GetU32Const(
          DL.getTypeAllocSize(pOverloadTy->getArrayElementType()));
      pOverloadTy = pOverloadTy->getArrayElementType()->getScalarType();
    }
    if (ldInst) {
      // Load one scalar/vector value of type Ty at the given byte offset.
      auto LdElement = [&](Value *offset, IRBuilder<> &Builder) -> Value * {
        Value *ResultElts[4];
        unsigned numComponents = 0;
        if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
          numComponents = VTy->getNumElements();
        } else {
          numComponents = 1;
        }
        Constant *alignment =
            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
        GenerateStructBufLd(handle, bufIdx, offset, status, pOverloadTy,
                            ResultElts, OP, Builder, numComponents, alignment);
        return ScalarizeElements(Ty, ResultElts, Builder);
      };
      Value *newLd = LdElement(offset, Builder);
      if (arraySize > 1) {
        // Rebuild the array value one element at a time.
        newLd =
            Builder.CreateInsertValue(UndefValue::get(Ty), newLd, (uint64_t)0);
        for (unsigned i = 1; i < arraySize; i++) {
          offset = Builder.CreateAdd(offset, eltSize);
          Value *eltLd = LdElement(offset, Builder);
          newLd = Builder.CreateInsertValue(newLd, eltLd, i);
        }
      }
      ldInst->replaceAllUsesWith(newLd);
    } else {
      Value *val = stInst->getValueOperand();
      // Store one scalar/vector value at the given byte offset, building the
      // component write mask from the vector width.
      auto StElement = [&](Value *offset, Value *val, IRBuilder<> &Builder) {
        Value *undefVal = llvm::UndefValue::get(pOverloadTy);
        Value *vals[] = {undefVal, undefVal, undefVal, undefVal};
        uint8_t mask = 0;
        if (Ty->isVectorTy()) {
          unsigned vectorNumElements = Ty->getVectorNumElements();
          DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector");
          _Analysis_assume_(vectorNumElements <= 4);
          for (unsigned i = 0; i < vectorNumElements; i++) {
            vals[i] = Builder.CreateExtractElement(val, i);
            mask |= (1 << i);
          }
        } else {
          vals[0] = val;
          mask = DXIL::kCompMask_X;
        }
        Constant *alignment =
            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
        GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder,
                            vals, mask, alignment);
      };
      if (arraySize > 1)
        val = Builder.CreateExtractValue(val, 0);
      StElement(offset, val, Builder);
      if (arraySize > 1) {
        val = stInst->getValueOperand();
        for (unsigned i = 1; i < arraySize; i++) {
          offset = Builder.CreateAdd(offset, eltSize);
          Value *eltVal = Builder.CreateExtractValue(val, i);
          StElement(offset, eltVal, Builder);
        }
      }
    }
    user->eraseFromParent();
  } else {
    // should only used by GEP
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    Type *Ty = GEP->getType()->getPointerElementType();
    Value *offset = GEPIdxToOffset(GEP, Builder, OP, DL);
    DXASSERT_LOCALVAR(Ty,
                      offset->getType() == Type::getInt32Ty(Ty->getContext()),
                      "else bitness is wrong");
    // Accumulate the GEP's byte offset onto the incoming base offset, then
    // recurse into each user of the GEP with the combined offset.
    if (baseOffset)
      offset = Builder.CreateAdd(offset, baseOffset);
    for (auto U = GEP->user_begin(); U != GEP->user_end();) {
      Value *GEPUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(GEPUser), handle,
                                      bufIdx, offset, status, OP, DL);
    }
    // delete the inst
    GEP->eraseFromParent();
  }
}
  5711. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  5712. hlsl::OP *OP, const DataLayout &DL) {
  5713. Value *bufIdx = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
  5714. for (auto U = CI->user_begin(); U != CI->user_end();) {
  5715. Value *user = *(U++);
  5716. TranslateStructBufSubscriptUser(cast<Instruction>(user), handle, bufIdx,
  5717. /*baseOffset*/ nullptr, status, OP, DL);
  5718. }
  5719. }
  5720. }
  5721. // HLSubscript.
  5722. namespace {
  5723. Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK,
  5724. DXIL::ResourceClass RC, Value *handle,
  5725. LoadInst *ldInst, IRBuilder<> &Builder,
  5726. hlsl::OP *hlslOP, const DataLayout &DL) {
  5727. ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, /*bForSubscript*/ true);
  5728. // Default sampleIdx for 2DMS textures.
  5729. if (RK == DxilResource::Kind::Texture2DMS ||
  5730. RK == DxilResource::Kind::Texture2DMSArray)
  5731. ldHelper.mipLevel = hlslOP->GetU32Const(0);
  5732. // use ldInst as retVal
  5733. ldHelper.retVal = ldInst;
  5734. TranslateLoad(ldHelper, RK, Builder, hlslOP, DL);
  5735. // delete the ld
  5736. ldInst->eraseFromParent();
  5737. return ldHelper.retVal;
  5738. }
// Insert EltVal into VecVal at position EltIdx and return the updated vector
// value. A constant index becomes a single insertelement. A dynamic index
// splits the block at InsertPt and emits a switch over all vectorSize
// possible indices, one insertelement per case, merged by a phi in the
// continuation block (the default/out-of-range path keeps VecVal unchanged).
Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx,
                       unsigned vectorSize, Instruction *InsertPt) {
  IRBuilder<> Builder(InsertPt);
  if (ConstantInt *CEltIdx = dyn_cast<ConstantInt>(EltIdx)) {
    VecVal =
        Builder.CreateInsertElement(VecVal, EltVal, CEltIdx->getLimitedValue());
  } else {
    // Split at InsertPt; the unconditional branch created by the split is
    // replaced with a switch on the dynamic index.
    BasicBlock *BB = InsertPt->getParent();
    BasicBlock *EndBB = BB->splitBasicBlock(InsertPt);
    TerminatorInst *TI = BB->getTerminator();
    IRBuilder<> SwitchBuilder(TI);
    LLVMContext &Ctx = InsertPt->getContext();
    SwitchInst *Switch = SwitchBuilder.CreateSwitch(EltIdx, EndBB, vectorSize);
    TI->eraseFromParent();
    Function *F = EndBB->getParent();
    IRBuilder<> endSwitchBuilder(EndBB->begin());
    Type *Ty = VecVal->getType();
    // One incoming value per case plus the default edge.
    PHINode *VecPhi = endSwitchBuilder.CreatePHI(Ty, vectorSize + 1);
    for (unsigned i = 0; i < vectorSize; i++) {
      BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case", F, EndBB);
      Switch->addCase(SwitchBuilder.getInt32(i), CaseBB);
      IRBuilder<> CaseBuilder(CaseBB);
      Value *CaseVal = CaseBuilder.CreateInsertElement(VecVal, EltVal, i);
      VecPhi->addIncoming(CaseVal, CaseBB);
      CaseBuilder.CreateBr(EndBB);
    }
    // Default path: index out of range leaves the vector unchanged.
    VecPhi->addIncoming(VecVal, BB);
    VecVal = VecPhi;
  }
  return VecVal;
}
  5770. void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  5771. auto U = CI->user_begin();
  5772. Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  5773. hlsl::OP *hlslOP = &helper.hlslOP;
  5774. // Resource ptr.
  5775. Value *handle = ptr;
  5776. DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  5777. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  5778. Type *Ty = CI->getType()->getPointerElementType();
  5779. for (auto It = CI->user_begin(); It != CI->user_end(); ) {
  5780. User *user = *(It++);
  5781. Instruction *I = cast<Instruction>(user);
  5782. IRBuilder<> Builder(I);
  5783. if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
  5784. TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.dataLayout);
  5785. } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
  5786. Value *val = stInst->getValueOperand();
  5787. TranslateStore(RK, handle, val,
  5788. CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
  5789. Builder, hlslOP);
  5790. // delete the st
  5791. stInst->eraseFromParent();
  5792. } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
  5793. // Must be vector type here.
  5794. unsigned vectorSize = Ty->getVectorNumElements();
  5795. DXASSERT(GEP->getNumIndices() == 2, "");
  5796. Use *GEPIdx = GEP->idx_begin();
  5797. GEPIdx++;
  5798. Value *EltIdx = *GEPIdx;
  5799. for (auto GEPIt = GEP->user_begin(); GEPIt != GEP->user_end();) {
  5800. User *GEPUser = *(GEPIt++);
  5801. if (StoreInst *SI = dyn_cast<StoreInst>(GEPUser)) {
  5802. IRBuilder<> StBuilder(SI);
  5803. // Generate Ld.
  5804. LoadInst *tmpLd = StBuilder.CreateLoad(CI);
  5805. Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, StBuilder,
  5806. hlslOP, helper.dataLayout);
  5807. // Update vector.
  5808. ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx,
  5809. vectorSize, SI);
  5810. // Generate St.
  5811. // Reset insert point, UpdateVectorElt may move SI to different block.
  5812. StBuilder.SetInsertPoint(SI);
  5813. TranslateStore(RK, handle, ldVal,
  5814. CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
  5815. StBuilder, hlslOP);
  5816. SI->eraseFromParent();
  5817. continue;
  5818. }
  5819. if (!isa<CallInst>(GEPUser)) {
  5820. // Invalid operations.
  5821. Translated = false;
  5822. CI->getContext().emitError(GEP, "Invalid operation on typed buffer");
  5823. return;
  5824. }
  5825. CallInst *userCall = cast<CallInst>(GEPUser);
  5826. HLOpcodeGroup group =
  5827. hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
  5828. if (group != HLOpcodeGroup::HLIntrinsic) {
  5829. // Invalid operations.
  5830. Translated = false;
  5831. CI->getContext().emitError(userCall,
  5832. "Invalid operation on typed buffer");
  5833. return;
  5834. }
  5835. unsigned opcode = hlsl::GetHLOpcode(userCall);
  5836. IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
  5837. switch (IOP) {
  5838. case IntrinsicOp::IOP_InterlockedAdd:
  5839. case IntrinsicOp::IOP_InterlockedAnd:
  5840. case IntrinsicOp::IOP_InterlockedExchange:
  5841. case IntrinsicOp::IOP_InterlockedMax:
  5842. case IntrinsicOp::IOP_InterlockedMin:
  5843. case IntrinsicOp::IOP_InterlockedUMax:
  5844. case IntrinsicOp::IOP_InterlockedUMin:
  5845. case IntrinsicOp::IOP_InterlockedOr:
  5846. case IntrinsicOp::IOP_InterlockedXor:
  5847. case IntrinsicOp::IOP_InterlockedCompareStore:
  5848. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  5849. // Invalid operations.
  5850. Translated = false;
  5851. CI->getContext().emitError(
  5852. userCall, "Atomic operation on typed buffer is not supported");
  5853. return;
  5854. } break;
  5855. default:
  5856. // Invalid operations.
  5857. Translated = false;
  5858. CI->getContext().emitError(userCall,
  5859. "Invalid operation on typed buffer");
  5860. return;
  5861. break;
  5862. }
  5863. }
  5864. GEP->eraseFromParent();
  5865. } else {
  5866. CallInst *userCall = cast<CallInst>(user);
  5867. HLOpcodeGroup group =
  5868. hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
  5869. unsigned opcode = hlsl::GetHLOpcode(userCall);
  5870. if (group == HLOpcodeGroup::HLIntrinsic) {
  5871. IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
  5872. if (RC == DXIL::ResourceClass::SRV) {
  5873. // Invalid operations.
  5874. Translated = false;
  5875. switch (IOP) {
  5876. case IntrinsicOp::IOP_InterlockedAdd:
  5877. case IntrinsicOp::IOP_InterlockedAnd:
  5878. case IntrinsicOp::IOP_InterlockedExchange:
  5879. case IntrinsicOp::IOP_InterlockedMax:
  5880. case IntrinsicOp::IOP_InterlockedMin:
  5881. case IntrinsicOp::IOP_InterlockedUMax:
  5882. case IntrinsicOp::IOP_InterlockedUMin:
  5883. case IntrinsicOp::IOP_InterlockedOr:
  5884. case IntrinsicOp::IOP_InterlockedXor:
  5885. case IntrinsicOp::IOP_InterlockedCompareStore:
  5886. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  5887. CI->getContext().emitError(
  5888. userCall, "Atomic operation targets must be groupshared on UAV");
  5889. return;
  5890. } break;
  5891. default:
  5892. CI->getContext().emitError(userCall,
  5893. "Invalid operation on typed buffer");
  5894. return;
  5895. break;
  5896. }
  5897. }
  5898. switch (IOP) {
  5899. case IntrinsicOp::IOP_InterlockedAdd: {
  5900. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAdd);
  5901. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5902. helper.addr, /*offset*/ nullptr);
  5903. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add,
  5904. Builder, hlslOP);
  5905. } break;
  5906. case IntrinsicOp::IOP_InterlockedAnd: {
  5907. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAnd);
  5908. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5909. helper.addr, /*offset*/ nullptr);
  5910. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And,
  5911. Builder, hlslOP);
  5912. } break;
  5913. case IntrinsicOp::IOP_InterlockedExchange: {
  5914. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedExchange);
  5915. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5916. helper.addr, /*offset*/ nullptr);
  5917. TranslateAtomicBinaryOperation(
  5918. atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP);
  5919. } break;
  5920. case IntrinsicOp::IOP_InterlockedMax: {
  5921. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMax);
  5922. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5923. helper.addr, /*offset*/ nullptr);
  5924. TranslateAtomicBinaryOperation(
  5925. atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP);
  5926. } break;
  5927. case IntrinsicOp::IOP_InterlockedMin: {
  5928. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMin);
  5929. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5930. helper.addr, /*offset*/ nullptr);
  5931. TranslateAtomicBinaryOperation(
  5932. atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP);
  5933. } break;
  5934. case IntrinsicOp::IOP_InterlockedUMax: {
  5935. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMax);
  5936. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5937. helper.addr, /*offset*/ nullptr);
  5938. TranslateAtomicBinaryOperation(
  5939. atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP);
  5940. } break;
  5941. case IntrinsicOp::IOP_InterlockedUMin: {
  5942. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMin);
  5943. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5944. helper.addr, /*offset*/ nullptr);
  5945. TranslateAtomicBinaryOperation(
  5946. atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP);
  5947. } break;
  5948. case IntrinsicOp::IOP_InterlockedOr: {
  5949. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedOr);
  5950. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5951. helper.addr, /*offset*/ nullptr);
  5952. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or,
  5953. Builder, hlslOP);
  5954. } break;
  5955. case IntrinsicOp::IOP_InterlockedXor: {
  5956. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedXor);
  5957. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  5958. helper.addr, /*offset*/ nullptr);
  5959. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor,
  5960. Builder, hlslOP);
  5961. } break;
  5962. case IntrinsicOp::IOP_InterlockedCompareStore:
  5963. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  5964. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedCompareExchange);
  5965. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
  5966. handle, helper.addr, /*offset*/ nullptr);
  5967. TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
  5968. } break;
  5969. default:
  5970. DXASSERT(0, "invalid opcode");
  5971. break;
  5972. }
  5973. } else {
  5974. DXASSERT(0, "invalid group");
  5975. }
  5976. userCall->eraseFromParent();
  5977. }
  5978. }
  5979. }
// Top-level lowering for an HL subscript call. Dispatches on the subscript
// opcode: cbuffer subscripts, double subscripts (e.g. tex.mips[m][coord]),
// resource-handle subscripts (structured/typed buffers and textures), and
// matrix subscripts on local/shared memory (which should already have been
// lowered by HLMatrixLowerPass). Sets Translated to report whether the call
// was fully lowered here.
void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
                          HLOperationLowerHelper &helper,
                          HLObjectOperationLowerHelper *pObjHelper,
                          bool &Translated) {
  if (CI->user_empty()) {
    // Dead subscript: nothing to lower, just let the caller erase it.
    Translated = true;
    return;
  }
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  if (opcode == HLSubscriptOpcode::CBufferSubscript) {
    // Fold GEP chains into the subscript before lowering.
    HLModule::MergeGepUse(CI);
    // Resource ptr.
    Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
    if (helper.bLegacyCBufferLoad)
      TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys,
                                  helper.dataLayout, pObjHelper);
    else {
      TranslateCBOperations(handle, CI, /*offset*/ hlslOP->GetU32Const(0),
                            hlslOP, helper.dxilTypeSys,
                            CI->getModule()->getDataLayout());
    }
    Translated = true;
    return;
  } else if (opcode == HLSubscriptOpcode::DoubleSubscript) {
    // Resource ptr.
    Value *handle = ptr;
    DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
    Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
    Value *mipLevel =
        CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx);
    auto U = CI->user_begin();
    DXASSERT(CI->hasOneUse(), "subscript should only has one use");
    // TODO: support store.
    Instruction *ldInst = cast<Instruction>(*U);
    ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel);
    IRBuilder<> Builder(CI);
    TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout);
    ldInst->eraseFromParent();
    Translated = true;
    return;
  } else {
    Type *HandleTy = hlslOP->GetHandleType();
    if (ptr->getType() == HandleTy) {
      // Resource ptr.
      Value *handle = ptr;
      DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
      if (RK == DxilResource::Kind::Invalid) {
        Translated = false;
        return;
      }
      Translated = true;
      Type *ObjTy = pObjHelper->GetResourceType(handle);
      Type *RetTy = ObjTy->getStructElementType(0);
      if (RK == DxilResource::Kind::StructuredBuffer) {
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP,
                                    helper.dataLayout);
      } else if (RetTy->isAggregateType() &&
                 RK == DxilResource::Kind::TypedBuffer) {
        // Typed buffer with an aggregate element type is lowered like a
        // structured buffer first, then the generated DXIL calls are patched
        // to the typed-buffer form (no second coordinate / no offset).
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP,
                                    helper.dataLayout);
        // Clear offset for typed buf.
        for (auto User : handle->users()) {
          // NOTE: this CI shadows the outer subscript CI; it is the lowered
          // DXIL call on the handle.
          CallInst *CI = cast<CallInst>(User);
          // Skip not lowered HL functions.
          if (hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()) !=
              HLOpcodeGroup::NotHL)
            continue;
          switch (hlslOP->GetDxilOpFuncCallInst(CI)) {
          case DXIL::OpCode::BufferLoad: {
            CI->setArgOperand(DXIL::OperandIndex::kBufferLoadCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::BufferStore: {
            CI->setArgOperand(DXIL::OperandIndex::kBufferStoreCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicBinOp: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicCompareExchange: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::RawBufferLoad: {
            // Structured buffer inside a typed buffer must be converted to
            // typed buffer load. Typed buffer load is equivalent to raw
            // buffer load, except there is no mask.
            StructType *STy =
                cast<StructType>(CI->getFunctionType()->getReturnType());
            Type *ETy = STy->getElementType(0);
            SmallVector<Value *, 4> Args;
            Args.emplace_back(
                hlslOP->GetI32Const((unsigned)DXIL::OpCode::BufferLoad));
            Args.emplace_back(CI->getArgOperand(1)); // handle
            Args.emplace_back(CI->getArgOperand(2)); // index
            Args.emplace_back(UndefValue::get(helper.i32Ty)); // offset
            IRBuilder<> builder(CI);
            Function *newFunction =
                hlslOP->GetOpFunc(DXIL::OpCode::BufferLoad, ETy);
            CallInst *newCall = builder.CreateCall(newFunction, Args);
            CI->replaceAllUsesWith(newCall);
            CI->eraseFromParent();
          } break;
          default:
            DXASSERT(0, "Invalid operation on resource handle");
            break;
          }
        }
      } else {
        TranslateDefaultSubscript(CI, helper, pObjHelper, Translated);
      }
      return;
    }
  }
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  if (IsLocalVariablePtr(basePtr) || IsSharedMemPtr(basePtr)) {
    // Translate matrix into vector of array for share memory or local
    // variable should be done in HLMatrixLowerPass
    DXASSERT_NOMSG(0);
    Translated = true;
    return;
  }
  // Other case should be take care in TranslateStructBufSubscript or
  // TranslateCBOperations.
  Translated = false;
  return;
}
  6102. }
  6103. void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper) {
  6104. for (auto U = F->user_begin(); U != F->user_end();) {
  6105. Value *user = *(U++);
  6106. if (!isa<Instruction>(user))
  6107. continue;
  6108. // must be call inst
  6109. CallInst *CI = cast<CallInst>(user);
  6110. unsigned opcode = GetHLOpcode(CI);
  6111. bool Translated = true;
  6112. TranslateHLSubscript(
  6113. CI, static_cast<HLSubscriptOpcode>(opcode), helper, pObjHelper, Translated);
  6114. if (Translated) {
  6115. // delete the call
  6116. DXASSERT(CI->use_empty(),
  6117. "else TranslateHLSubscript didn't replace/erase uses");
  6118. CI->eraseFromParent();
  6119. }
  6120. }
  6121. }
  6122. // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast of alloca, and return load from alloca
  6123. // If bOrigAllocaTy is true: create alloca of old type instead, write to alloca, and return load from bitcast of alloca
  6124. static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, bool bOrigAllocaTy = false, const Twine &Name = "") {
  6125. IRBuilder<> Builder(Insert);
  6126. if (Ty->isPointerTy()) {
  6127. // If pointer, we can bitcast directly
  6128. return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
  6129. } else {
  6130. // If value, we have to alloca, store to bitcast ptr, and load
  6131. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
  6132. Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
  6133. Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
  6134. Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
  6135. Instruction *bitCast = cast<Instruction>(Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
  6136. Builder.CreateStore(V, bOrigAllocaTy ? allocaInst : bitCast);
  6137. return Builder.CreateLoad(bOrigAllocaTy ? bitCast : allocaInst, Name);
  6138. }
  6139. }
  6140. static Instruction *CreateTransposeShuffle(IRBuilder<> &Builder, Value *vecVal, unsigned toRows, unsigned toCols) {
  6141. SmallVector<int, 16> castMask(toCols * toRows);
  6142. unsigned idx = 0;
  6143. for (unsigned r = 0; r < toRows; r++)
  6144. for (unsigned c = 0; c < toCols; c++)
  6145. castMask[idx++] = c * toRows + r;
  6146. return cast<Instruction>(
  6147. Builder.CreateShuffleVector(vecVal, vecVal, castMask));
  6148. }
// Lower all calls to the high-level helper function F (each HL function
// carries one opcode group) into DXIL operations or plain LLVM IR, erasing
// each call that was translated. pObjHelper carries resource/object state
// shared across translations.
void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
                                 hlsl::HLOpcodeGroup group,
                                 HLObjectOperationLowerHelper *pObjHelper) {
  if (group == HLOpcodeGroup::HLIntrinsic) {
    // map to dxil operations
    for (auto U = F->user_begin(); U != F->user_end();) {
      // Advance before translating: a translated call is erased below.
      Value *User = *(U++);
      if (!isa<Instruction>(User))
        continue;
      // must be call inst
      CallInst *CI = cast<CallInst>(User);
      // Keep the instruction to lower by other function.
      bool Translated = true;
      TranslateBuiltinIntrinsic(CI, helper, pObjHelper, Translated);
      if (Translated) {
        // delete the call
        DXASSERT(CI->use_empty(),
                 "else TranslateBuiltinIntrinsic didn't replace/erase uses");
        CI->eraseFromParent();
      }
    }
  } else {
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Both ld/st use arg1 for the pointer.
      Type *PtrTy =
          F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx);
      if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
        // Translate matrix into vector of array for shared memory
        // variable should be done in HLMatrixLowerPass. Any remaining
        // user at this point means that pass failed to handle it.
        if (!F->user_empty())
          F->getContext().emitError("Fail to lower matrix load/store.");
      } else if (PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) {
        // Default address space may be function argument in lib target;
        // lower matrix load/store to a plain vector load/store through a
        // bitcast pointer.
        if (!F->user_empty()) {
          for (auto U = F->user_begin(); U != F->user_end();) {
            // Advance before translating: calls are erased below.
            Value *User = *(U++);
            if (!isa<Instruction>(User))
              continue;
            // must be call inst
            CallInst *CI = cast<CallInst>(User);
            IRBuilder<> Builder(CI);
            HLMatLoadStoreOpcode opcode =
                static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
            switch (opcode) {
            case HLMatLoadStoreOpcode::ColMatStore:
            case HLMatLoadStoreOpcode::RowMatStore: {
              // Store: bitcast the matrix pointer to the vector value's
              // pointer type, then store the vector through it.
              Value *vecVal = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
              Value *matPtr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
              Value *castPtr = Builder.CreateBitCast(
                  matPtr, vecVal->getType()->getPointerTo());
              Builder.CreateStore(vecVal, castPtr);
              CI->eraseFromParent();
            } break;
            case HLMatLoadStoreOpcode::ColMatLoad:
            case HLMatLoadStoreOpcode::RowMatLoad: {
              // Load: bitcast the matrix pointer to the call's result
              // (vector) pointer type and load from it.
              Value *matPtr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
              Value *castPtr =
                  Builder.CreateBitCast(matPtr, CI->getType()->getPointerTo());
              Value *vecVal = Builder.CreateLoad(castPtr);
              CI->replaceAllUsesWith(vecVal);
              CI->eraseFromParent();
            } break;
            }
            // NOTE(review): no default case — other HLMatLoadStoreOpcode
            // values leave the call untouched here.
          }
        }
      }
    } else if (group == HLOpcodeGroup::HLCast) {
      // HLCast may be used on matrix value function argument in lib target
      if (!F->user_empty()) {
        for (auto U = F->user_begin(); U != F->user_end();) {
          // Advance before translating: calls are erased below.
          Value *User = *(U++);
          if (!isa<Instruction>(User))
            continue;
          // must be call inst
          CallInst *CI = cast<CallInst>(User);
          IRBuilder<> Builder(CI);
          HLCastOpcode opcode = static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI));
          bool bTranspose = false;
          bool bColDest = false;
          switch (opcode) {
          case HLCastOpcode::RowMatrixToColMatrix:
            bColDest = true;
            // Intentional fallthrough: row->col also needs a transpose.
          case HLCastOpcode::ColMatrixToRowMatrix:
            bTranspose = true;
            // Intentional fallthrough: orientation casts share the
            // bitcast-to-vector path below.
          case HLCastOpcode::ColMatrixToVecCast:
          case HLCastOpcode::RowMatrixToVecCast: {
            Value *matVal = CI->getArgOperand(HLOperandIndex::kInitFirstArgOpIdx);
            // Reinterpret the matrix value (or pointer) as the vector type.
            Value *vecVal = BitCastValueOrPtr(matVal, CI, CI->getType(),
                                              /*bOrigAllocaTy*/false,
                                              matVal->getName());
            if (bTranspose) {
              unsigned row, col;
              HLMatrixLower::GetMatrixInfo(matVal->getType(), col, row);
              if (bColDest) std::swap(row, col);
              vecVal = CreateTransposeShuffle(Builder, vecVal, row, col);
            }
            CI->replaceAllUsesWith(vecVal);
            CI->eraseFromParent();
          } break;
          }
          // NOTE(review): no default case — other HLCastOpcode values leave
          // the call untouched here.
        }
      }
    } else if (group == HLOpcodeGroup::HLSubscript) {
      TranslateSubscriptOperation(F, helper, pObjHelper);
    }
    // map to math function or llvm ir
  }
}
  6253. typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
  6254. static void TranslateHLExtension(Function *F,
  6255. HLSLExtensionsCodegenHelper *helper,
  6256. OP& hlslOp) {
  6257. // Find all calls to the function F.
  6258. // Store the calls in a vector for now to be replaced the loop below.
  6259. // We use a two step "find then replace" to avoid removing uses while
  6260. // iterating.
  6261. SmallVector<CallInst *, 8> CallsToReplace;
  6262. for (User *U : F->users()) {
  6263. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  6264. CallsToReplace.push_back(CI);
  6265. }
  6266. }
  6267. // Get the lowering strategy to use for this intrinsic.
  6268. llvm::StringRef LowerStrategy = GetHLLowerStrategy(F);
  6269. ExtensionLowering lower(LowerStrategy, helper, hlslOp);
  6270. // Replace all calls that were successfully translated.
  6271. for (CallInst *CI : CallsToReplace) {
  6272. Value *Result = lower.Translate(CI);
  6273. if (Result && Result != CI) {
  6274. CI->replaceAllUsesWith(Result);
  6275. CI->eraseFromParent();
  6276. }
  6277. }
  6278. }
  6279. namespace hlsl {
  6280. void TranslateBuiltinOperations(
  6281. HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper,
  6282. std::unordered_set<LoadInst *> &UpdateCounterSet,
  6283. std::unordered_set<Value *> &NonUniformSet) {
  6284. HLOperationLowerHelper helper(HLM);
  6285. HLObjectOperationLowerHelper objHelper = {HLM, UpdateCounterSet,
  6286. NonUniformSet};
  6287. Module *M = HLM.GetModule();
  6288. // generate dxil operation
  6289. for (iplist<Function>::iterator F : M->getFunctionList()) {
  6290. if (F->user_empty())
  6291. continue;
  6292. if (!F->isDeclaration()) {
  6293. continue;
  6294. }
  6295. hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
  6296. if (group == HLOpcodeGroup::NotHL) {
  6297. // Nothing to do.
  6298. continue;
  6299. }
  6300. if (group == HLOpcodeGroup::HLExtIntrinsic) {
  6301. TranslateHLExtension(F, extCodegenHelper, helper.hlslOP);
  6302. continue;
  6303. }
  6304. TranslateHLBuiltinOperation(F, helper, group, &objHelper);
  6305. }
  6306. }
  6307. }