HLOperationLower.cpp 323 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // HLOperationLower.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Lower functions to lower HL operations to DXIL operations. //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #define _USE_MATH_DEFINES
  12. #include <array>
  13. #include <cmath>
  14. #include <unordered_set>
  15. #include <functional>
  16. #include "dxc/DXIL/DxilModule.h"
  17. #include "dxc/DXIL/DxilOperations.h"
  18. #include "dxc/HLSL/HLMatrixLowerHelper.h"
  19. #include "dxc/HLSL/HLMatrixType.h"
  20. #include "dxc/HLSL/HLModule.h"
  21. #include "dxc/DXIL/DxilUtil.h"
  22. #include "dxc/HLSL/HLOperationLower.h"
  23. #include "dxc/HLSL/HLOperationLowerExtension.h"
  24. #include "dxc/HLSL/HLOperations.h"
  25. #include "dxc/HlslIntrinsicOp.h"
  26. #include "dxc/HLSL/DxilConvergent.h"
  27. #include "dxc/DXIL/DxilResourceProperties.h"
  28. #include "llvm/IR/GetElementPtrTypeIterator.h"
  29. #include "llvm/IR/IRBuilder.h"
  30. #include "llvm/IR/Instructions.h"
  31. #include "llvm/IR/Module.h"
  32. #include "llvm/ADT/APSInt.h"
  33. using namespace llvm;
  34. using namespace hlsl;
  35. struct HLOperationLowerHelper {
  36. OP &hlslOP;
  37. Type *voidTy;
  38. Type *f32Ty;
  39. Type *i32Ty;
  40. llvm::Type *i1Ty;
  41. Type *i8Ty;
  42. DxilTypeSystem &dxilTypeSys;
  43. DxilFunctionProps *functionProps;
  44. bool bLegacyCBufferLoad;
  45. DataLayout dataLayout;
  46. HLOperationLowerHelper(HLModule &HLM);
  47. };
// Caches commonly used LLVM types and options from the HLModule.
// The data layout string depends on whether min-precision types are enabled.
HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
    : hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
      dataLayout(DataLayout(HLM.GetHLOptions().bUseMinPrecision
                                ? hlsl::DXIL::kLegacyLayoutString
                                : hlsl::DXIL::kNewLayoutString)) {
  llvm::LLVMContext &Ctx = HLM.GetCtx();
  voidTy = Type::getVoidTy(Ctx);
  f32Ty = Type::getFloatTy(Ctx);
  i32Ty = Type::getInt32Ty(Ctx);
  i1Ty = Type::getInt1Ty(Ctx);
  i8Ty = Type::getInt8Ty(Ctx);
  Function *EntryFunc = HLM.GetEntryFunction();
  // Function props only exist for actual entry points.
  functionProps = nullptr;
  if (HLM.HasDxilFunctionProps(EntryFunc))
    functionProps = &HLM.GetDxilFunctionProps(EntryFunc);
  bLegacyCBufferLoad = HLM.GetHLOptions().bLegacyCBufferLoad;
}
// Helper for lowering object (resource) intrinsics. It caches the resource
// class/kind/type per handle, records which resources use a UAV counter,
// and materializes stand-in global resources for resources declared inside
// a cbuffer.
struct HLObjectOperationLowerHelper {
private:
  // For object intrinsics.
  HLModule &HLM;
  // Cached attributes of the resource behind a handle.
  struct ResAttribute {
    DXIL::ResourceClass RC;
    DXIL::ResourceKind RK;
    Type *ResourceType;
  };
  std::unordered_map<Value *, ResAttribute> HandleMetaMap;
  // Loads of resources whose UAV counter is used; storage owned by caller.
  std::unordered_set<LoadInst *> &UpdateCounterSet;
  // Map from pointer of cbuffer to pointer of resource.
  // For cbuffer like this:
  //   cbuffer A {
  //     Texture2D T;
  //   };
  // A global resource Texture2D T2 will be created for Texture2D T.
  // CBPtrToResourceMap[T] will return T2.
  std::unordered_map<Value *, Value *> CBPtrToResourceMap;

public:
  HLObjectOperationLowerHelper(HLModule &HLM,
                               std::unordered_set<LoadInst *> &UpdateCounter)
      : HLM(HLM), UpdateCounterSet(UpdateCounter) {}
  // Resource class (SRV/UAV/CBuffer/Sampler) of the resource behind Handle.
  DXIL::ResourceClass GetRC(Value *Handle) {
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
    return Res.RC;
  }
  // Resource kind (Texture2D, StructuredBuffer, ...) behind Handle.
  DXIL::ResourceKind GetRK(Value *Handle) {
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
    return Res.RK;
  }
  // High-level resource type behind Handle.
  Type *GetResourceType(Value *Handle) {
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
    return Res.ResourceType;
  }
  // Marks the resource behind handle (an HLAnnotateHandle call) as using its
  // UAV counter: rewrites the annotated kind to StructuredBufferWithCounter
  // and records the reachable resource loads in UpdateCounterSet.
  void MarkHasCounter(Value *handle, Type *i8Ty) {
    CallInst *CIHandle = cast<CallInst>(handle);
    DXASSERT(hlsl::GetHLOpcodeGroup(CIHandle->getCalledFunction()) ==
                 HLOpcodeGroup::HLAnnotateHandle,
             "else invalid handle");
    // Mark has counter for the input handle.
    Value *counterHandle =
        CIHandle->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx);
    // Change kind into StructurBufferWithCounter.
    CIHandle->setArgOperand(
        HLOperandIndex::kAnnotateHandleResourceKindOpIdx,
        ConstantInt::get(
            i8Ty, (unsigned)DXIL::ResourceKind::StructuredBufferWithCounter));
    // Counters are only valid on UAVs.
    DXIL::ResourceClass RC = GetRC(handle);
    DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV,
                      "must UAV for counter");
    std::unordered_set<Value *> resSet;
    MarkHasCounterOnCreateHandle(counterHandle, resSet);
  }
  // Returns (creating on first use) the global resource standing in for the
  // cbuffer-resident resource addressed by CbPtr.
  Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr,
                                     GlobalVariable *CbGV,
                                     DxilResourceProperties &RP) {
    // Change array idx to 0 to make sure all array ptr share same key.
    Value *Key = UniformCbPtr(CbPtr, CbGV);
    if (CBPtrToResourceMap.count(Key))
      return CBPtrToResourceMap[Key];
    Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, RP);
    CBPtrToResourceMap[Key] = Resource;
    return Resource;
  }
  // Rewrites a GEP into the cbuffer into a GEP into the stand-in resource
  // global, flattening multi-dimensional array indexing into one index.
  Value *LowerCbResourcePtr(GetElementPtrInst *CbPtr, Value *ResPtr) {
    // Simple case.
    if (ResPtr->getType() == CbPtr->getType())
      return ResPtr;
    // Array case.
    DXASSERT_NOMSG(ResPtr->getType()->getPointerElementType()->isArrayTy());
    IRBuilder<> Builder(CbPtr);
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    Value *arrayIdx = GEPIt.getOperand();
    // Only calc array idx and size.
    // Ignore struct type part.
    for (; GEPIt != E; ++GEPIt) {
      if (GEPIt->isArrayTy()) {
        arrayIdx = Builder.CreateMul(
            arrayIdx, Builder.getInt32(GEPIt->getArrayNumElements()));
        arrayIdx = Builder.CreateAdd(arrayIdx, GEPIt.getOperand());
      }
    }
    return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx});
  }
  // Decodes the class/kind/properties constant operands of an
  // HLAnnotateHandle call into a DxilResourceProperties value.
  DxilResourceProperties GetResPropsFromAnnotateHandle(CallInst *Anno) {
    DXIL::ResourceClass RC =
        (DXIL::ResourceClass)cast<ConstantInt>(
            Anno->getArgOperand(
                HLOperandIndex::kAnnotateHandleResourceClassOpIdx))
            ->getLimitedValue();
    DXIL::ResourceKind RK =
        (DXIL::ResourceKind)cast<ConstantInt>(
            Anno->getArgOperand(
                HLOperandIndex::kAnnotateHandleResourceKindOpIdx))
            ->getLimitedValue();
    Constant *Props = cast<Constant>(Anno->getArgOperand(
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
    DxilResourceProperties RP =
        resource_helper::loadFromConstant(*Props, RC, RK);
    return RP;
  }

private:
  // Looks up (and caches) the resource attributes for Handle by decoding its
  // HLAnnotateHandle call. Emits an error and returns an Invalid entry when
  // the handle cannot be traced to an annotation.
  ResAttribute &FindCreateHandleResourceBase(Value *Handle) {
    if (HandleMetaMap.count(Handle))
      return HandleMetaMap[Handle];
    // Add invalid first to avoid dead loop.
    HandleMetaMap[Handle] = {
        DXIL::ResourceClass::Invalid, DXIL::ResourceKind::Invalid,
        StructType::get(Type::getVoidTy(HLM.GetCtx()), nullptr)};
    if (CallInst *CI = dyn_cast<CallInst>(Handle)) {
      hlsl::HLOpcodeGroup group =
          hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
      if (group == HLOpcodeGroup::HLAnnotateHandle) {
        ConstantInt *RC = cast<ConstantInt>(CI->getArgOperand(
            HLOperandIndex::kAnnotateHandleResourceClassOpIdx));
        ConstantInt *RK = cast<ConstantInt>(CI->getArgOperand(
            HLOperandIndex::kAnnotateHandleResourceKindOpIdx));
        Type *ResTy =
            CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceTypeOpIdx)
                ->getType();
        ResAttribute Attrib = {(DXIL::ResourceClass)RC->getLimitedValue(),
                               (DXIL::ResourceKind)RK->getLimitedValue(),
                               ResTy};
        HandleMetaMap[Handle] = Attrib;
        return HandleMetaMap[Handle];
      }
    }
    Handle->getContext().emitError("cannot map resource to handle");
    return HandleMetaMap[Handle];
  }
  // Walks through select/phi chains to find a CallInst producing the handle;
  // resSet guards against cycles. Returns null when none is found.
  CallInst *FindCreateHandle(Value *handle,
                             std::unordered_set<Value *> &resSet) {
    // Already checked.
    if (resSet.count(handle))
      return nullptr;
    resSet.insert(handle);
    if (CallInst *CI = dyn_cast<CallInst>(handle))
      return CI;
    if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
      if (CallInst *CI = FindCreateHandle(Sel->getTrueValue(), resSet))
        return CI;
      if (CallInst *CI = FindCreateHandle(Sel->getFalseValue(), resSet))
        return CI;
      return nullptr;
    }
    if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
      for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
        if (CallInst *CI = FindCreateHandle(Phi->getOperand(i), resSet))
          return CI;
      }
      return nullptr;
    }
    return nullptr;
  }
  // Records, into UpdateCounterSet, the resource loads feeding every
  // create-handle call reachable from handle through select/phi chains;
  // resSet guards against cycles.
  void MarkHasCounterOnCreateHandle(Value *handle,
                                    std::unordered_set<Value *> &resSet) {
    // Already checked.
    if (resSet.count(handle))
      return;
    resSet.insert(handle);
    if (CallInst *CI = dyn_cast<CallInst>(handle)) {
      Value *Res =
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx);
      LoadInst *LdRes = dyn_cast<LoadInst>(Res);
      if (!LdRes) {
        dxilutil::EmitErrorOnInstruction(CI, "cannot map resource to handle.");
        return;
      }
      UpdateCounterSet.insert(LdRes);
      return;
    }
    if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
      MarkHasCounterOnCreateHandle(Sel->getTrueValue(), resSet);
      MarkHasCounterOnCreateHandle(Sel->getFalseValue(), resSet);
    }
    if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
      for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
        MarkHasCounterOnCreateHandle(Phi->getOperand(i), resSet);
      }
    }
  }
  // Canonicalizes CbPtr into a map key: same GEP shape but with every dynamic
  // index replaced by 0, so all elements of an array share one key.
  Value *UniformCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV) {
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    std::vector<Value *> idxList(CbPtr->idx_begin(), CbPtr->idx_end());
    unsigned i = 0;
    IRBuilder<> Builder(HLM.GetCtx());
    Value *zero = Builder.getInt32(0);
    for (; GEPIt != E; ++GEPIt, ++i) {
      ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand());
      if (!ImmIdx) {
        // Remove dynamic indexing to avoid crash.
        idxList[i] = zero;
      }
    }
    Value *Key = Builder.CreateInBoundsGEP(CbGV, idxList);
    return Key;
  }
  // Creates the stand-in global resource for a resource member of a cbuffer,
  // deriving its name from the access path (array indices and field names).
  Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV,
                                DxilResourceProperties &RP) {
    Type *CbTy = CbPtr->getPointerOperandType();
    DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(),
                      "else arg not point to var");
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    unsigned i = 0;
    IRBuilder<> Builder(HLM.GetCtx());
    unsigned arraySize = 1;
    DxilTypeSystem &typeSys = HLM.GetTypeSystem();
    std::string Name;
    for (; GEPIt != E; ++GEPIt, ++i) {
      if (GEPIt->isArrayTy()) {
        arraySize *= GEPIt->getArrayNumElements();
        if (!Name.empty())
          Name += ".";
        if (ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand())) {
          unsigned idx = ImmIdx->getLimitedValue();
          Name += std::to_string(idx);
        }
      } else if (GEPIt->isStructTy()) {
        // Use the annotated field name for struct members.
        DxilStructAnnotation *typeAnnot =
            typeSys.GetStructAnnotation(cast<StructType>(*GEPIt));
        DXASSERT_NOMSG(typeAnnot);
        unsigned idx = cast<ConstantInt>(GEPIt.getOperand())->getLimitedValue();
        DXASSERT_NOMSG(typeAnnot->GetNumFields() > idx);
        DxilFieldAnnotation &fieldAnnot = typeAnnot->GetFieldAnnotation(idx);
        if (!Name.empty())
          Name += ".";
        Name += fieldAnnot.GetFieldName();
      }
    }
    Type *Ty = CbPtr->getResultElementType();
    // Not support resource array in cbuffer.
    unsigned ResBinding = HLM.GetBindingForResourceInCB(CbPtr, CbGV, RP.Class);
    return CreateResourceGV(Ty, Name, RP, ResBinding);
  }
  // Adds a global variable of resource type Ty to the module and registers it
  // as a resource with the given properties and binding slot.
  Value *CreateResourceGV(Type *Ty, StringRef Name, DxilResourceProperties &RP,
                          unsigned ResBinding) {
    Module &M = *HLM.GetModule();
    Constant *GV = M.getOrInsertGlobal(Name, Ty);
    // Create resource and set GV as globalSym.
    DxilResourceBase *Res = HLM.AddResourceWithGlobalVariableAndProps(GV, RP);
    DXASSERT(Res, "fail to create resource for global variable in cbuffer");
    Res->SetLowerBound(ResBinding);
    return GV;
  }
};
  309. // Helper for lowering resource extension methods.
  310. struct HLObjectExtensionLowerHelper : public hlsl::HLResourceLookup {
  311. explicit HLObjectExtensionLowerHelper(HLObjectOperationLowerHelper &ObjHelper)
  312. : m_ObjHelper(ObjHelper)
  313. { }
  314. virtual bool GetResourceKindName(Value *HLHandle, const char **ppName)
  315. {
  316. DXIL::ResourceKind K = m_ObjHelper.GetRK(HLHandle);
  317. bool Success = K != DXIL::ResourceKind::Invalid;
  318. if (Success)
  319. {
  320. *ppName = hlsl::GetResourceKindName(K);
  321. }
  322. return Success;
  323. }
  324. private:
  325. HLObjectOperationLowerHelper &m_ObjHelper;
  326. };
// Common signature for all intrinsic lowering functions. Translated is set
// false by a lowering routine that chooses not to replace the call.
using IntrinsicLowerFuncTy = Value *(CallInst *CI, IntrinsicOp IOP,
                                     DXIL::OpCode opcode,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated);
// Table entry mapping one HLSL intrinsic to its lowering routine.
struct IntrinsicLower {
  // Intrinsic opcode.
  IntrinsicOp IntriOpcode;
  // Lower function.
  IntrinsicLowerFuncTy &LowerFunc;
  // DXIL opcode if can direct map.
  DXIL::OpCode DxilOpcode;
};
  338. // IOP intrinsics.
  339. namespace {
  340. Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef<Value *> refArgs,
  341. Type *Ty, Type *RetTy, OP *hlslOP,
  342. IRBuilder<> &Builder) {
  343. unsigned argNum = refArgs.size();
  344. std::vector<Value *> args = refArgs;
  345. if (Ty->isVectorTy()) {
  346. Value *retVal = llvm::UndefValue::get(RetTy);
  347. unsigned vecSize = Ty->getVectorNumElements();
  348. for (unsigned i = 0; i < vecSize; i++) {
  349. // Update vector args, skip known opcode arg.
  350. for (unsigned argIdx = HLOperandIndex::kUnaryOpSrc0Idx; argIdx < argNum;
  351. argIdx++) {
  352. if (refArgs[argIdx]->getType()->isVectorTy()) {
  353. Value *arg = refArgs[argIdx];
  354. args[argIdx] = Builder.CreateExtractElement(arg, i);
  355. }
  356. }
  357. Value *EltOP =
  358. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  359. retVal = Builder.CreateInsertElement(retVal, EltOP, i);
  360. }
  361. return retVal;
  362. } else {
  363. if (!RetTy->isVoidTy()) {
  364. Value *retVal =
  365. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  366. return retVal;
  367. } else {
  368. // Cannot add name to void.
  369. return Builder.CreateCall(dxilFunc, args);
  370. }
  371. }
  372. }
  373. // Generates a DXIL operation over an overloaded type (Ty), returning a
  374. // RetTy value; when Ty is a vector, it will replicate per-element operations
  375. // into RetTy to rebuild it.
  376. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
  377. Type *Ty, Type *RetTy, OP *hlslOP,
  378. IRBuilder<> &Builder) {
  379. Type *EltTy = Ty->getScalarType();
  380. Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy);
  381. return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP, Builder);
  382. }
// Variant that inserts before Inst and uses Inst's type as the return type.
// refArgs[0] must be a null placeholder; it is overwritten in place with the
// opcode constant (the ArrayRef points at stack storage owned by the caller,
// which is why the const_cast below is safe here).
Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
                            Type *Ty, Instruction *Inst, OP *hlslOP) {
  DXASSERT(refArgs.size() > 0, "else opcode isn't in signature");
  DXASSERT(refArgs[0] == nullptr,
           "else caller has already filled the value in");
  IRBuilder<> B(Inst);
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  const_cast<llvm::Value **>(refArgs.data())[0] =
      opArg; // actually stack memory from caller
  return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B);
}
  394. Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy,
  395. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  396. Type *Ty = src->getType();
  397. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  398. Value *args[] = {opArg, src};
  399. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  400. }
  401. Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src,
  402. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  403. return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP,
  404. Builder);
  405. }
  406. Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  407. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  408. Type *Ty = src0->getType();
  409. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  410. Value *args[] = {opArg, src0, src1};
  411. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  412. }
  413. Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  414. Value *src2, hlsl::OP *hlslOP,
  415. IRBuilder<> &Builder) {
  416. Type *Ty = src0->getType();
  417. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  418. Value *args[] = {opArg, src0, src1, src2};
  419. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  420. }
  421. Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  422. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  423. Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  424. IRBuilder<> Builder(CI);
  425. hlsl::OP *hlslOP = &helper.hlslOP;
  426. Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), hlslOP, Builder);
  427. return retVal;
  428. }
  429. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  430. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  431. hlsl::OP *hlslOP = &helper.hlslOP;
  432. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  433. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  434. IRBuilder<> Builder(CI);
  435. Value *binOp =
  436. TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder);
  437. return binOp;
  438. }
  439. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  440. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  441. hlsl::OP *hlslOP = &helper.hlslOP;
  442. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  443. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  444. Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  445. IRBuilder<> Builder(CI);
  446. Value *triOp =
  447. TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder);
  448. return triOp;
  449. }
  450. Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  451. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  452. hlsl::OP *hlslOP = &helper.hlslOP;
  453. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  454. IRBuilder<> Builder(CI);
  455. Type *Ty = src->getType();
  456. Type *RetTy = Type::getInt1Ty(CI->getContext());
  457. if (Ty->isVectorTy())
  458. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  459. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  460. Value *args[] = {opArg, src};
  461. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  462. }
  463. bool IsResourceGEP(GetElementPtrInst *I) {
  464. Type *Ty = I->getType()->getPointerElementType();
  465. Ty = dxilutil::GetArrayEltTy(Ty);
  466. // Only mark on GEP which point to resource.
  467. return dxilutil::IsHLSLResourceType(Ty);
  468. }
  469. Value *TranslateNonUniformResourceIndex(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  470. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  471. Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  472. CI->replaceAllUsesWith(V);
  473. for (User *U : V->users()) {
  474. if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(U)) {
  475. // Only mark on GEP which point to resource.
  476. if (IsResourceGEP(I))
  477. DxilMDHelper::MarkNonUniform(I);
  478. } else if (CastInst *castI = dyn_cast<CastInst>(U)) {
  479. for (User *castU : castI->users()) {
  480. if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(castU)) {
  481. // Only mark on GEP which point to resource.
  482. if (IsResourceGEP(I))
  483. DxilMDHelper::MarkNonUniform(I);
  484. }
  485. }
  486. } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
  487. if (hlsl::GetHLOpcodeGroup(CI->getCalledFunction()) == hlsl::HLOpcodeGroup::HLCreateHandle)
  488. DxilMDHelper::MarkNonUniform(CI);
  489. }
  490. }
  491. return nullptr;
  492. }
  493. Value *TrivialBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  494. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  495. hlsl::OP *OP = &helper.hlslOP;
  496. Function *dxilFunc = OP->GetOpFunc(OP::OpCode::Barrier, CI->getType());
  497. Constant *opArg = OP->GetU32Const((unsigned)OP::OpCode::Barrier);
  498. unsigned uglobal = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceGlobal);
  499. unsigned g = static_cast<unsigned>(DXIL::BarrierMode::TGSMFence);
  500. unsigned t = static_cast<unsigned>(DXIL::BarrierMode::SyncThreadGroup);
  501. // unsigned ut = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceThreadGroup);
  502. unsigned barrierMode = 0;
  503. switch (IOP) {
  504. case IntrinsicOp::IOP_AllMemoryBarrier:
  505. barrierMode = uglobal | g;
  506. break;
  507. case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
  508. barrierMode = uglobal | g | t;
  509. break;
  510. case IntrinsicOp::IOP_GroupMemoryBarrier:
  511. barrierMode = g;
  512. break;
  513. case IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync:
  514. barrierMode = g | t;
  515. break;
  516. case IntrinsicOp::IOP_DeviceMemoryBarrier:
  517. barrierMode = uglobal;
  518. break;
  519. case IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync:
  520. barrierMode = uglobal | t;
  521. break;
  522. default:
  523. DXASSERT(0, "invalid opcode for barrier");
  524. break;
  525. }
  526. Value *src0 = OP->GetU32Const(static_cast<unsigned>(barrierMode));
  527. Value *args[] = {opArg, src0};
  528. IRBuilder<> Builder(CI);
  529. Builder.CreateCall(dxilFunc, args);
  530. return nullptr;
  531. }
  532. Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
  533. OP::OpCode opcode,
  534. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  535. IRBuilder<> Builder(CI);
  536. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  537. Type *Ty = val->getType();
  538. // Use the same scaling factor used by FXC (i.e., 255.001953)
  539. // Excerpt from stackoverflow discussion:
  540. // "Built-in rounding, necessary because of truncation. 0.001953 * 256 = 0.5"
  541. Constant *toByteConst = ConstantFP::get(Ty->getScalarType(), 255.001953);
  542. if (Ty->isVectorTy()) {
  543. static constexpr int supportedVecElemCount = 4;
  544. if (Ty->getVectorNumElements() == supportedVecElemCount) {
  545. toByteConst = ConstantVector::getSplat(supportedVecElemCount, toByteConst);
  546. // Swizzle the input val -> val.zyxw
  547. std::vector<int> mask { 2, 1, 0, 3 };
  548. val = Builder.CreateShuffleVector(val, val, mask);
  549. } else {
  550. dxilutil::EmitErrorOnInstruction(CI, "Unsupported input type for intrinsic D3DColorToUByte4.");
  551. return UndefValue::get(CI->getType());
  552. }
  553. }
  554. Value *byte4 = Builder.CreateFMul(toByteConst, val);
  555. return Builder.CreateCast(Instruction::CastOps::FPToSI, byte4, CI->getType());
  556. }
  557. // Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
  558. // Fxc uses the below rules when choosing mul-only code gen pattern to implement pow function.
  559. // Rule 1: Applicable only to power values in the range [INT32_MIN, INT32_MAX]
  560. // Rule 2: The maximum number of mul ops needed shouldn't exceed (2n+1) or (n+1) based on whether the power
  561. // is a positive or a negative value. Here "n" is the number of scalar elements in power.
  562. // Rule 3: Power must be an exact value.
  563. // +----------+---------------------+------------------+
  564. // | BaseType | IsExponentPositive | MaxMulOpsAllowed |
  565. // +----------+---------------------+------------------+
  566. // | float4x4 | True | 33 |
  567. // | float4x4 | False | 17 |
  568. // | float4x2 | True | 17 |
  569. // | float4x2 | False | 9 |
  570. // | float2x4 | True | 17 |
  571. // | float2x4 | False | 9 |
  572. // | float4 | True | 9 |
  573. // | float4 | False | 5 |
  574. // | float2 | True | 5 |
  575. // | float2 | False | 3 |
  576. // | float | True | 3 |
  577. // | float | False | 2 |
  578. // +----------+---------------------+------------------+
  579. bool CanUseFxcMulOnlyPatternForPow(IRBuilder<>& Builder, Value *x, Value *pow, int32_t& powI) {
  580. // Applicable only when power is a literal.
  581. if (!isa<ConstantDataVector>(pow) && !isa<ConstantFP>(pow)) {
  582. return false;
  583. }
  584. // Only apply this code gen on splat values.
  585. if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) {
  586. if (!hlsl::dxilutil::IsSplat(cdv)) {
  587. return false;
  588. }
  589. }
  590. APFloat powAPF = isa<ConstantDataVector>(pow) ?
  591. cast<ConstantDataVector>(pow)->getElementAsAPFloat(0) : // should be a splat value
  592. cast<ConstantFP>(pow)->getValueAPF();
  593. APSInt powAPS(32, false);
  594. bool isExact = false;
  595. // Try converting float value of power to integer and also check if the float value is exact.
  596. APFloat::opStatus status = powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
  597. if (status == APFloat::opStatus::opOK && isExact) {
  598. powI = powAPS.getExtValue();
  599. uint32_t powU = abs(powI);
  600. int setBitCount = 0;
  601. int maxBitSetPos = -1;
  602. for (int i = 0; i < 32; i++) {
  603. if ((powU >> i) & 1) {
  604. setBitCount++;
  605. maxBitSetPos = i;
  606. }
  607. }
  608. DXASSERT(maxBitSetPos <= 30, "msb should always be zero.");
  609. unsigned numElem = isa<ConstantDataVector>(pow) ? x->getType()->getVectorNumElements() : 1;
  610. int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1;
  611. int mulOpNeeded = maxBitSetPos + setBitCount - 1;
  612. return mulOpNeeded <= mulOpThreshold;
  613. }
  614. return false;
  615. }
  616. Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const int32_t y) {
  617. uint32_t absY = abs(y);
  618. // If y is zero then always return 1.
  619. if (absY == 0) {
  620. return ConstantFP::get(x->getType(), 1);
  621. }
  622. int lastSetPos = -1;
  623. Value *result = nullptr;
  624. Value *mul = nullptr;
  625. for (int i = 0; i < 32; i++) {
  626. if ((absY >> i) & 1) {
  627. for (int j = i; j > lastSetPos; j--) {
  628. if (!mul) {
  629. mul = x;
  630. }
  631. else {
  632. mul = Builder.CreateFMul(mul, mul);
  633. }
  634. }
  635. result = (result == nullptr) ? mul : Builder.CreateFMul(result, mul);
  636. lastSetPos = i;
  637. }
  638. }
  639. // Compute reciprocal for negative power values.
  640. if (y < 0) {
  641. Value* constOne = ConstantFP::get(x->getType(), 1);
  642. result = Builder.CreateFDiv(constOne, result);
  643. }
  644. return result;
  645. }
  646. Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
  647. // As applicable implement pow using only mul ops as done by Fxc.
  648. int32_t p = 0;
  649. if (CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
  650. if (isFXCCompatMode) {
  651. return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
  652. } else if (p == 2) {
  653. // Only take care 2 for it will not affect register pressure.
  654. return Builder.CreateFMul(x, x);
  655. }
  656. }
  657. // Default to log-mul-exp pattern if previous scenarios don't apply.
  658. // t = log(x);
  659. Value *logX =
  660. TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
  661. // t = y * t;
  662. Value *mulY = Builder.CreateFMul(logX, y);
  663. // pow = exp(t);
  664. return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
  665. }
  666. Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
  667. OP::OpCode opcode,
  668. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  669. hlsl::OP *hlslOP = &helper.hlslOP;
  670. IRBuilder<> Builder(CI);
  671. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  672. Type *Ty = val->getType();
  673. VectorType *VT = dyn_cast<VectorType>(Ty);
  674. if (!VT) {
  675. dxilutil::EmitErrorOnInstruction(
  676. CI, "AddUint64 can only be applied to uint2 and uint4 operands.");
  677. return UndefValue::get(Ty);
  678. }
  679. unsigned size = VT->getNumElements();
  680. if (size != 2 && size != 4) {
  681. dxilutil::EmitErrorOnInstruction(
  682. CI, "AddUint64 can only be applied to uint2 and uint4 operands.");
  683. return UndefValue::get(Ty);
  684. }
  685. Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  686. Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  687. Value *RetVal = UndefValue::get(Ty);
  688. Function *AddC = hlslOP->GetOpFunc(DXIL::OpCode::UAddc, helper.i32Ty);
  689. Value *opArg = Builder.getInt32(static_cast<unsigned>(DXIL::OpCode::UAddc));
  690. for (unsigned i=0; i<size; i+=2) {
  691. Value *low0 = Builder.CreateExtractElement(op0, i);
  692. Value *low1 = Builder.CreateExtractElement(op1, i);
  693. Value *lowWithC = Builder.CreateCall(AddC, { opArg, low0, low1});
  694. Value *low = Builder.CreateExtractValue(lowWithC, 0);
  695. RetVal = Builder.CreateInsertElement(RetVal, low, i);
  696. Value *carry = Builder.CreateExtractValue(lowWithC, 1);
  697. // Ext i1 to i32
  698. carry = Builder.CreateZExt(carry, helper.i32Ty);
  699. Value *hi0 = Builder.CreateExtractElement(op0, i+1);
  700. Value *hi1 = Builder.CreateExtractElement(op1, i+1);
  701. Value *hi = Builder.CreateAdd(hi0, hi1);
  702. hi = Builder.CreateAdd(hi, carry);
  703. RetVal = Builder.CreateInsertElement(RetVal, hi, i+1);
  704. }
  705. return RetVal;
  706. }
  707. bool IsValidLoadInput(Value *V) {
  708. // Must be load input.
  709. // TODO: report this error on front-end
  710. if (!V || !isa<CallInst>(V)) {
  711. return false;
  712. }
  713. CallInst *CI = cast<CallInst>(V);
  714. // Must be immediate.
  715. ConstantInt *opArg =
  716. cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
  717. DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
  718. if (op != DXIL::OpCode::LoadInput) {
  719. return false;
  720. }
  721. return true;
  722. }
// Tunnel through insert/extract element and shuffle to find original source
// of scalar value, or specified element (vecIdx) of vector value.
// Returns an UndefValue of the scalar type when the element cannot be traced.
Value *FindScalarSource(Value *src, unsigned vecIdx = 0) {
  Type *srcTy = src->getType()->getScalarType();
  while (src && !isa<UndefValue>(src)) {
    if (src->getType()->isVectorTy()) {
      if (InsertElementInst *IE = dyn_cast<InsertElementInst>(src)) {
        // If this insert wrote our element, follow the inserted scalar;
        // otherwise keep searching the base vector.
        unsigned curIdx = (unsigned)cast<ConstantInt>(IE->getOperand(2))
                              ->getUniqueInteger()
                              .getLimitedValue();
        src = IE->getOperand((curIdx == vecIdx) ? 1 : 0);
      } else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(src)) {
        // Map our element through the shuffle mask into one of the two
        // shuffle operands.
        int newIdx = SV->getMaskValue(vecIdx);
        if (newIdx < 0) // undef mask slot
          return UndefValue::get(srcTy);
        vecIdx = (unsigned)newIdx;
        src = SV->getOperand(0);
        unsigned numElt = src->getType()->getVectorNumElements();
        if (numElt <= vecIdx) {
          // Index falls inside the second shuffle operand's range.
          vecIdx -= numElt;
          src = SV->getOperand(1);
        }
      } else {
        return UndefValue::get(srcTy); // Didn't find it.
      }
    } else {
      if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(src)) {
        // Switch to tracking the extracted element of the source vector.
        vecIdx = (unsigned)cast<ConstantInt>(EE->getIndexOperand())
                     ->getUniqueInteger()
                     .getLimitedValue();
        src = EE->getVectorOperand();
      } else if (hlsl::IsConvergentMarker(src)) {
        // Look through convergence markers.
        src = hlsl::GetConvergentSource(src);
      } else {
        break; // Found it.
      }
    }
  }
  return src;
}
// Finds corresponding inputs, calls translation for each, and returns
// resulting vector or scalar.
// Uses functor that takes (inputElemID, rowIdx, colIdx), and returns
// translation for one input scalar.
// Emits an error (and returns undef) when a source element cannot be traced
// back to a LoadInput.
Value *TranslateEvalHelper(
    CallInst *CI, Value *val, IRBuilder<> &Builder,
    std::function<Value *(Value *, Value *, Value *)> fnTranslateScalarInput) {
  Type *Ty = CI->getType();
  Value *result = UndefValue::get(Ty);
  if (Ty->isVectorTy()) {
    // Trace each vector element to its LoadInput and translate it.
    for (unsigned i = 0; i < Ty->getVectorNumElements(); ++i) {
      Value *InputEl = FindScalarSource(val, i);
      if (!IsValidLoadInput(InputEl)) {
        dxilutil::EmitErrorOnInstruction(
            CI, "attribute evaluation can only be done "
                "on values taken directly from inputs.");
        return result;
      }
      CallInst *loadInput = cast<CallInst>(InputEl);
      Value *inputElemID =
          loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
      Value *rowIdx =
          loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
      Value *colIdx =
          loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
      Value *Elt = fnTranslateScalarInput(inputElemID, rowIdx, colIdx);
      result = Builder.CreateInsertElement(result, Elt, i);
    }
  } else {
    // Scalar case: a single source input.
    Value *InputEl = FindScalarSource(val);
    if (!IsValidLoadInput(InputEl)) {
      dxilutil::EmitErrorOnInstruction(
          CI, "attribute evaluation can only be done "
              "on values taken directly from inputs.");
      return result;
    }
    CallInst *loadInput = cast<CallInst>(InputEl);
    Value *inputElemID =
        loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
    Value *rowIdx =
        loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
    Value *colIdx =
        loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
    result = fnTranslateScalarInput(inputElemID, rowIdx, colIdx);
  }
  return result;
}
  800. Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  801. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  802. hlsl::OP *hlslOP = &helper.hlslOP;
  803. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  804. Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  805. IRBuilder<> Builder(CI);
  806. OP::OpCode opcode = OP::OpCode::EvalSampleIndex;
  807. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  808. Function *evalFunc = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  809. return TranslateEvalHelper(CI, val, Builder,
  810. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  811. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, sampleIdx });
  812. }
  813. );
  814. }
  815. Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  816. HLOperationLowerHelper &helper,
  817. HLObjectOperationLowerHelper *pObjHelper,
  818. bool &Translated) {
  819. hlsl::OP *hlslOP = &helper.hlslOP;
  820. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  821. Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  822. IRBuilder<> Builder(CI);
  823. Value *offsetX = Builder.CreateExtractElement(offset, (uint64_t)0);
  824. Value *offsetY = Builder.CreateExtractElement(offset, 1);
  825. OP::OpCode opcode = OP::OpCode::EvalSnapped;
  826. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  827. Function *evalFunc = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  828. return TranslateEvalHelper(CI, val, Builder,
  829. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  830. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, offsetX, offsetY });
  831. }
  832. );
  833. }
  834. Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  835. HLOperationLowerHelper &helper,
  836. HLObjectOperationLowerHelper *pObjHelper,
  837. bool &Translated) {
  838. hlsl::OP *hlslOP = &helper.hlslOP;
  839. Value *val = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
  840. IRBuilder<> Builder(CI);
  841. OP::OpCode opcode = OP::OpCode::EvalCentroid;
  842. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  843. Function *evalFunc = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  844. return TranslateEvalHelper(CI, val, Builder,
  845. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  846. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx });
  847. }
  848. );
  849. }
  850. Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  851. HLOperationLowerHelper &helper,
  852. HLObjectOperationLowerHelper *pObjHelper,
  853. bool &Translated) {
  854. DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate");
  855. hlsl::OP *hlslOP = &helper.hlslOP;
  856. IRBuilder<> Builder(CI);
  857. Value *val = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx);
  858. Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx);
  859. Value *vertexI8Idx = Builder.CreateTrunc(vertexIdx, Type::getInt8Ty(CI->getContext()));
  860. Value *opArg = hlslOP->GetU32Const((unsigned)op);
  861. Function *evalFunc = hlslOP->GetOpFunc(op, val->getType()->getScalarType());
  862. return TranslateEvalHelper(CI, val, Builder,
  863. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  864. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, vertexI8Idx });
  865. }
  866. );
  867. }
  868. Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  869. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  870. hlsl::OP *hlslOP = &helper.hlslOP;
  871. Type *Ty = Type::getVoidTy(CI->getContext());
  872. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  873. Value *args[] = {opArg};
  874. IRBuilder<> Builder(CI);
  875. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  876. return dxilOp;
  877. }
  878. Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  879. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  880. hlsl::OP *hlslOP = &helper.hlslOP;
  881. Type *Ty = CI->getType();
  882. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  883. Value *args[] = {opArg};
  884. IRBuilder<> Builder(CI);
  885. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  886. return dxilOp;
  887. }
  888. Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  889. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  890. hlsl::OP *hlslOP = &helper.hlslOP;
  891. OP::OpCode opcode = OP::OpCode::RenderTargetGetSamplePosition;
  892. IRBuilder<> Builder(CI);
  893. Type *Ty = Type::getVoidTy(CI->getContext());
  894. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  895. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  896. Value *args[] = {opArg, val};
  897. Value *samplePos =
  898. TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  899. Value *result = UndefValue::get(CI->getType());
  900. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  901. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  902. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  903. result = Builder.CreateInsertElement(result, samplePosY, 1);
  904. return result;
  905. }
  906. // val QuadReadLaneAt(val, uint);
  907. Value *TranslateQuadReadLaneAt(CallInst *CI, IntrinsicOp IOP,
  908. OP::OpCode opcode,
  909. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  910. hlsl::OP *hlslOP = &helper.hlslOP;
  911. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  912. return TrivialDxilOperation(DXIL::OpCode::QuadReadLaneAt, refArgs,
  913. CI->getOperand(1)->getType(), CI, hlslOP);
  914. }
// Wave intrinsics of the form fn(val,QuadOpKind)->val
Value *TranslateQuadReadAcross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                               HLOperationLowerHelper &helper,
                               HLObjectOperationLowerHelper *pObjHelper,
                               bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Map the HL intrinsic to the DXIL quad-op kind immediate.
  DXIL::QuadOpKind opKind;
  switch (IOP) {
  case IntrinsicOp::IOP_QuadReadAcrossX: opKind = DXIL::QuadOpKind::ReadAcrossX; break;
  case IntrinsicOp::IOP_QuadReadAcrossY: opKind = DXIL::QuadOpKind::ReadAcrossY; break;
  // Unexpected values assert and then deliberately fall through into the
  // diagonal case, which is the last remaining kind.
  default: DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_QuadReadAcrossDiagonal);
  case IntrinsicOp::IOP_QuadReadAcrossDiagonal: opKind = DXIL::QuadOpKind::ReadAcrossDiagonal; break;
  }
  Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind);
  // First slot is the opcode constant, filled in by TrivialDxilOperation.
  Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg};
  return TrivialDxilOperation(DXIL::OpCode::QuadOp, refArgs,
                              CI->getOperand(1)->getType(), CI, hlslOP);
}
  931. // WaveAllEqual(val<n>)->bool<n>
  932. Value *TranslateWaveAllEqual(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  933. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  934. hlsl::OP *hlslOP = &helper.hlslOP;
  935. Value *src = CI->getArgOperand(HLOperandIndex::kWaveAllEqualValueOpIdx);
  936. IRBuilder<> Builder(CI);
  937. Type *Ty = src->getType();
  938. Type *RetTy = Type::getInt1Ty(CI->getContext());
  939. if (Ty->isVectorTy())
  940. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  941. Constant *opArg = hlslOP->GetU32Const((unsigned)DXIL::OpCode::WaveActiveAllEqual);
  942. Value *args[] = {opArg, src};
  943. return TrivialDxilOperation(DXIL::OpCode::WaveActiveAllEqual, args, Ty, RetTy,
  944. hlslOP, Builder);
  945. }
// WaveMatch(val<n>)->uint4
Value *TranslateWaveMatch(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc,
                          HLOperationLowerHelper &Helper,
                          HLObjectOperationLowerHelper *ObjHelper,
                          bool &Translated) {
  hlsl::OP *Op = &Helper.hlslOP;
  IRBuilder<> Builder(CI);
  // Generate a dx.op.waveMatch call for each scalar in the input, and perform
  // a bitwise AND between each result to derive the final bitmask in the case
  // of vector inputs.
  // (1) Collect the list of all scalar inputs (e.g. decompose vectors)
  SmallVector<Value *, 4> ScalarInputs;
  Value *Val = CI->getArgOperand(1);
  Type *ValTy = Val->getType();
  Type *EltTy = ValTy->getScalarType();
  if (ValTy->isVectorTy()) {
    for (uint64_t i = 0, e = ValTy->getVectorNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractElement(Val, i);
      ScalarInputs.push_back(Elt);
    }
  } else {
    ScalarInputs.push_back(Val);
  }
  Value *Res = nullptr;
  Constant *OpcArg = Op->GetU32Const((unsigned)DXIL::OpCode::WaveMatch);
  Value *Fn = Op->GetOpFunc(OP::OpCode::WaveMatch, EltTy);
  // (2) For each scalar, emit a call to dx.op.waveMatch. If this is not the
  // first scalar, then AND the result with the accumulator.
  // Note: dx.op.waveMatch returns a four-element aggregate (the 128-bit
  // mask), so the AND is done component-wise via extract/insertvalue.
  for (unsigned i = 0, e = ScalarInputs.size(); i != e; ++i) {
    Value *Args[] = { OpcArg, ScalarInputs[i] };
    Value *Call = Builder.CreateCall(Fn, Args);
    if (Res) {
      // Generate bitwise AND of the components
      for (unsigned j = 0; j != 4; ++j) {
        Value *ResVal = Builder.CreateExtractValue(Res, j);
        Value *CallVal = Builder.CreateExtractValue(Call, j);
        Value *And = Builder.CreateAnd(ResVal, CallVal);
        Res = Builder.CreateInsertValue(Res, And, j);
      }
    } else {
      Res = Call;
    }
  }
  // (3) Convert the final aggregate into a vector to make the types match
  Value *ResVec = UndefValue::get(CI->getType());
  for (unsigned i = 0; i != 4; ++i) {
    Value *Elt = Builder.CreateExtractValue(Res, i);
    ResVec = Builder.CreateInsertElement(ResVec, Elt, i);
  }
  return ResVec;
}
  997. // Wave intrinsics of the form fn(valA)->valB, where no overloading takes place
  998. Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  999. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1000. hlsl::OP *hlslOP = &helper.hlslOP;
  1001. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  1002. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  1003. }
// Wave ballot intrinsic.
Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  // The high-level operation is uint4 ballot(i1).
  // The DXIL operation is struct.u4 ballot(i1).
  // To avoid updating users with more than a simple replace, we translate into
  // a call into struct.u4, then reassemble the vector.
  // Scalarization and constant propagation take care of cleanup.
  IRBuilder<> B(CI);
  // Make the DXIL call itself.
  hlsl::OP *hlslOP = &helper.hlslOP;
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Value *refArgs[] = { opArg, CI->getOperand(1) };
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  Value *dxilVal = B.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode));
  // Assign from the call results into a vector.
  Type *ResTy = CI->getType();
  DXASSERT_NOMSG(ResTy->isVectorTy() && ResTy->getVectorNumElements() == 4);
  DXASSERT_NOMSG(dxilVal->getType()->isStructTy() &&
                 dxilVal->getType()->getNumContainedTypes() == 4);
  // 'x' component is the first vector element, highest bits.
  // Copy struct field i -> vector element i for all four components.
  Value *ResVal = llvm::UndefValue::get(ResTy);
  for (unsigned Idx = 0; Idx < 4; ++Idx) {
    ResVal = B.CreateInsertElement(
        ResVal, B.CreateExtractValue(dxilVal, ArrayRef<unsigned>(Idx)), Idx);
  }
  return ResVal;
}
  1032. static bool WaveIntrinsicNeedsSign(OP::OpCode opcode) {
  1033. return opcode == OP::OpCode::WaveActiveOp ||
  1034. opcode == OP::OpCode::WavePrefixOp;
  1035. }
  1036. static unsigned WaveIntrinsicToSignedOpKind(IntrinsicOp IOP) {
  1037. if (IOP == IntrinsicOp::IOP_WaveActiveUMax ||
  1038. IOP == IntrinsicOp::IOP_WaveActiveUMin ||
  1039. IOP == IntrinsicOp::IOP_WaveActiveUSum ||
  1040. IOP == IntrinsicOp::IOP_WaveActiveUProduct ||
  1041. IOP == IntrinsicOp::IOP_WaveMultiPrefixUProduct ||
  1042. IOP == IntrinsicOp::IOP_WaveMultiPrefixUSum ||
  1043. IOP == IntrinsicOp::IOP_WavePrefixUSum ||
  1044. IOP == IntrinsicOp::IOP_WavePrefixUProduct)
  1045. return (unsigned)DXIL::SignedOpKind::Unsigned;
  1046. return (unsigned)DXIL::SignedOpKind::Signed;
  1047. }
  1048. static unsigned WaveIntrinsicToOpKind(IntrinsicOp IOP) {
  1049. switch (IOP) {
  1050. // Bit operations.
  1051. case IntrinsicOp::IOP_WaveActiveBitOr:
  1052. return (unsigned)DXIL::WaveBitOpKind::Or;
  1053. case IntrinsicOp::IOP_WaveActiveBitAnd:
  1054. return (unsigned)DXIL::WaveBitOpKind::And;
  1055. case IntrinsicOp::IOP_WaveActiveBitXor:
  1056. return (unsigned)DXIL::WaveBitOpKind::Xor;
  1057. // Prefix operations.
  1058. case IntrinsicOp::IOP_WavePrefixSum:
  1059. case IntrinsicOp::IOP_WavePrefixUSum:
  1060. return (unsigned)DXIL::WaveOpKind::Sum;
  1061. case IntrinsicOp::IOP_WavePrefixProduct:
  1062. case IntrinsicOp::IOP_WavePrefixUProduct:
  1063. return (unsigned)DXIL::WaveOpKind::Product;
  1064. // Numeric operations.
  1065. case IntrinsicOp::IOP_WaveActiveMax:
  1066. case IntrinsicOp::IOP_WaveActiveUMax:
  1067. return (unsigned)DXIL::WaveOpKind::Max;
  1068. case IntrinsicOp::IOP_WaveActiveMin:
  1069. case IntrinsicOp::IOP_WaveActiveUMin:
  1070. return (unsigned)DXIL::WaveOpKind::Min;
  1071. case IntrinsicOp::IOP_WaveActiveSum:
  1072. case IntrinsicOp::IOP_WaveActiveUSum:
  1073. return (unsigned)DXIL::WaveOpKind::Sum;
  1074. case IntrinsicOp::IOP_WaveActiveProduct:
  1075. case IntrinsicOp::IOP_WaveActiveUProduct:
  1076. // MultiPrefix operations
  1077. case IntrinsicOp::IOP_WaveMultiPrefixBitAnd:
  1078. return (unsigned)DXIL::WaveMultiPrefixOpKind::And;
  1079. case IntrinsicOp::IOP_WaveMultiPrefixBitOr:
  1080. return (unsigned)DXIL::WaveMultiPrefixOpKind::Or;
  1081. case IntrinsicOp::IOP_WaveMultiPrefixBitXor:
  1082. return (unsigned)DXIL::WaveMultiPrefixOpKind::Xor;
  1083. case IntrinsicOp::IOP_WaveMultiPrefixProduct:
  1084. case IntrinsicOp::IOP_WaveMultiPrefixUProduct:
  1085. return (unsigned)DXIL::WaveMultiPrefixOpKind::Product;
  1086. case IntrinsicOp::IOP_WaveMultiPrefixSum:
  1087. case IntrinsicOp::IOP_WaveMultiPrefixUSum:
  1088. return (unsigned)DXIL::WaveMultiPrefixOpKind::Sum;
  1089. default:
  1090. DXASSERT(IOP == IntrinsicOp::IOP_WaveActiveProduct ||
  1091. IOP == IntrinsicOp::IOP_WaveActiveUProduct,
  1092. "else caller passed incorrect value");
  1093. return (unsigned)DXIL::WaveOpKind::Product;
  1094. }
  1095. }
// Wave intrinsics of the form fn(valA)->valA
Value *TranslateWaveA2A(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                        HLOperationLowerHelper &helper,
                        HLObjectOperationLowerHelper *pObjHelper,
                        bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Op-kind and sign-kind immediates derived from the HL intrinsic.
  Constant *kindValInt = hlslOP->GetI8Const(WaveIntrinsicToOpKind(IOP));
  Constant *signValInt = hlslOP->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
  Value *refArgs[] = {nullptr, CI->getOperand(1), kindValInt, signValInt};
  unsigned refArgCount = _countof(refArgs);
  // Opcodes other than WaveActiveOp/WavePrefixOp (see WaveIntrinsicNeedsSign)
  // take no sign operand, so drop the trailing argument for them.
  if (!WaveIntrinsicNeedsSign(opcode))
    refArgCount--;
  return TrivialDxilOperation(opcode,
                              llvm::ArrayRef<Value *>(refArgs, refArgCount),
                              CI->getOperand(1)->getType(), CI, hlslOP);
}
  1112. // WaveMultiPrefixOP(val<n>, mask) -> val<n>
  1113. Value *TranslateWaveMultiPrefix(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc,
  1114. HLOperationLowerHelper &Helper,
  1115. HLObjectOperationLowerHelper *ObjHelper,
  1116. bool &Translated) {
  1117. hlsl::OP *Op = &Helper.hlslOP;
  1118. Constant *KindValInt = Op->GetI8Const(WaveIntrinsicToOpKind(IOP));
  1119. Constant *SignValInt = Op->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
  1120. // Decompose mask into scalars
  1121. IRBuilder<> Builder(CI);
  1122. Value *Mask = CI->getArgOperand(2);
  1123. Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0);
  1124. Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1);
  1125. Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2);
  1126. Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3);
  1127. Value *Args[] = { nullptr, CI->getOperand(1),
  1128. Mask0, Mask1, Mask2, Mask3, KindValInt, SignValInt };
  1129. return TrivialDxilOperation(Opc, Args, CI->getOperand(1)->getType(), CI, Op);
  1130. }
  1131. // WaveMultiPrefixBitCount(i1, mask) -> i32
  1132. Value *TranslateWaveMultiPrefixBitCount(CallInst *CI, IntrinsicOp IOP,
  1133. OP::OpCode Opc,
  1134. HLOperationLowerHelper &Helper,
  1135. HLObjectOperationLowerHelper *ObjHelper,
  1136. bool &Translated) {
  1137. hlsl::OP *Op = &Helper.hlslOP;
  1138. // Decompose mask into scalars
  1139. IRBuilder<> Builder(CI);
  1140. Value *Mask = CI->getArgOperand(2);
  1141. Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0);
  1142. Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1);
  1143. Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2);
  1144. Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3);
  1145. Value *Args[] = { nullptr, CI->getOperand(1), Mask0, Mask1, Mask2, Mask3 };
  1146. return TrivialDxilOperation(Opc, Args, Helper.voidTy, CI, Op);
  1147. }
  1148. // Wave intrinsics of the form fn()->val
  1149. Value *TranslateWaveToVal(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1150. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1151. hlsl::OP *hlslOP = &helper.hlslOP;
  1152. Value *refArgs[] = {nullptr};
  1153. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  1154. }
  1155. // Wave intrinsics of the form fn(val,lane)->val
  1156. Value *TranslateWaveReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1157. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1158. hlsl::OP *hlslOP = &helper.hlslOP;
  1159. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  1160. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneAt, refArgs,
  1161. CI->getOperand(1)->getType(), CI, hlslOP);
  1162. }
  1163. // Wave intrinsics of the form fn(val)->val
  1164. Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP,
  1165. OP::OpCode opcode,
  1166. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1167. hlsl::OP *hlslOP = &helper.hlslOP;
  1168. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  1169. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneFirst, refArgs,
  1170. CI->getOperand(1)->getType(), CI, hlslOP);
  1171. }
  1172. Value *TranslateAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1173. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1174. hlsl::OP *hlslOP = &helper.hlslOP;
  1175. Type *pOverloadTy = CI->getType()->getScalarType();
  1176. if (pOverloadTy->isFloatingPointTy()) {
  1177. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  1178. return TrivialDxilOperation(DXIL::OpCode::FAbs, refArgs, CI->getType(), CI,
  1179. hlslOP);
  1180. } else {
  1181. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1182. IRBuilder<> Builder(CI);
  1183. Value *neg = Builder.CreateNeg(src);
  1184. return TrivialDxilBinaryOperation(DXIL::OpCode::IMax, src, neg, hlslOP,
  1185. Builder);
  1186. }
  1187. }
  1188. Value *TranslateUAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1189. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1190. return CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx); // No-op
  1191. }
  1192. Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) {
  1193. Type *Ty = val->getType();
  1194. Type *EltTy = Ty->getScalarType();
  1195. Constant *zero = nullptr;
  1196. if (EltTy->isFloatingPointTy())
  1197. zero = ConstantFP::get(EltTy, 0);
  1198. else
  1199. zero = ConstantInt::get(EltTy, 0);
  1200. if (Ty != EltTy) {
  1201. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1202. }
  1203. if (EltTy->isFloatingPointTy())
  1204. return Builder.CreateFCmpUNE(val, zero);
  1205. else
  1206. return Builder.CreateICmpNE(val, zero);
  1207. }
  1208. Value *TranslateAllForValue(Value *val, IRBuilder<> &Builder) {
  1209. Value *cond = GenerateCmpNEZero(val, Builder);
  1210. Type *Ty = val->getType();
  1211. Type *EltTy = Ty->getScalarType();
  1212. if (Ty != EltTy) {
  1213. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1214. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1215. Value *Elt = Builder.CreateExtractElement(cond, i);
  1216. Result = Builder.CreateAnd(Result, Elt);
  1217. }
  1218. return Result;
  1219. } else
  1220. return cond;
  1221. }
  1222. Value *TranslateAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1223. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1224. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1225. IRBuilder<> Builder(CI);
  1226. return TranslateAllForValue(val, Builder);
  1227. }
  1228. Value *TranslateAny(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1229. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1230. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1231. IRBuilder<> Builder(CI);
  1232. Value *cond = GenerateCmpNEZero(val, Builder);
  1233. Type *Ty = val->getType();
  1234. Type *EltTy = Ty->getScalarType();
  1235. if (Ty != EltTy) {
  1236. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1237. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1238. Value *Elt = Builder.CreateExtractElement(cond, i);
  1239. Result = Builder.CreateOr(Result, Elt);
  1240. }
  1241. return Result;
  1242. } else
  1243. return cond;
  1244. }
  1245. Value *TranslateBitcast(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1246. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1247. Type *Ty = CI->getType();
  1248. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1249. IRBuilder<> Builder(CI);
  1250. return Builder.CreateBitCast(op, Ty);
  1251. }
// Splits double value(s) x via dx.op.SplitDouble and stores the low/high
// 32-bit halves through the lo/hi output pointers. Returns nullptr since the
// results are delivered through the out parameters.
Value *TranslateDoubleAsUint(Value *x, Value *lo, Value *hi,
                             IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  Type *Ty = x->getType();
  // Element type of the output storage (uint, or uint vector for vector x).
  Type *outTy = lo->getType()->getPointerElementType();
  DXIL::OpCode opcode = DXIL::OpCode::SplitDouble;
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  if (Ty->isVectorTy()) {
    // SplitDouble is scalar: split each element and rebuild lo/hi vectors.
    Value *retValLo = llvm::UndefValue::get(outTy);
    Value *retValHi = llvm::UndefValue::get(outTy);
    unsigned vecSize = Ty->getVectorNumElements();
    for (unsigned i = 0; i < vecSize; i++) {
      Value *Elt = Builder.CreateExtractElement(x, i);
      Value *EltOP = Builder.CreateCall(dxilFunc, {opArg, Elt},
                                        hlslOP->GetOpCodeName(opcode));
      // SplitDouble returns a {lo, hi} aggregate.
      Value *EltLo = Builder.CreateExtractValue(EltOP, 0);
      retValLo = Builder.CreateInsertElement(retValLo, EltLo, i);
      Value *EltHi = Builder.CreateExtractValue(EltOP, 1);
      retValHi = Builder.CreateInsertElement(retValHi, EltHi, i);
    }
    Builder.CreateStore(retValLo, lo);
    Builder.CreateStore(retValHi, hi);
  } else {
    Value *retVal =
        Builder.CreateCall(dxilFunc, {opArg, x}, hlslOP->GetOpCodeName(opcode));
    Value *retValLo = Builder.CreateExtractValue(retVal, 0);
    Value *retValHi = Builder.CreateExtractValue(retVal, 1);
    Builder.CreateStore(retValLo, lo);
    Builder.CreateStore(retValHi, hi);
  }
  return nullptr;
}
// asuint() has two forms: the 2-operand form is a plain bitcast; the
// 4-operand form takes a double plus two output arguments and splits it via
// TranslateDoubleAsUint (SplitDouble).
Value *TranslateAsUint(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  if (CI->getNumArgOperands() == 2) {
    // asuint(x): reinterpret bits as uint.
    return TranslateBitcast(CI, IOP, opcode, helper, pObjHelper, Translated);
  } else {
    DXASSERT_NOMSG(CI->getNumArgOperands() == 4);
    hlsl::OP *hlslOP = &helper.hlslOP;
    // asuint(double, out lo, out hi).
    Value *x = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
    DXASSERT_NOMSG(x->getType()->getScalarType()->isDoubleTy());
    Value *lo = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
    Value *hi = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
    IRBuilder<> Builder(CI);
    return TranslateDoubleAsUint(x, lo, hi, Builder, hlslOP);
  }
}
  1301. Value *TranslateAsDouble(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1302. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1303. hlsl::OP *hlslOP = &helper.hlslOP;
  1304. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1305. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1306. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  1307. IRBuilder<> Builder(CI);
  1308. return TrivialDxilOperation(opcode, { opArg, x, y }, CI->getType(), CI->getType(), hlslOP, Builder);
  1309. }
// atan2(y, x): computed as atan(y/x), then corrected per quadrant with a
// chain of selects (later selects take priority over earlier ones).
Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *tan = Builder.CreateFDiv(y, x);
  Value *atan =
      TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder);
  // Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
  Type *Ty = x->getType();
  Constant *pi = ConstantFP::get(Ty->getScalarType(), M_PI);
  Constant *halfPi = ConstantFP::get(Ty->getScalarType(), M_PI / 2);
  Constant *negHalfPi = ConstantFP::get(Ty->getScalarType(), -M_PI / 2);
  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  // Splat all correction constants for vector arguments.
  if (Ty->isVectorTy()) {
    unsigned vecSize = Ty->getVectorNumElements();
    pi = ConstantVector::getSplat(vecSize, pi);
    halfPi = ConstantVector::getSplat(vecSize, halfPi);
    negHalfPi = ConstantVector::getSplat(vecSize, negHalfPi);
    zero = ConstantVector::getSplat(vecSize, zero);
  }
  Value *atanAddPi = Builder.CreateFAdd(atan, pi);
  Value *atanSubPi = Builder.CreateFSub(atan, pi);
  // x > 0 -> atan.
  Value *result = atan;
  Value *xLt0 = Builder.CreateFCmpOLT(x, zero);
  Value *xEq0 = Builder.CreateFCmpOEQ(x, zero);
  Value *yGe0 = Builder.CreateFCmpOGE(y, zero);
  Value *yLt0 = Builder.CreateFCmpOLT(y, zero);
  // x < 0, y >= 0 -> atan + pi.
  Value *xLt0AndyGe0 = Builder.CreateAnd(xLt0, yGe0);
  result = Builder.CreateSelect(xLt0AndyGe0, atanAddPi, result);
  // x < 0, y < 0 -> atan - pi.
  Value *xLt0AndYLt0 = Builder.CreateAnd(xLt0, yLt0);
  result = Builder.CreateSelect(xLt0AndYLt0, atanSubPi, result);
  // x == 0, y < 0 -> -pi/2
  Value *xEq0AndYLt0 = Builder.CreateAnd(xEq0, yLt0);
  result = Builder.CreateSelect(xEq0AndYLt0, negHalfPi, result);
  // x == 0, y > 0 -> pi/2
  Value *xEq0AndYGe0 = Builder.CreateAnd(xEq0, yGe0);
  result = Builder.CreateSelect(xEq0AndYGe0, halfPi, result);
  return result;
}
  1354. Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1355. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1356. hlsl::OP *hlslOP = &helper.hlslOP;
  1357. Type *Ty = CI->getType();
  1358. Type *EltTy = Ty->getScalarType();
  1359. DXIL::OpCode maxOp = DXIL::OpCode::FMax;
  1360. DXIL::OpCode minOp = DXIL::OpCode::FMin;
  1361. if (IOP == IntrinsicOp::IOP_uclamp) {
  1362. maxOp = DXIL::OpCode::UMax;
  1363. minOp = DXIL::OpCode::UMin;
  1364. } else if (EltTy->isIntegerTy()) {
  1365. maxOp = DXIL::OpCode::IMax;
  1366. minOp = DXIL::OpCode::IMin;
  1367. }
  1368. Value *x = CI->getArgOperand(HLOperandIndex::kClampOpXIdx);
  1369. Value *maxVal = CI->getArgOperand(HLOperandIndex::kClampOpMaxIdx);
  1370. Value *minVal = CI->getArgOperand(HLOperandIndex::kClampOpMinIdx);
  1371. IRBuilder<> Builder(CI);
  1372. // min(max(x, minVal), maxVal).
  1373. Value *maxXMinVal =
  1374. TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder);
  1375. return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder);
  1376. }
// clip(x): emits dx.op.Discard with the condition "any component of x < 0".
Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,
                     HLObjectOperationLowerHelper *pObjHelper,
                     bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Function *discard =
      hlslOP->GetOpFunc(OP::OpCode::Discard, Type::getVoidTy(CI->getContext()));
  IRBuilder<> Builder(CI);
  Value *cond = nullptr;
  Value *arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  if (VectorType *VT = dyn_cast<VectorType>(arg->getType())) {
    // Vector argument: OR together the "< 0" test for every component.
    Value *elt = Builder.CreateExtractElement(arg, (uint64_t)0);
    cond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
    for (unsigned i = 1; i < VT->getNumElements(); i++) {
      Value *elt = Builder.CreateExtractElement(arg, i);
      Value *eltCond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
      cond = Builder.CreateOr(cond, eltCond);
    }
  } else
    cond = Builder.CreateFCmpOLT(arg, hlslOP->GetFloatConst(0));
  /*If discard condition evaluates to false at compile-time, then
  don't emit the discard instruction.*/
  if (ConstantInt *constCond = dyn_cast<ConstantInt>(cond))
    if (!constCond->getLimitedValue())
      return nullptr;
  Constant *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Discard);
  Builder.CreateCall(discard, {opArg, cond});
  // clip has no value result.
  return nullptr;
}
  1404. Value *TranslateCross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1405. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1406. VectorType *VT = cast<VectorType>(CI->getType());
  1407. DXASSERT_NOMSG(VT->getNumElements() == 3);
  1408. Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1409. Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1410. IRBuilder<> Builder(CI);
  1411. Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0);
  1412. Value *op0_y = Builder.CreateExtractElement(op0, 1);
  1413. Value *op0_z = Builder.CreateExtractElement(op0, 2);
  1414. Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0);
  1415. Value *op1_y = Builder.CreateExtractElement(op1, 1);
  1416. Value *op1_z = Builder.CreateExtractElement(op1, 2);
  1417. auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * {
  1418. Value *xy = Builder.CreateFMul(x0, y1);
  1419. Value *yx = Builder.CreateFMul(y0, x1);
  1420. return Builder.CreateFSub(xy, yx);
  1421. };
  1422. Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
  1423. Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
  1424. Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
  1425. Value *cross = UndefValue::get(VT);
  1426. cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0);
  1427. cross = Builder.CreateInsertElement(cross, zx_xz, 1);
  1428. cross = Builder.CreateInsertElement(cross, xy_yx, 2);
  1429. return cross;
  1430. }
  1431. Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1432. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1433. IRBuilder<> Builder(CI);
  1434. Type *Ty = CI->getType();
  1435. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1436. // 180/pi.
  1437. Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI);
  1438. if (Ty != Ty->getScalarType()) {
  1439. toDegreeConst =
  1440. ConstantVector::getSplat(Ty->getVectorNumElements(), toDegreeConst);
  1441. }
  1442. return Builder.CreateFMul(toDegreeConst, val);
  1443. }
  1444. Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1445. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1446. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1447. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1448. Type *Ty = src1->getType();
  1449. IRBuilder<> Builder(CI);
  1450. Value *Result = UndefValue::get(Ty);
  1451. Constant *oneConst = ConstantFP::get(Ty->getScalarType(), 1);
  1452. // dest.x = 1;
  1453. Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  1454. // dest.y = src0.y * src1.y;
  1455. Value *src0_y = Builder.CreateExtractElement(src0, 1);
  1456. Value *src1_y = Builder.CreateExtractElement(src1, 1);
  1457. Value *yMuly = Builder.CreateFMul(src0_y, src1_y);
  1458. Result = Builder.CreateInsertElement(Result, yMuly, 1);
  1459. // dest.z = src0.z;
  1460. Value *src0_z = Builder.CreateExtractElement(src0, 2);
  1461. Result = Builder.CreateInsertElement(Result, src0_z, 2);
  1462. // dest.w = src1.w;
  1463. Value *src1_w = Builder.CreateExtractElement(src1, 3);
  1464. Result = Builder.CreateInsertElement(Result, src1_w, 3);
  1465. return Result;
  1466. }
// Lowers firstbithigh. The DXIL op reports the bit position counted from
// the MSB, while HLSL expects it counted from the LSB, so the raw result
// is flipped to (bitWidth-1 - raw); the -1 "no bit set" sentinel is
// preserved by a select.
Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  Value *firstbitHi =
      TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi);
  IRBuilder<> Builder(CI);
  Constant *neg1 = Builder.getInt32(-1);
  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *Ty = src->getType();
  IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType());
  // bitWidth-1 of the source element type; the DXIL result is i32.
  Constant *bitWidth = Builder.getInt32(EltTy->getBitWidth()-1);
  if (Ty == Ty->getScalarType()) {
    Value *sub = Builder.CreateSub(bitWidth, firstbitHi);
    Value *cond = Builder.CreateICmpEQ(neg1, firstbitHi);
    return Builder.CreateSelect(cond, neg1, sub);
  } else {
    // Vector case: apply the flip-and-select per element.
    Value *result = UndefValue::get(CI->getType());
    unsigned vecSize = Ty->getVectorNumElements();
    for (unsigned i = 0; i < vecSize; i++) {
      Value *EltFirstBit = Builder.CreateExtractElement(firstbitHi, i);
      Value *sub = Builder.CreateSub(bitWidth, EltFirstBit);
      Value *cond = Builder.CreateICmpEQ(neg1, EltFirstBit);
      Value *Elt = Builder.CreateSelect(cond, neg1, sub);
      result = Builder.CreateInsertElement(result, Elt, i);
    }
    return result;
  }
}
  1497. Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1498. HLOperationLowerHelper &helper,
  1499. HLObjectOperationLowerHelper *pObjHelper,
  1500. bool &Translated) {
  1501. Value *firstbitLo =
  1502. TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1503. return firstbitLo;
  1504. }
// Lowers lit(n_dot_l, n_dot_h, m) to a float4 (ambient, diffuse,
// specular, 1):
//   ambient  = 1
//   diffuse  = (n_dot_l < 0) ? 0 : n_dot_l
//   specular = (n_dot_l < 0 || n_dot_h < 0) ? 0 : pow(n_dot_h, m)
Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  Value *n_dot_l = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *n_dot_h = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *m = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  IRBuilder<> Builder(CI);
  Type *Ty = m->getType();
  Value *Result = UndefValue::get(VectorType::get(Ty, 4));
  // Result = (ambient, diffuse, specular, 1)
  // ambient = 1.
  Constant *oneConst = ConstantFP::get(Ty, 1);
  Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  // Result.w = 1.
  Result = Builder.CreateInsertElement(Result, oneConst, 3);
  // diffuse = (n_dot_l < 0) ? 0 : n_dot_l.
  Constant *zeroConst = ConstantFP::get(Ty, 0);
  Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst);
  Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l);
  Result = Builder.CreateInsertElement(Result, diffuse, 1);
  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h ^ m).
  Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst);
  Value *specCond = Builder.CreateOr(nlCmp, nhCmp);
  // The pow expansion differs under FXC compatibility mode.
  bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
  Value *nhPowM = TranslatePowImpl(&helper.hlslOP, Builder, n_dot_h, m, isFXCCompatMode);
  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhPowM);
  Result = Builder.CreateInsertElement(Result, spec, 2);
  return Result;
}
  1533. Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1534. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1535. IRBuilder<> Builder(CI);
  1536. Type *Ty = CI->getType();
  1537. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1538. // pi/180.
  1539. Constant *toRadianConst = ConstantFP::get(Ty->getScalarType(), M_PI / 180);
  1540. if (Ty != Ty->getScalarType()) {
  1541. toRadianConst =
  1542. ConstantVector::getSplat(Ty->getVectorNumElements(), toRadianConst);
  1543. }
  1544. return Builder.CreateFMul(toRadianConst, val);
  1545. }
  1546. Value *TranslateF16ToF32(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1547. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1548. IRBuilder<> Builder(CI);
  1549. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1550. Type *Ty = CI->getType();
  1551. Function *f16tof32 =
  1552. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1553. return TrivialDxilOperation(
  1554. f16tof32, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1555. x->getType(), Ty, &helper.hlslOP, Builder);
  1556. }
  1557. Value *TranslateF32ToF16(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1558. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1559. IRBuilder<> Builder(CI);
  1560. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1561. Type *Ty = CI->getType();
  1562. Function *f32tof16 =
  1563. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1564. return TrivialDxilOperation(
  1565. f32tof16, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1566. x->getType(), Ty, &helper.hlslOP, Builder);
  1567. }
// Shared helper: length(val) = sqrt(sum of squared components) for
// vectors with more than one element. For a scalar or a 1-element
// vector the length degenerates to |val| (FAbs).
Value *TranslateLength(CallInst *CI, Value *val, hlsl::OP *hlslOP) {
  IRBuilder<> Builder(CI);
  if (VectorType *VT = dyn_cast<VectorType>(val->getType())) {
    Value *Elt = Builder.CreateExtractElement(val, (uint64_t)0);
    unsigned size = VT->getNumElements();
    if (size > 1) {
      // Accumulate the dot product of val with itself.
      Value *Sum = Builder.CreateFMul(Elt, Elt);
      for (unsigned i = 1; i < size; i++) {
        Elt = Builder.CreateExtractElement(val, i);
        Value *Mul = Builder.CreateFMul(Elt, Elt);
        Sum = Builder.CreateFAdd(Sum, Mul);
      }
      DXIL::OpCode sqrt = DXIL::OpCode::Sqrt;
      Function *dxilSqrt = hlslOP->GetOpFunc(sqrt, VT->getElementType());
      Value *opArg = hlslOP->GetI32Const((unsigned)sqrt);
      return Builder.CreateCall(dxilSqrt, {opArg, Sum},
                                hlslOP->GetOpCodeName(sqrt));
    } else {
      // 1-element vector: fall through to the scalar FAbs path.
      val = Elt;
    }
  }
  DXIL::OpCode fabs = DXIL::OpCode::FAbs;
  Function *dxilFAbs = hlslOP->GetOpFunc(fabs, val->getType());
  Value *opArg = hlslOP->GetI32Const((unsigned)fabs);
  return Builder.CreateCall(dxilFAbs, {opArg, val},
                            hlslOP->GetOpCodeName(fabs));
}
  1595. Value *TranslateLength(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1596. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1597. hlsl::OP *hlslOP = &helper.hlslOP;
  1598. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1599. return TranslateLength(CI, val, hlslOP);
  1600. }
  1601. Value *TranslateModF(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1602. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1603. hlsl::OP *hlslOP = &helper.hlslOP;
  1604. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1605. Value *outIntPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1606. IRBuilder<> Builder(CI);
  1607. Value *intP =
  1608. TrivialDxilUnaryOperation(OP::OpCode::Round_z, val, hlslOP, Builder);
  1609. Value *fracP = Builder.CreateFSub(val, intP);
  1610. Builder.CreateStore(intP, outIntPtr);
  1611. return fracP;
  1612. }
  1613. Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1614. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1615. hlsl::OP *hlslOP = &helper.hlslOP;
  1616. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1617. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1618. IRBuilder<> Builder(CI);
  1619. Value *sub = Builder.CreateFSub(src0, src1);
  1620. return TranslateLength(CI, sub, hlslOP);
  1621. }
  1622. Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1623. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1624. hlsl::OP *hlslOP = &helper.hlslOP;
  1625. IRBuilder<> Builder(CI);
  1626. Type *Ty = CI->getType();
  1627. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1628. Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E);
  1629. if (Ty != Ty->getScalarType()) {
  1630. log2eConst =
  1631. ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst);
  1632. }
  1633. val = Builder.CreateFMul(log2eConst, val);
  1634. Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder);
  1635. return exp;
  1636. }
  1637. Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1638. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1639. hlsl::OP *hlslOP = &helper.hlslOP;
  1640. IRBuilder<> Builder(CI);
  1641. Type *Ty = CI->getType();
  1642. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1643. Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2);
  1644. if (Ty != Ty->getScalarType()) {
  1645. ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const);
  1646. }
  1647. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1648. return Builder.CreateFMul(ln2Const, log);
  1649. }
  1650. Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1651. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1652. hlsl::OP *hlslOP = &helper.hlslOP;
  1653. IRBuilder<> Builder(CI);
  1654. Type *Ty = CI->getType();
  1655. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1656. Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10);
  1657. if (Ty != Ty->getScalarType()) {
  1658. log2_10Const =
  1659. ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const);
  1660. }
  1661. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1662. return Builder.CreateFMul(log2_10Const, log);
  1663. }
// Lowers fmod(x, y). HLSL fmod takes the sign of the quotient x/y, so
// frac() is applied to |x/y| and the sign restored with a select:
//   fmod(x, y) = copysign(frac(|x/y|), x/y) * y
Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *div = Builder.CreateFDiv(src0, src1);
  Value *negDiv = Builder.CreateFNeg(div);
  // ge is true when the quotient is non-negative (div >= -div).
  Value *ge = Builder.CreateFCmpOGE(div, negDiv);
  Value *absDiv =
      TrivialDxilUnaryOperation(OP::OpCode::FAbs, div, hlslOP, Builder);
  // Fractional part of the absolute quotient.
  Value *frc =
      TrivialDxilUnaryOperation(OP::OpCode::Frc, absDiv, hlslOP, Builder);
  Value *negFrc = Builder.CreateFNeg(frc);
  // Restore the quotient's sign, then scale back up by the divisor.
  Value *realFrc = Builder.CreateSelect(ge, frc, negFrc);
  return Builder.CreateFMul(realFrc, src1);
}
  1681. Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1682. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1683. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1684. if (isFloat) {
  1685. switch (IOP) {
  1686. case IntrinsicOp::IOP_max:
  1687. opcode = OP::OpCode::FMax;
  1688. break;
  1689. case IntrinsicOp::IOP_min:
  1690. default:
  1691. DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_min);
  1692. opcode = OP::OpCode::FMin;
  1693. break;
  1694. }
  1695. }
  1696. return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1697. }
  1698. Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1699. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1700. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1701. if (isFloat) {
  1702. switch (IOP) {
  1703. case IntrinsicOp::IOP_mad:
  1704. default:
  1705. DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_mad);
  1706. opcode = OP::OpCode::FMad;
  1707. break;
  1708. }
  1709. }
  1710. return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1711. }
// Lowers frexp(val, out exp) by picking apart the IEEE-754 float32 bit
// pattern: the biased exponent is extracted, unbiased and stored as a
// float through expPtr; the mantissa is re-biased into [0.5, 1) and
// returned. A val != 0 mask zeroes both results for zero inputs.
Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *expPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
  // IEEE-754 float32 field masks and constants.
  Constant *exponentMaskConst = ConstantInt::get(i32Ty, 0x7f800000);
  Constant *mantisaMaskConst = ConstantInt::get(i32Ty, 0x007fffff);
  Constant *exponentShiftConst = ConstantInt::get(i32Ty, 23);
  // 0x3f000000 is the bit pattern of 0.5f: OR-ing it onto the mantissa
  // bits yields a value in [0.5, 1).
  Constant *mantisaOrConst = ConstantInt::get(i32Ty, 0x3f000000);
  Constant *exponentBiasConst = ConstantInt::get(i32Ty, -(int)0x3f000000);
  Constant *zeroVal = hlslOP->GetFloatConst(0);
  // int iVal = asint(val);
  Type *dstTy = i32Ty;
  Type *Ty = val->getType();
  if (Ty->isVectorTy()) {
    // Splat all constants to the vector width.
    unsigned vecSize = Ty->getVectorNumElements();
    dstTy = VectorType::get(i32Ty, vecSize);
    exponentMaskConst = ConstantVector::getSplat(vecSize, exponentMaskConst);
    mantisaMaskConst = ConstantVector::getSplat(vecSize, mantisaMaskConst);
    exponentShiftConst = ConstantVector::getSplat(vecSize, exponentShiftConst);
    mantisaOrConst = ConstantVector::getSplat(vecSize, mantisaOrConst);
    exponentBiasConst = ConstantVector::getSplat(vecSize, exponentBiasConst);
    zeroVal = ConstantVector::getSplat(vecSize, zeroVal);
  }
  // bool ne = val != 0; sign-extended to an all-ones/all-zeros i32 mask.
  Value *notZero = Builder.CreateFCmpUNE(val, zeroVal);
  notZero = Builder.CreateSExt(notZero, dstTy);
  Value *intVal = Builder.CreateBitCast(val, dstTy);
  // temp = intVal & exponentMask;
  Value *temp = Builder.CreateAnd(intVal, exponentMaskConst);
  // temp = temp + exponentBias;
  temp = Builder.CreateAdd(temp, exponentBiasConst);
  // temp = temp & ne; (zero the exponent for zero inputs)
  temp = Builder.CreateAnd(temp, notZero);
  // temp = temp >> exponentShift;
  temp = Builder.CreateAShr(temp, exponentShiftConst);
  // exp = float(temp);
  Value *exp = Builder.CreateSIToFP(temp, Ty);
  Builder.CreateStore(exp, expPtr);
  // temp = iVal & mantissaMask;
  temp = Builder.CreateAnd(intVal, mantisaMaskConst);
  // temp = temp | mantissaOr; (re-bias into [0.5, 1))
  temp = Builder.CreateOr(temp, mantisaOrConst);
  // mantissa = temp & ne; (zero the mantissa for zero inputs)
  Value *mantisa = Builder.CreateAnd(temp, notZero);
  return Builder.CreateBitCast(mantisa, Ty);
}
  1761. Value *TranslateLdExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1762. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1763. hlsl::OP *hlslOP = &helper.hlslOP;
  1764. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1765. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1766. IRBuilder<> Builder(CI);
  1767. Value *exp =
  1768. TrivialDxilUnaryOperation(OP::OpCode::Exp, src1, hlslOP, Builder);
  1769. return Builder.CreateFMul(exp, src0);
  1770. }
  1771. Value *TranslateFWidth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1772. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1773. hlsl::OP *hlslOP = &helper.hlslOP;
  1774. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1775. IRBuilder<> Builder(CI);
  1776. Value *ddx =
  1777. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseX, src, hlslOP, Builder);
  1778. Value *absDdx =
  1779. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddx, hlslOP, Builder);
  1780. Value *ddy =
  1781. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseY, src, hlslOP, Builder);
  1782. Value *absDdy =
  1783. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddy, hlslOP, Builder);
  1784. return Builder.CreateFAdd(absDdx, absDdy);
  1785. }
  1786. Value *TranslateLerp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1787. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1788. // x + s(y-x)
  1789. Value *x = CI->getArgOperand(HLOperandIndex::kLerpOpXIdx);
  1790. Value *y = CI->getArgOperand(HLOperandIndex::kLerpOpYIdx);
  1791. IRBuilder<> Builder(CI);
  1792. Value *ySubx = Builder.CreateFSub(y, x);
  1793. Value *s = CI->getArgOperand(HLOperandIndex::kLerpOpSIdx);
  1794. Value *sMulSub = Builder.CreateFMul(s, ySubx);
  1795. return Builder.CreateFAdd(x, sMulSub);
  1796. }
// Emits a fixed-width DXIL dot op (Dot2/Dot3/Dot4). The DXIL call takes
// the opcode, then every component of src0, then every component of
// src1, all as scalars.
Value *TrivialDotOperation(OP::OpCode opcode, Value *src0,
                           Value *src1, hlsl::OP *hlslOP,
                           IRBuilder<> &Builder) {
  Type *Ty = src0->getType()->getScalarType();
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  // Worst case (Dot4): 1 opcode + 4 + 4 components = 9 args.
  SmallVector<Value *, 9> args;
  args.emplace_back(opArg);
  unsigned vecSize = src0->getType()->getVectorNumElements();
  for (unsigned i = 0; i < vecSize; i++)
    args.emplace_back(Builder.CreateExtractElement(src0, i));
  for (unsigned i = 0; i < vecSize; i++)
    args.emplace_back(Builder.CreateExtractElement(src1, i));
  Value *dotOP = Builder.CreateCall(dxilFunc, args);
  return dotOP;
}
// Integer dot product: DXIL has no integer dot op, so expand to a
// multiply of the first components followed by an IMad/UMad chain for
// the remaining lanes. Unsigned selects UMad; the default keeps IMad.
Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, IRBuilder<> &Builder, bool Unsigned = false) {
  auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad;
  Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0);
  Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0);
  Value *Result = Builder.CreateMul(Elt0, Elt1);
  // Result = arg0[i] * arg1[i] + Result, one mad per remaining lane.
  for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) {
    Elt0 = Builder.CreateExtractElement(arg0, iVecElt);
    Elt1 = Builder.CreateExtractElement(arg1, iVecElt);
    Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, Builder);
  }
  return Result;
}
  1825. Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize,
  1826. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  1827. switch (vecSize) {
  1828. case 2:
  1829. return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder);
  1830. break;
  1831. case 3:
  1832. return TrivialDotOperation(OP::OpCode::Dot3, arg0, arg1, hlslOP, Builder);
  1833. break;
  1834. case 4:
  1835. return TrivialDotOperation(OP::OpCode::Dot4, arg0, arg1, hlslOP, Builder);
  1836. break;
  1837. default:
  1838. DXASSERT(vecSize == 1, "wrong vector size");
  1839. {
  1840. Value *vecMul = Builder.CreateFMul(arg0, arg1);
  1841. return Builder.CreateExtractElement(vecMul, (uint64_t)0);
  1842. }
  1843. break;
  1844. }
  1845. }
  1846. Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1847. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1848. hlsl::OP *hlslOP = &helper.hlslOP;
  1849. Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1850. Type *Ty = arg0->getType();
  1851. unsigned vecSize = Ty->getVectorNumElements();
  1852. Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1853. IRBuilder<> Builder(CI);
  1854. if (Ty->getScalarType()->isFloatingPointTy()) {
  1855. return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
  1856. } else {
  1857. return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder);
  1858. }
  1859. }
// normalize(v) = v * rsqrt(dot(v, v)). The scalar rsqrt result is
// splatted back to the vector width with repeated insertelement before
// the final multiply.
Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                          HLOperationLowerHelper &helper,
                          HLObjectOperationLowerHelper *pObjHelper,
                          bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Type *Ty = CI->getType();
  Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  VectorType *VT = cast<VectorType>(Ty);
  unsigned vecSize = VT->getNumElements();
  IRBuilder<> Builder(CI);
  // dot(v, v) == squared length.
  Value *dot = TranslateFDot(op, op, vecSize, hlslOP, Builder);
  DXIL::OpCode rsqrtOp = DXIL::OpCode::Rsqrt;
  Function *dxilRsqrt = hlslOP->GetOpFunc(rsqrtOp, VT->getElementType());
  Value *rsqrt = Builder.CreateCall(
      dxilRsqrt, {hlslOP->GetI32Const((unsigned)rsqrtOp), dot},
      hlslOP->GetOpCodeName(rsqrtOp));
  // Splat the scalar 1/length to the vector width.
  Value *vecRsqrt = UndefValue::get(VT);
  for (unsigned i = 0; i < VT->getNumElements(); i++)
    vecRsqrt = Builder.CreateInsertElement(vecRsqrt, rsqrt, i);
  return Builder.CreateFMul(op, vecRsqrt);
}
  1881. Value *TranslateReflect(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1882. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1883. hlsl::OP *hlslOP = &helper.hlslOP;
  1884. // v = i - 2 * n * dot(i, n).
  1885. IRBuilder<> Builder(CI);
  1886. Value *i = CI->getArgOperand(HLOperandIndex::kReflectOpIIdx);
  1887. Value *n = CI->getArgOperand(HLOperandIndex::kReflectOpNIdx);
  1888. VectorType *VT = cast<VectorType>(i->getType());
  1889. unsigned vecSize = VT->getNumElements();
  1890. Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  1891. // 2 * dot (i, n).
  1892. dot = Builder.CreateFMul(hlslOP->GetFloatConst(2), dot);
  1893. // 2 * n * dot(i, n).
  1894. Value *vecDot = Builder.CreateVectorSplat(vecSize, dot);
  1895. Value *nMulDot = Builder.CreateFMul(vecDot, n);
  1896. // i - 2 * n * dot(i, n).
  1897. return Builder.CreateFSub(i, nMulDot);
  1898. }
Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                        HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // d = dot(i, n);
  // t = 1 - eta * eta * ( 1 - d*d);
  // cond = t >= 0;   (total internal reflection when t < 0)
  // r = eta * i - (eta * d + sqrt(t)) * n;
  // return cond ? r : 0;
  IRBuilder<> Builder(CI);
  Value *i = CI->getArgOperand(HLOperandIndex::kRefractOpIIdx);
  Value *n = CI->getArgOperand(HLOperandIndex::kRefractOpNIdx);
  Value *eta = CI->getArgOperand(HLOperandIndex::kRefractOpEtaIdx);
  VectorType *VT = cast<VectorType>(i->getType());
  unsigned vecSize = VT->getNumElements();
  Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  // eta * eta;
  Value *eta2 = Builder.CreateFMul(eta, eta);
  // d*d;
  Value *dot2 = Builder.CreateFMul(dot, dot);
  Constant *one = ConstantFP::get(eta->getType(), 1);
  Constant *zero = ConstantFP::get(eta->getType(), 0);
  // 1- d*d;
  dot2 = Builder.CreateFSub(one, dot2);
  // eta * eta * (1-d*d);
  eta2 = Builder.CreateFMul(dot2, eta2);
  // t = 1 - eta * eta * ( 1 - d*d);
  Value *t = Builder.CreateFSub(one, eta2);
  // cond = t >= 0;
  Value *cond = Builder.CreateFCmpOGE(t, zero);
  // eta * i; (scalar eta splatted to the vector width)
  // NOTE: the loop index below shadows the incident vector `i`.
  Value *vecEta = UndefValue::get(VT);
  for (unsigned i = 0; i < vecSize; i++)
    vecEta = Builder.CreateInsertElement(vecEta, eta, i);
  Value *etaMulI = Builder.CreateFMul(i, vecEta);
  // sqrt(t);
  Value *sqrt = TrivialDxilUnaryOperation(OP::OpCode::Sqrt, t, hlslOP, Builder);
  // eta * d;
  Value *etaMulD = Builder.CreateFMul(eta, dot);
  // eta * d + sqrt(t);
  Value *etaSqrt = Builder.CreateFAdd(etaMulD, sqrt);
  // (eta * d + sqrt(t)) * n;
  Value *vecEtaSqrt = Builder.CreateVectorSplat(vecSize, etaSqrt);
  Value *r = Builder.CreateFMul(vecEtaSqrt, n);
  // r = eta * i - (eta * d + sqrt(t)) * n;
  r = Builder.CreateFSub(etaMulI, r);
  // Select r when refraction occurs, the zero vector otherwise.
  Value *refract =
      Builder.CreateSelect(cond, r, ConstantVector::getSplat(vecSize, zero));
  return refract;
}
// smoothstep(min, max, x): s = saturate((x-min)/(max-min)), then the
// cubic Hermite polynomial s*s*(3 - 2*s).
Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // s = saturate((x-min)/(max-min)).
  IRBuilder<> Builder(CI);
  Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx);
  Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx);
  Value *maxSubMin = Builder.CreateFSub(maxVal, minVal);
  Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx);
  Value *xSubMin = Builder.CreateFSub(x, minVal);
  Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin);
  Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP,
                                       Builder);
  // return s * s *(3-2*s), computed as s * (s * (3 - 2*s)).
  Constant *c2 = ConstantFP::get(CI->getType(),2);
  Constant *c3 = ConstantFP::get(CI->getType(),3);
  Value *sMul2 = Builder.CreateFMul(s, c2);
  Value *result = Builder.CreateFSub(c3, sMul2);
  result = Builder.CreateFMul(s, result);
  result = Builder.CreateFMul(s, result);
  return result;
}
// Lowers msad4(ref, src2, accum): splats the 32-bit reference to a
// 4-vector, builds the four byte-shifted source words by combining
// src.x and src.y with Bfi (bitfield insert), then issues a vector Msad
// (masked sum of absolute differences) against the accumulator.
Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *ref = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *src = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *accum = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  Type *Ty = CI->getType();
  IRBuilder<> Builder(CI);
  // Splat the scalar reference into all four lanes.
  Value *vecRef = UndefValue::get(Ty);
  for (unsigned i = 0; i < 4; i++)
    vecRef = Builder.CreateInsertElement(vecRef, ref, i);
  Value *srcX = Builder.CreateExtractElement(src, (uint64_t)0);
  Value *srcY = Builder.CreateExtractElement(src, 1);
  Value *byteSrc = UndefValue::get(Ty);
  byteSrc = Builder.CreateInsertElement(byteSrc, srcX, (uint64_t)0);
  // ushr r0.yzw, srcX, l(0, 8, 16, 24)
  // bfi r1.yzw, l(0, 8, 16, 24), l(0, 24, 16, 8), srcX, r0.yyzw
  Value *bfiOpArg =
      hlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::Bfi));
  Value *imm8 = hlslOP->GetU32Const(8);
  Value *imm16 = hlslOP->GetU32Const(16);
  Value *imm24 = hlslOP->GetU32Const(24);
  // Bfi operates on the scalar element type, not the result vector type.
  Ty = ref->getType();
  // Get x[31:8].
  Value *srcXShift = Builder.CreateLShr(srcX, imm8);
  // y[0~7] x[31:8].
  Value *byteSrcElt = TrivialDxilOperation(
      DXIL::OpCode::Bfi, {bfiOpArg, imm8, imm24, srcY, srcXShift}, Ty, Ty,
      hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 1);
  // Get x[31:16].
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
  // y[0~15] x[31:16].
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
                                    {bfiOpArg, imm16, imm16, srcY, srcXShift},
                                    Ty, Ty, hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 2);
  // Get x[31:24].
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
  // y[0~23] x[31:24].
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
                                    {bfiOpArg, imm24, imm8, srcY, srcXShift},
                                    Ty, Ty, hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3);
  // Msad on vecref and byteSrc.
  return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, vecRef, byteSrc, accum,
                                     hlslOP, Builder);
}
  2018. Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2019. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2020. Type *Ty = CI->getType();
  2021. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2022. IRBuilder<> Builder(CI);
  2023. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  2024. if (Ty != Ty->getScalarType()) {
  2025. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  2026. }
  2027. return Builder.CreateFDiv(one, op);
  2028. }
  2029. Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2030. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2031. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2032. Type *Ty = val->getType();
  2033. bool IsInt = Ty->getScalarType()->isIntegerTy();
  2034. IRBuilder<> Builder(CI);
  2035. Constant *zero = Constant::getNullValue(Ty);
  2036. Value *zeroLtVal = IsInt ? Builder.CreateICmpSLT(zero, val) : Builder.CreateFCmpOLT(zero, val);
  2037. Value *valLtZero = IsInt ? Builder.CreateICmpSLT(val, zero) : Builder.CreateFCmpOLT(val, zero);
  2038. zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
  2039. valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
  2040. return Builder.CreateSub(zeroLtVal, valLtZero);
  2041. }
  2042. Value *TranslateUSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2043. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2044. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2045. Type *Ty = val->getType();
  2046. IRBuilder<> Builder(CI);
  2047. Constant *zero = Constant::getNullValue(Ty);
  2048. Value *nonZero = Builder.CreateICmpNE(val, zero);
  2049. return Builder.CreateZExt(nonZero, CI->getType());
  2050. }
  2051. Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2052. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2053. Value *edge = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2054. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2055. Type *Ty = CI->getType();
  2056. IRBuilder<> Builder(CI);
  2057. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  2058. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  2059. Value *cond = Builder.CreateFCmpOLT(x, edge);
  2060. if (Ty != Ty->getScalarType()) {
  2061. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  2062. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  2063. }
  2064. return Builder.CreateSelect(cond, zero, one);
  2065. }
  2066. Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2067. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2068. hlsl::OP *hlslOP = &helper.hlslOP;
  2069. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2070. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2071. bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
  2072. IRBuilder<> Builder(CI);
  2073. return TranslatePowImpl(hlslOP,Builder,x,y,isFXCCompatMode);
  2074. }
// printf is not a supported HLSL intrinsic: report a diagnostic on the call
// site and mark the call as untranslated so lowering does not proceed.
Value *TranslatePrintf(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  Translated = false;
  CI->getContext().emitError(CI, "use of undeclared identifier 'printf'");
  return nullptr;
}
  2083. Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2084. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2085. hlsl::OP *hlslOP = &helper.hlslOP;
  2086. Type *Ty = CI->getType();
  2087. Value *n = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  2088. Value *i = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  2089. Value *ng = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  2090. IRBuilder<> Builder(CI);
  2091. unsigned vecSize = Ty->getVectorNumElements();
  2092. // -n x sign(dot(i, ng)).
  2093. Value *dotOp = TranslateFDot(i, ng, vecSize, hlslOP, Builder);
  2094. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  2095. Value *dotLtZero = Builder.CreateFCmpOLT(dotOp, zero);
  2096. Value *negN = Builder.CreateFNeg(n);
  2097. Value *faceforward = Builder.CreateSelect(dotLtZero, n, negN);
  2098. return faceforward;
  2099. }
  2100. Value *TrivialSetMeshOutputCounts(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2101. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2102. hlsl::OP *hlslOP = &helper.hlslOP;
  2103. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2104. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2105. IRBuilder<> Builder(CI);
  2106. Constant *opArg = hlslOP->GetU32Const((unsigned)op);
  2107. Value *args[] = { opArg, src0, src1 };
  2108. Function *dxilFunc = hlslOP->GetOpFunc(op, Type::getVoidTy(CI->getContext()));
  2109. Builder.CreateCall(dxilFunc, args);
  2110. return nullptr;
  2111. }
  2112. Value *TrivialDispatchMesh(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2113. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2114. hlsl::OP *hlslOP = &helper.hlslOP;
  2115. Value *src0 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadX);
  2116. Value *src1 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadY);
  2117. Value *src2 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadZ);
  2118. Value *src3 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpPayload);
  2119. IRBuilder<> Builder(CI);
  2120. Constant *opArg = hlslOP->GetU32Const((unsigned)op);
  2121. Value *args[] = { opArg, src0, src1, src2, src3 };
  2122. Function *dxilFunc = hlslOP->GetOpFunc(op, src3->getType());
  2123. Builder.CreateCall(dxilFunc, args);
  2124. return nullptr;
  2125. }
  2126. }
  2127. // MOP intrinsics
  2128. namespace {
  2129. Value *TranslateGetSamplePosition(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2130. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2131. hlsl::OP *hlslOP = &helper.hlslOP;
  2132. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2133. IRBuilder<> Builder(CI);
  2134. Value *sampleIdx =
  2135. CI->getArgOperand(HLOperandIndex::kGetSamplePositionSampleIdxOpIndex);
  2136. OP::OpCode opcode = OP::OpCode::Texture2DMSGetSamplePosition;
  2137. llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2138. Function *dxilFunc =
  2139. hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  2140. Value *args[] = {opArg, handle, sampleIdx};
  2141. Value *samplePos = Builder.CreateCall(dxilFunc, args);
  2142. Value *result = UndefValue::get(CI->getType());
  2143. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  2144. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  2145. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  2146. result = Builder.CreateInsertElement(result, samplePosY, 1);
  2147. return result;
  2148. }
// Lowers the GetDimensions() method to the DXIL GetDimensions op and scatters
// the returned dimension struct into the caller-provided out-pointers.
// The HL argument layout varies: textures may pass an in-value mip level
// (shifting the width argument index), structured buffers get a stride store,
// and mipped textures / Texture2DMS carry an extra value in the w channel.
Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                              HLOperationLowerHelper &helper,
                              HLObjectOperationLowerHelper *pObjHelper,
                              bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  DxilResource::Kind RK = pObjHelper->GetRK(handle);
  IRBuilder<> Builder(CI);
  OP::OpCode opcode = OP::OpCode::GetDimensions;
  llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Function *dxilFunc =
      hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
  // Default: no mip-level operand (undef), width at the mip-variant index.
  Value *mipLevel = UndefValue::get(i32Ty);
  unsigned widthOpIdx = HLOperandIndex::kGetDimensionsMipWidthOpIndex;
  switch (RK) {
  case DxilResource::Kind::Texture1D:
  case DxilResource::Kind::Texture1DArray:
  case DxilResource::Kind::Texture2D:
  case DxilResource::Kind::Texture2DArray:
  case DxilResource::Kind::TextureCube:
  case DxilResource::Kind::TextureCubeArray:
  case DxilResource::Kind::Texture3D: {
    Value *opMipLevel =
        CI->getArgOperand(HLOperandIndex::kGetDimensionsMipLevelOpIndex);
    // mipLevel is in parameter, should not be pointer.
    if (!opMipLevel->getType()->isPointerTy())
      mipLevel = opMipLevel;
    else {
      // No mip level.
      widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
      mipLevel = ConstantInt::get(i32Ty, 0);
    }
  } break;
  default:
    // Non-texture resources never take a mip level.
    widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
    break;
  }
  Value *args[] = {opArg, handle, mipLevel};
  Value *dims = Builder.CreateCall(dxilFunc, args);
  // Store the width (struct field 0), converting to float if the out-param
  // is floating-point.
  unsigned dimensionIdx = 0;
  Value *width = Builder.CreateExtractValue(dims, dimensionIdx++);
  Value *widthPtr = CI->getArgOperand(widthOpIdx);
  if (widthPtr->getType()->getPointerElementType()->isFloatingPointTy())
    width = Builder.CreateSIToFP(width,
                                 widthPtr->getType()->getPointerElementType());
  Builder.CreateStore(width, widthPtr);
  if (DXIL::IsStructuredBuffer(RK)) {
    // Set stride.
    Value *stridePtr = CI->getArgOperand(widthOpIdx + 1);
    const DataLayout &DL = helper.dataLayout;
    Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
    Type *bufTy = pObjHelper->GetResourceType(handle);
    Type *bufRetTy = bufTy->getStructElementType(0);
    // Stride is the allocation size of one buffer element.
    unsigned stride = DL.getTypeAllocSize(bufRetTy);
    Builder.CreateStore(hlslOP->GetU32Const(stride), stridePtr);
  } else {
    if (widthOpIdx == HLOperandIndex::kGetDimensionsMipWidthOpIndex ||
        // Samples is in w channel too.
        RK == DXIL::ResourceKind::Texture2DMS) {
      // Has mip.
      // Store the middle dimensions; the last HL argument is handled below.
      for (unsigned argIdx = widthOpIdx + 1;
           argIdx < CI->getNumArgOperands() - 1; argIdx++) {
        Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
        Value *ptr = CI->getArgOperand(argIdx);
        if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
          dim = Builder.CreateSIToFP(dim,
                                     ptr->getType()->getPointerElementType());
        Builder.CreateStore(dim, ptr);
      }
      // NumOfLevel is in w channel.
      dimensionIdx = 3;
      Value *dim = Builder.CreateExtractValue(dims, dimensionIdx);
      Value *ptr = CI->getArgOperand(CI->getNumArgOperands() - 1);
      if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
        dim =
            Builder.CreateSIToFP(dim, ptr->getType()->getPointerElementType());
      Builder.CreateStore(dim, ptr);
    } else {
      // No mip info: store the remaining dimensions in order.
      for (unsigned argIdx = widthOpIdx + 1; argIdx < CI->getNumArgOperands();
           argIdx++) {
        Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
        Value *ptr = CI->getArgOperand(argIdx);
        if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
          dim = Builder.CreateSIToFP(dim,
                                     ptr->getType()->getPointerElementType());
        Builder.CreateStore(dim, ptr);
      }
    }
  }
  // All results are written through out-pointers; the HL call has no value.
  return nullptr;
}
  2239. Value *GenerateUpdateCounter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2240. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2241. hlsl::OP *hlslOP = &helper.hlslOP;
  2242. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2243. pObjHelper->MarkHasCounter(handle, helper.i8Ty);
  2244. bool bInc = IOP == IntrinsicOp::MOP_IncrementCounter;
  2245. IRBuilder<> Builder(CI);
  2246. OP::OpCode OpCode = OP::OpCode::BufferUpdateCounter;
  2247. Value *OpCodeArg = hlslOP->GetU32Const((unsigned)OpCode);
  2248. Value *IncVal = hlslOP->GetI8Const(bInc ? 1 : -1);
  2249. // Create BufferUpdateCounter call.
  2250. Value *Args[] = {OpCodeArg, handle, IncVal};
  2251. Function *F =
  2252. hlslOP->GetOpFunc(OpCode, Type::getVoidTy(handle->getContext()));
  2253. return Builder.CreateCall(F, Args);
  2254. }
  2255. static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, IRBuilder<> &Builder) {
  2256. // Extract value part.
  2257. Value *retVal = llvm::UndefValue::get(RetTy);
  2258. if (RetTy->isVectorTy()) {
  2259. for (unsigned i = 0; i < RetTy->getVectorNumElements(); i++) {
  2260. Value *retComp = Builder.CreateExtractValue(ResRet, i);
  2261. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2262. }
  2263. } else {
  2264. retVal = Builder.CreateExtractValue(ResRet, 0);
  2265. }
  2266. return retVal;
  2267. }
  2268. static Value *ScalarizeElements(Type *RetTy, ArrayRef<Value*> Elts, IRBuilder<> &Builder) {
  2269. // Extract value part.
  2270. Value *retVal = llvm::UndefValue::get(RetTy);
  2271. if (RetTy->isVectorTy()) {
  2272. unsigned vecSize = RetTy->getVectorNumElements();
  2273. DXASSERT(vecSize <= Elts.size(), "vector size mismatch");
  2274. for (unsigned i = 0; i < vecSize; i++) {
  2275. Value *retComp = Elts[i];
  2276. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2277. }
  2278. } else {
  2279. retVal = Elts[0];
  2280. }
  2281. return retVal;
  2282. }
  2283. void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder,
  2284. hlsl::OP *hlslOp) {
  2285. if (status && !isa<UndefValue>(status)) {
  2286. Value *statusVal = Builder.CreateExtractValue(ResRet, DXIL::kResRetStatusIndex);
  2287. Value *checkAccessOp = hlslOp->GetI32Const(
  2288. static_cast<unsigned>(DXIL::OpCode::CheckAccessFullyMapped));
  2289. Function *checkAccessFn = hlslOp->GetOpFunc(
  2290. DXIL::OpCode::CheckAccessFullyMapped, statusVal->getType());
  2291. // CheckAccess on status.
  2292. Value *bStatus =
  2293. Builder.CreateCall(checkAccessFn, {checkAccessOp, statusVal});
  2294. Value *extStatus =
  2295. Builder.CreateZExt(bStatus, Type::getInt32Ty(status->getContext()));
  2296. Builder.CreateStore(extStatus, status);
  2297. }
  2298. }
  2299. Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) {
  2300. Value *Result = UndefValue::get(DstTy);
  2301. for (unsigned i = 0; i < DstTy->getVectorNumElements(); i++)
  2302. Result = Builder.CreateInsertElement(Result, Elt, i);
  2303. return Result;
  2304. }
  2305. Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2306. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2307. hlsl::OP *hlslOP = &helper.hlslOP;
  2308. Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2309. Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2310. Type *arg0Ty = arg0->getType();
  2311. Type *arg1Ty = arg1->getType();
  2312. IRBuilder<> Builder(CI);
  2313. if (arg0Ty->isVectorTy()) {
  2314. if (arg1Ty->isVectorTy()) {
  2315. // mul(vector, vector) == dot(vector, vector)
  2316. unsigned vecSize = arg0Ty->getVectorNumElements();
  2317. if (arg0Ty->getScalarType()->isFloatingPointTy()) {
  2318. return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
  2319. }
  2320. else {
  2321. return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, IOP == IntrinsicOp::IOP_umul);
  2322. }
  2323. }
  2324. else {
  2325. // mul(vector, scalar) == vector * scalar-splat
  2326. arg1 = SplatToVector(arg1, arg0Ty, Builder);
  2327. }
  2328. }
  2329. else {
  2330. if (arg1Ty->isVectorTy()) {
  2331. // mul(scalar, vector) == scalar-splat * vector
  2332. arg0 = SplatToVector(arg0, arg1Ty, Builder);
  2333. }
  2334. // else mul(scalar, scalar) == scalar * scalar;
  2335. }
  2336. // create fmul/mul for the pair of vectors or scalars
  2337. if (arg0Ty->getScalarType()->isFloatingPointTy()) {
  2338. return Builder.CreateFMul(arg0, arg1);
  2339. }
  2340. else {
  2341. return Builder.CreateMul(arg0, arg1);
  2342. }
  2343. }
  2344. // Sample intrinsics.
// Collects and normalizes the operands of all HL Sample-family intrinsics
// (Sample/SampleLevel/SampleBias/SampleCmp/SampleGrad/CalculateLOD and the
// WriteSamplerFeedback variants) so the per-op lowering code can build DXIL
// call argument lists uniformly. Coordinate/offset/ddx/ddy components are
// extracted to scalars, padded with undef (offsets with zero, then undef).
struct SampleHelper {
  SampleHelper(CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper);
  // NumOpCodes signals an invalid/untranslatable call (set by the ctor when
  // the resource kind cannot be determined).
  OP::OpCode opcode = OP::OpCode::NumOpCodes;
  DXIL::ResourceKind resourceKind = DXIL::ResourceKind::Invalid;
  // Texture being sampled for WriteSamplerFeedback ops; null otherwise.
  Value *sampledTexHandle = nullptr;
  Value *texHandle = nullptr;
  Value *samplerHandle = nullptr;
  static const unsigned kMaxCoordDimensions = 4;
  unsigned coordDimensions = 0;
  // Scalarized coordinate; entries beyond coordDimensions are undef floats.
  Value *coord[kMaxCoordDimensions];
  Value *compareValue = nullptr;
  Value *bias = nullptr;
  Value *lod = nullptr;
  // SampleGrad only.
  static const unsigned kMaxDDXYDimensions = 3;
  Value *ddx[kMaxDDXYDimensions];
  Value *ddy[kMaxDDXYDimensions];
  // Optional.
  static const unsigned kMaxOffsetDimensions = 3;
  unsigned offsetDimensions = 0;
  // Scalarized offset; zeros when no offset arg, undef beyond the dimension.
  Value *offset[kMaxOffsetDimensions];
  Value *clamp = nullptr;
  Value *status = nullptr;
  // Highest HL operand index read; checked against the arg count to catch
  // unconsumed HL arguments.
  unsigned maxHLOperandRead = 0;
  // Returns the HL operand at opIdx, or nullptr when the call has fewer
  // arguments (i.e. the optional operand is absent).
  Value *ReadHLOperand(CallInst *CI, unsigned opIdx) {
    if (CI->getNumArgOperands() > opIdx) {
      maxHLOperandRead = std::max(maxHLOperandRead, opIdx);
      return CI->getArgOperand(opIdx);
    }
    return nullptr;
  }
  // Scalarizes the coordinate vector argument into coord[], padding unused
  // components with undef.
  void TranslateCoord(CallInst *CI, unsigned coordIdx) {
    Value *coordArg = ReadHLOperand(CI, coordIdx);
    DXASSERT_NOMSG(coordArg);
    DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions,
             "otherwise, HL coordinate dimensions mismatch");
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < coordDimensions; i++)
      coord[i] = Builder.CreateExtractElement(coordArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
      coord[i] = undefF;
  }
  // Scalarizes the optional offset argument into offset[]; missing offsets
  // become zeros, components past offsetDimensions become undef.
  void TranslateOffset(CallInst *CI, unsigned offsetIdx) {
    IntegerType *i32Ty = Type::getInt32Ty(CI->getContext());
    if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) {
      DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions,
               "otherwise, HL coordinate dimensions mismatch");
      IRBuilder<> Builder(CI);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
    } else {
      // Use zeros for offsets when not specified, not undef.
      Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = zero;
    }
    // Use undef for components that should not be used for this resource dim.
    Value *undefI = UndefValue::get(i32Ty);
    for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
      offset[i] = undefI;
  }
  // Reads the bias argument, clamping a compile-time-constant bias into the
  // [kMinMipLodBias, kMaxMipLodBias] range.
  void SetBias(CallInst *CI, unsigned biasIdx) {
    // Clamp bias for immediate.
    bias = ReadHLOperand(CI, biasIdx);
    DXASSERT_NOMSG(bias);
    if (ConstantFP *FP = dyn_cast<ConstantFP>(bias)) {
      float v = FP->getValueAPF().convertToFloat();
      if (v > DXIL::kMaxMipLodBias)
        bias = ConstantFP::get(FP->getType(), DXIL::kMaxMipLodBias);
      if (v < DXIL::kMinMipLodBias)
        bias = ConstantFP::get(FP->getType(), DXIL::kMinMipLodBias);
    }
  }
  // Reads the (required) LOD argument.
  void SetLOD(CallInst *CI, unsigned lodIdx) {
    lod = ReadHLOperand(CI, lodIdx);
    DXASSERT_NOMSG(lod);
  }
  // Reads the (required) comparison value for SampleCmp variants.
  void SetCompareValue(CallInst *CI, unsigned cmpIdx) {
    compareValue = ReadHLOperand(CI, cmpIdx);
    DXASSERT_NOMSG(compareValue);
  }
  // Reads the optional clamp argument; a vector clamp takes element 0, an
  // absent clamp becomes undef.
  void SetClamp(CallInst *CI, unsigned clampIdx) {
    if ((clamp = ReadHLOperand(CI, clampIdx))) {
      if (clamp->getType()->isVectorTy()) {
        IRBuilder<> Builder(CI);
        clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
      }
    } else
      clamp = UndefValue::get(Type::getFloatTy(CI->getContext()));
  }
  // Reads the optional status out-pointer (may stay null).
  void SetStatus(CallInst *CI, unsigned statusIdx) {
    status = ReadHLOperand(CI, statusIdx);
  }
  void SetDDX(CallInst *CI, unsigned ddxIdx) {
    SetDDXY(CI, ddx, ReadHLOperand(CI, ddxIdx));
  }
  void SetDDY(CallInst *CI, unsigned ddyIdx) {
    SetDDXY(CI, ddy, ReadHLOperand(CI, ddyIdx));
  }
  // Scalarizes a derivative vector into ddxy[], padding with undef.
  void SetDDXY(CallInst *CI, MutableArrayRef<Value *> ddxy, Value *ddxyArg) {
    DXASSERT_NOMSG(ddxyArg);
    IRBuilder<> Builder(CI);
    unsigned ddxySize = ddxyArg->getType()->getVectorNumElements();
    for (unsigned i = 0; i < ddxySize; i++)
      ddxy[i] = Builder.CreateExtractElement(ddxyArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = ddxySize; i < kMaxDDXYDimensions; i++)
      ddxy[i] = undefF;
  }
};
// Dispatches on the DXIL opcode to pull each operand out of the HL call at
// its op-specific argument index. TextureCube/TextureCubeArray take no offset
// argument, which shifts every subsequent optional argument index down by 1.
SampleHelper::SampleHelper(
    CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper)
    : opcode(op) {
  texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  resourceKind = pObjHelper->GetRK(texHandle);
  if (resourceKind == DXIL::ResourceKind::Invalid) {
    // Signal the caller (via opcode == NumOpCodes) that lowering must bail.
    opcode = DXIL::OpCode::NumOpCodes;
    return;
  }
  // CalculateLOD uses a reduced coordinate dimensionality.
  coordDimensions = opcode == DXIL::OpCode::CalculateLOD ? DxilResource::GetNumDimensionsForCalcLOD(resourceKind)
                                                         : DxilResource::GetNumCoords(resourceKind);
  offsetDimensions = DxilResource::GetNumOffsets(resourceKind);
  const bool bFeedbackOp = hlsl::OP::IsDxilOpFeedback(op);
  // Feedback ops carry a second (sampled) texture handle.
  sampledTexHandle = bFeedbackOp ? CI->getArgOperand(HLOperandIndex::kWriteSamplerFeedbackSampledArgIndex)
                                 : nullptr;
  const unsigned kSamplerArgIndex = bFeedbackOp ? HLOperandIndex::kWriteSamplerFeedbackSamplerArgIndex
                                                : HLOperandIndex::kSampleSamplerArgIndex;
  samplerHandle = CI->getArgOperand(kSamplerArgIndex);
  const unsigned kCoordArgIdx = bFeedbackOp ? HLOperandIndex::kWriteSamplerFeedbackCoordArgIndex
                                            : HLOperandIndex::kSampleCoordArgIndex;
  TranslateCoord(CI, kCoordArgIdx);
  // TextureCube does not support offsets, shifting each subsequent arg index down by 1
  unsigned cube = (resourceKind == DXIL::ResourceKind::TextureCube ||
                   resourceKind == DXIL::ResourceKind::TextureCubeArray)
                      ? 1 : 0;
  switch (op) {
  case OP::OpCode::Sample:
    // kInvalidIdx makes TranslateOffset fall back to zero offsets.
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleLevel:
    SetLOD(CI, HLOperandIndex::kSampleLLevelArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleLOffsetArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleLStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleBias:
    SetBias(CI, HLOperandIndex::kSampleBBiasArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleBOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleBClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleBStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleCmp:
    SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleCmpOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleCmpClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleCmpLevelZero:
    SetCompareValue(CI, HLOperandIndex::kSampleCmpLZCmpValArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleCmpLZOffsetArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleCmpLZStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleGrad:
    SetDDX(CI, HLOperandIndex::kSampleGDDXArgIndex);
    SetDDY(CI, HLOperandIndex::kSampleGDDYArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleGOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleGClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleGStatusArgIndex - cube);
    break;
  case OP::OpCode::CalculateLOD:
    // Only need coord for LOD calculation.
    break;
  case OP::OpCode::WriteSamplerFeedback:
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedback_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackBias:
    SetBias(CI, HLOperandIndex::kWriteSamplerFeedbackBias_BiasArgIndex);
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackBias_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackGrad:
    SetDDX(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdxArgIndex);
    SetDDY(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdyArgIndex);
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackLevel:
    SetLOD(CI, HLOperandIndex::kWriteSamplerFeedbackLevel_LodArgIndex);
    break;
  default:
    DXASSERT(0, "invalid opcode for Sample");
    break;
  }
  // Every HL argument must have been consumed by one of the readers above.
  DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1,
           "otherwise, unused HL arguments for Sample op");
}
  2541. Value *TranslateCalculateLOD(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2542. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2543. hlsl::OP *hlslOP = &helper.hlslOP;
  2544. SampleHelper sampleHelper(CI, OP::OpCode::CalculateLOD, pObjHelper);
  2545. if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2546. Translated = false;
  2547. return nullptr;
  2548. }
  2549. bool bClamped = IOP == IntrinsicOp::MOP_CalculateLevelOfDetail;
  2550. IRBuilder<> Builder(CI);
  2551. Value *opArg =
  2552. hlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::CalculateLOD));
  2553. Value *clamped = hlslOP->GetI1Const(bClamped);
  2554. Value *args[] = {opArg,
  2555. sampleHelper.texHandle,
  2556. sampleHelper.samplerHandle,
  2557. sampleHelper.coord[0],
  2558. sampleHelper.coord[1],
  2559. sampleHelper.coord[2],
  2560. clamped};
  2561. Function *dxilFunc = hlslOP->GetOpFunc(OP::OpCode::CalculateLOD,
  2562. Type::getFloatTy(opArg->getContext()));
  2563. Value *LOD = Builder.CreateCall(dxilFunc, args);
  2564. return LOD;
  2565. }
  2566. Value *TranslateCheckAccess(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2567. HLOperationLowerHelper &helper,
  2568. HLObjectOperationLowerHelper *pObjHelper,
  2569. bool &Translated) {
  2570. // Translate CheckAccess into uint->bool, later optimization should remove it.
  2571. // Real checkaccess is generated in UpdateStatus.
  2572. IRBuilder<> Builder(CI);
  2573. Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2574. return Builder.CreateTrunc(V, helper.i1Ty);
  2575. }
  2576. void GenerateDxilSample(CallInst *CI, Function *F, ArrayRef<Value *> sampleArgs,
  2577. Value *status, hlsl::OP *hlslOp) {
  2578. IRBuilder<> Builder(CI);
  2579. CallInst *call = Builder.CreateCall(F, sampleArgs);
  2580. dxilutil::MigrateDebugValue(CI, call);
  2581. // extract value part
  2582. Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);
  2583. // Replace ret val.
  2584. CI->replaceAllUsesWith(retVal);
  2585. // get status
  2586. if (status) {
  2587. UpdateStatus(call, status, Builder, hlslOp);
  2588. }
  2589. }
// Lowers the Sample-family HL intrinsics (Sample, SampleLevel, SampleGrad,
// SampleBias, SampleCmp, SampleCmpLevelZero) to their DXIL ops. SampleHelper
// has already scalarized/padded all operands; each case only arranges them in
// the op-specific argument order and calls GenerateDxilSample, which replaces
// CI's uses itself (hence the nullptr return).
Value *TranslateSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  SampleHelper sampleHelper(CI, opcode, pObjHelper);
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
    // Resource kind could not be resolved; leave the call untranslated.
    Translated = false;
    return nullptr;
  }
  Type *Ty = CI->getType();
  // DXIL sample ops are overloaded on the component (scalar) type.
  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  switch (opcode) {
  case OP::OpCode::Sample: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleLevel: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // LOD.
        sampleHelper.lod};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleGrad: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Ddx.
        sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
        // Ddy.
        sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleBias: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Bias.
        sampleHelper.bias,
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleCmp: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // CmpVal.
        sampleHelper.compareValue,
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleCmpLevelZero:
  default: {
    DXASSERT(opcode == OP::OpCode::SampleCmpLevelZero, "invalid sample opcode");
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // CmpVal.
        sampleHelper.compareValue};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  }
  // CI is replaced in GenerateDxilSample.
  return nullptr;
}
  2688. // Gather intrinsics.
// Collects and scalarizes the operands of an HL Gather/GatherCmp call so the
// lowering code can splice them directly into DXIL textureGather(Cmp) args.
struct GatherHelper {
  enum class GatherChannel {
    GatherAll,
    GatherRed,
    GatherGreen,
    GatherBlue,
    GatherAlpha,
  };

  GatherHelper(CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper,
               GatherHelper::GatherChannel ch);

  OP::OpCode opcode; // Set to NumOpCodes by the ctor when the resource kind is invalid.
  Value *texHandle;
  Value *samplerHandle;
  static const unsigned kMaxCoordDimensions = 4;
  Value *coord[kMaxCoordDimensions]; // Scalarized coordinates; tail padded with undef.
  unsigned channel;                  // Component index (0..3) passed as the DXIL channel arg.
  Value *special; // For CompareValue, Bias, LOD.
  // Optional.
  static const unsigned kMaxOffsetDimensions = 2;
  Value *offset[kMaxOffsetDimensions];
  // For the overload send different offset for each sample.
  // Only save 3 sampleOffsets because use offset for normal overload as first
  // sample offset.
  static const unsigned kSampleOffsetDimensions = 3;
  Value *sampleOffsets[kSampleOffsetDimensions][kMaxOffsetDimensions];
  Value *status;         // Optional CheckAccessFullyMapped status operand (may be null).
  bool hasSampleOffsets; // True when the per-sample-offset overload was matched.
  // Highest HL operand index consumed so far; the ctor asserts that every
  // operand of the HL call was read exactly through this mechanism.
  unsigned maxHLOperandRead = 0;
  // Returns HL operand opIdx if the call has it, else nullptr; records the read.
  Value *ReadHLOperand(CallInst *CI, unsigned opIdx) {
    if (CI->getNumArgOperands() > opIdx) {
      maxHLOperandRead = std::max(maxHLOperandRead, opIdx);
      return CI->getArgOperand(opIdx);
    }
    return nullptr;
  }
  // Scalarizes the coordinate vector at coordIdx into coord[], padding the
  // unused tail with float undef.
  void TranslateCoord(CallInst *CI, unsigned coordIdx,
                      unsigned coordDimensions) {
    Value *coordArg = ReadHLOperand(CI, coordIdx);
    DXASSERT_NOMSG(coordArg);
    DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions,
             "otherwise, HL coordinate dimensions mismatch");
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < coordDimensions; i++)
      coord[i] = Builder.CreateExtractElement(coordArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
      coord[i] = undefF;
  }
  // Captures the optional status-out operand (nullptr when absent).
  void SetStatus(CallInst *CI, unsigned statusIdx) {
    status = ReadHLOperand(CI, statusIdx);
  }
  // Scalarizes the (optional) offset vector into offset[]. An absent offset
  // becomes zeros; components beyond the resource's offset rank become undef.
  void TranslateOffset(CallInst *CI, unsigned offsetIdx,
                       unsigned offsetDimensions) {
    IntegerType *i32Ty = Type::getInt32Ty(CI->getContext());
    if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) {
      DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions,
               "otherwise, HL coordinate dimensions mismatch");
      IRBuilder<> Builder(CI);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
    } else {
      // Use zeros for offsets when not specified, not undef.
      Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = zero;
    }
    // Use undef for components that should not be used for this resource dim.
    Value *undefI = UndefValue::get(i32Ty);
    for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
      offset[i] = undefI;
  }
  // Reads the extra offsets of the per-sample-offset overload (the first
  // sample's offset already lives in offset[]); sets hasSampleOffsets when
  // the call carries enough operands for them.
  void TranslateSampleOffset(CallInst *CI, unsigned offsetIdx,
                             unsigned offsetDimensions) {
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
    if (CI->getNumArgOperands() >= (offsetIdx + kSampleOffsetDimensions)) {
      hasSampleOffsets = true;
      IRBuilder<> Builder(CI);
      for (unsigned ch = 0; ch < kSampleOffsetDimensions; ch++) {
        Value *offsetArg = ReadHLOperand(CI, offsetIdx + ch);
        for (unsigned i = 0; i < offsetDimensions; i++)
          sampleOffsets[ch][i] = Builder.CreateExtractElement(offsetArg, i);
        for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
          sampleOffsets[ch][i] = undefI;
      }
    }
  }
  // Update the offset args for gather with sample offset at sampleIdx.
  void UpdateOffsetInGatherArgs(MutableArrayRef<Value *> gatherArgs,
                                unsigned sampleIdx) {
    unsigned offsetBase = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
    for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
      // -1 because offset for sample 0 is in GatherHelper::offset.
      gatherArgs[offsetBase + i] = sampleOffsets[sampleIdx - 1][i];
  }
};
  2784. GatherHelper::GatherHelper(
  2785. CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper,
  2786. GatherHelper::GatherChannel ch)
  2787. : opcode(op), special(nullptr), hasSampleOffsets(false) {
  2788. switch (ch) {
  2789. case GatherChannel::GatherAll:
  2790. channel = 0;
  2791. break;
  2792. case GatherChannel::GatherRed:
  2793. channel = 0;
  2794. break;
  2795. case GatherChannel::GatherGreen:
  2796. channel = 1;
  2797. break;
  2798. case GatherChannel::GatherBlue:
  2799. channel = 2;
  2800. break;
  2801. case GatherChannel::GatherAlpha:
  2802. channel = 3;
  2803. break;
  2804. }
  2805. IRBuilder<> Builder(CI);
  2806. texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2807. samplerHandle = CI->getArgOperand(HLOperandIndex::kSampleSamplerArgIndex);
  2808. DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
  2809. if (RK == DXIL::ResourceKind::Invalid) {
  2810. opcode = DXIL::OpCode::NumOpCodes;
  2811. return;
  2812. }
  2813. unsigned coordSize = DxilResource::GetNumCoords(RK);
  2814. unsigned offsetSize = DxilResource::GetNumOffsets(RK);
  2815. bool cube = RK == DXIL::ResourceKind::TextureCube ||
  2816. RK == DXIL::ResourceKind::TextureCubeArray;
  2817. const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  2818. TranslateCoord(CI, kCoordArgIdx, coordSize);
  2819. switch (op) {
  2820. case OP::OpCode::TextureGather: {
  2821. unsigned statusIdx;
  2822. if (cube) {
  2823. TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize);
  2824. statusIdx = HLOperandIndex::kGatherCubeStatusArgIndex;
  2825. } else {
  2826. TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize);
  2827. // Gather all don't have sample offset version overload.
  2828. if (ch != GatherChannel::GatherAll)
  2829. TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
  2830. offsetSize);
  2831. statusIdx =
  2832. hasSampleOffsets ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex
  2833. : HLOperandIndex::kGatherStatusArgIndex;
  2834. }
  2835. SetStatus(CI, statusIdx);
  2836. } break;
  2837. case OP::OpCode::TextureGatherCmp: {
  2838. special = ReadHLOperand(CI, HLOperandIndex::kGatherCmpCmpValArgIndex);
  2839. unsigned statusIdx;
  2840. if (cube) {
  2841. TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize);
  2842. statusIdx = HLOperandIndex::kGatherCmpCubeStatusArgIndex;
  2843. } else {
  2844. TranslateOffset(CI, HLOperandIndex::kGatherCmpOffsetArgIndex, offsetSize);
  2845. // Gather all don't have sample offset version overload.
  2846. if (ch != GatherChannel::GatherAll)
  2847. TranslateSampleOffset(CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex,
  2848. offsetSize);
  2849. statusIdx =
  2850. hasSampleOffsets
  2851. ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex
  2852. : HLOperandIndex::kGatherCmpStatusArgIndex;
  2853. }
  2854. SetStatus(CI, statusIdx);
  2855. } break;
  2856. default:
  2857. DXASSERT(0, "invalid opcode for Gather");
  2858. break;
  2859. }
  2860. DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1,
  2861. "otherwise, unused HL arguments for Sample op");
  2862. }
// Emits the DXIL gather call(s) for an HL gather and replaces CI's uses with
// the scalarized result. When the per-sample-offset overload was used, four
// gather calls are emitted (one per offset) and component i of the final
// result is taken from call i.
void GenerateDxilGather(CallInst *CI, Function *F,
                        MutableArrayRef<Value *> gatherArgs,
                        GatherHelper &helper, hlsl::OP *hlslOp) {
  IRBuilder<> Builder(CI);

  CallInst *call = Builder.CreateCall(F, gatherArgs);

  dxilutil::MigrateDebugValue(CI, call);

  Value *retVal;
  if (!helper.hasSampleOffsets) {
    // extract value part
    retVal = ScalarizeResRet(CI->getType(), call, Builder);
  } else {
    // Re-emit the call once per sample offset, patching the offset operands
    // in place each time; lane i of the result comes from gather i.
    retVal = UndefValue::get(CI->getType());
    Value *elt = Builder.CreateExtractValue(call, (uint64_t)0);
    retVal = Builder.CreateInsertElement(retVal, elt, (uint64_t)0);

    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 1);
    CallInst *callY = Builder.CreateCall(F, gatherArgs);
    elt = Builder.CreateExtractValue(callY, (uint64_t)1);
    retVal = Builder.CreateInsertElement(retVal, elt, 1);

    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 2);
    CallInst *callZ = Builder.CreateCall(F, gatherArgs);
    elt = Builder.CreateExtractValue(callZ, (uint64_t)2);
    retVal = Builder.CreateInsertElement(retVal, elt, 2);

    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 3);
    CallInst *callW = Builder.CreateCall(F, gatherArgs);
    elt = Builder.CreateExtractValue(callW, (uint64_t)3);
    retVal = Builder.CreateInsertElement(retVal, elt, 3);

    // TODO: UpdateStatus for each gather call.
  }

  // Replace ret val.
  CI->replaceAllUsesWith(retVal);

  // Get status
  // NOTE(review): only the first gather's status is propagated when sample
  // offsets are used (see TODO above).
  if (helper.status) {
    UpdateStatus(call, helper.status, Builder, hlslOp);
  }
}
  2898. Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2899. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2900. hlsl::OP *hlslOP = &helper.hlslOP;
  2901. GatherHelper::GatherChannel ch = GatherHelper::GatherChannel::GatherAll;
  2902. switch (IOP) {
  2903. case IntrinsicOp::MOP_Gather:
  2904. case IntrinsicOp::MOP_GatherCmp:
  2905. ch = GatherHelper::GatherChannel::GatherAll;
  2906. break;
  2907. case IntrinsicOp::MOP_GatherRed:
  2908. case IntrinsicOp::MOP_GatherCmpRed:
  2909. ch = GatherHelper::GatherChannel::GatherRed;
  2910. break;
  2911. case IntrinsicOp::MOP_GatherGreen:
  2912. case IntrinsicOp::MOP_GatherCmpGreen:
  2913. ch = GatherHelper::GatherChannel::GatherGreen;
  2914. break;
  2915. case IntrinsicOp::MOP_GatherBlue:
  2916. case IntrinsicOp::MOP_GatherCmpBlue:
  2917. ch = GatherHelper::GatherChannel::GatherBlue;
  2918. break;
  2919. case IntrinsicOp::MOP_GatherAlpha:
  2920. case IntrinsicOp::MOP_GatherCmpAlpha:
  2921. ch = GatherHelper::GatherChannel::GatherAlpha;
  2922. break;
  2923. default:
  2924. DXASSERT(0, "invalid gather intrinsic");
  2925. break;
  2926. }
  2927. GatherHelper gatherHelper(CI, opcode, pObjHelper, ch);
  2928. if (gatherHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2929. Translated = false;
  2930. return nullptr;
  2931. }
  2932. Type *Ty = CI->getType();
  2933. Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  2934. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2935. Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel);
  2936. switch (opcode) {
  2937. case OP::OpCode::TextureGather: {
  2938. Value *gatherArgs[] = {
  2939. opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
  2940. // Coord.
  2941. gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
  2942. gatherHelper.coord[3],
  2943. // Offset.
  2944. gatherHelper.offset[0], gatherHelper.offset[1],
  2945. // Channel.
  2946. channelArg};
  2947. GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  2948. } break;
  2949. case OP::OpCode::TextureGatherCmp: {
  2950. Value *gatherArgs[] = {
  2951. opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
  2952. // Coord.
  2953. gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
  2954. gatherHelper.coord[3],
  2955. // Offset.
  2956. gatherHelper.offset[0], gatherHelper.offset[1],
  2957. // Channel.
  2958. channelArg,
  2959. // CmpVal.
  2960. gatherHelper.special};
  2961. GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  2962. } break;
  2963. default:
  2964. DXASSERT(0, "invalid opcode for Gather");
  2965. break;
  2966. }
  2967. // CI is replaced in GenerateDxilGather.
  2968. return nullptr;
  2969. }
// Lowers the HL WriteSamplerFeedback* intrinsics to their DXIL ops. Reuses
// SampleHelper to collect the coord/bias/grad/LOD/clamp operands; the opcode
// selects which optional operands are appended. Returns the created call, or
// nullptr on an unknown opcode.
static Value* TranslateWriteSamplerFeedback(CallInst* CI, IntrinsicOp IOP, OP::OpCode opcode,
                                            HLOperationLowerHelper& helper,
                                            HLObjectOperationLowerHelper* pObjHelper,
                                            bool& Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  SampleHelper sampleHelper(CI, opcode, pObjHelper);
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
    // Invalid resource kind; report failure and leave CI for the caller.
    Translated = false;
    return nullptr;
  }
  Type *Ty = CI->getType();
  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());

  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);

  IRBuilder<> Builder(CI);

  switch (opcode) {
  case OP::OpCode::WriteSamplerFeedback: {
    Value *samplerFeedbackArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Clamp.
        sampleHelper.clamp};
    return Builder.CreateCall(F, samplerFeedbackArgs);
  } break;
  case OP::OpCode::WriteSamplerFeedbackBias: {
    Value *samplerFeedbackArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Bias.
        sampleHelper.bias,
        // Clamp.
        sampleHelper.clamp};
    return Builder.CreateCall(F, samplerFeedbackArgs);
  } break;
  case OP::OpCode::WriteSamplerFeedbackGrad: {
    Value *samplerFeedbackArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Ddx.
        sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
        // Ddy.
        sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
        // Clamp.
        sampleHelper.clamp};
    return Builder.CreateCall(F, samplerFeedbackArgs);
  } break;
  case OP::OpCode::WriteSamplerFeedbackLevel: {
    Value *samplerFeedbackArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // LOD.
        sampleHelper.lod};
    return Builder.CreateCall(F, samplerFeedbackArgs);
  } break;
  default:
    DXASSERT(false, "otherwise, unknown SamplerFeedback Op");
    break;
  }
  return nullptr;
}
  3037. // Load/Store intrinsics.
// Gathers the pieces of an HL resource load (handle, address, offset, status,
// mip/sample index) so TranslateLoad can emit the matching DXIL load op.
struct ResLoadHelper {
  // For Load()/operator[] on a resource; picks the DXIL opcode from RK.
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, IntrinsicOp IOP, bool bForSubscript=false);
  // For mips-style access where the mip level is supplied explicitly.
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, Value *mip);
  // For double subscript.
  ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip)
      : opcode(OP::OpCode::TextureLoad),
        intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst),
        addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {}
  OP::OpCode opcode;           // DXIL load opcode selected from the resource kind.
  IntrinsicOp intrinsicOpCode; // Originating HL intrinsic (Num_Intrinsics if n/a).
  unsigned dxilMajor;          // NOTE(review): never initialized or read in visible code.
  unsigned dxilMinor;          // NOTE(review): never initialized or read in visible code.
  Value *handle;   // Resource handle.
  Value *retVal;   // HL instruction whose uses are replaced by the lowered load.
  Value *addr;     // Coordinate / element address operand.
  Value *offset;   // Optional texel offset (may be null).
  Value *status;   // Optional CheckAccessFullyMapped status out (may be null).
  Value *mipLevel; // Mip level or sample index; undef for UAV texture loads.
};
  3059. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  3060. DxilResourceBase::Class RC, Value *hdl, IntrinsicOp IOP, bool bForSubscript)
  3061. : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) {
  3062. switch (RK) {
  3063. case DxilResource::Kind::RawBuffer:
  3064. case DxilResource::Kind::StructuredBuffer:
  3065. case DxilResource::Kind::StructuredBufferWithCounter:
  3066. opcode = OP::OpCode::RawBufferLoad;
  3067. break;
  3068. case DxilResource::Kind::TypedBuffer:
  3069. opcode = OP::OpCode::BufferLoad;
  3070. break;
  3071. case DxilResource::Kind::Invalid:
  3072. DXASSERT(0, "invalid resource kind");
  3073. break;
  3074. default:
  3075. opcode = OP::OpCode::TextureLoad;
  3076. break;
  3077. }
  3078. retVal = CI;
  3079. const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx;
  3080. addr = CI->getArgOperand(kAddrIdx);
  3081. unsigned argc = CI->getNumArgOperands();
  3082. if (opcode == OP::OpCode::TextureLoad) {
  3083. // mip at last channel
  3084. unsigned coordSize = DxilResource::GetNumCoords(RK);
  3085. if (RC == DxilResourceBase::Class::SRV) {
  3086. if (bForSubscript) {
  3087. // Use 0 when access by [].
  3088. mipLevel = IRBuilder<>(CI).getInt32(0);
  3089. } else {
  3090. if (coordSize == 1 && !addr->getType()->isVectorTy()) {
  3091. // Use addr when access by Load.
  3092. mipLevel = addr;
  3093. } else {
  3094. mipLevel = IRBuilder<>(CI).CreateExtractElement(addr, coordSize);
  3095. }
  3096. }
  3097. } else {
  3098. // Set mip level to undef for UAV.
  3099. mipLevel = UndefValue::get(Type::getInt32Ty(addr->getContext()));
  3100. }
  3101. if (RC == DxilResourceBase::Class::SRV) {
  3102. unsigned offsetIdx = HLOperandIndex::kTexLoadOffsetOpIdx;
  3103. unsigned statusIdx = HLOperandIndex::kTexLoadStatusOpIdx;
  3104. if (RK == DxilResource::Kind::Texture2DMS ||
  3105. RK == DxilResource::Kind::Texture2DMSArray) {
  3106. offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx;
  3107. statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx;
  3108. mipLevel =
  3109. CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx);
  3110. }
  3111. if (argc > offsetIdx)
  3112. offset = CI->getArgOperand(offsetIdx);
  3113. if (argc > statusIdx)
  3114. status = CI->getArgOperand(statusIdx);
  3115. } else {
  3116. const unsigned kStatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx;
  3117. if (argc > kStatusIdx)
  3118. status = CI->getArgOperand(kStatusIdx);
  3119. }
  3120. } else {
  3121. const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx;
  3122. if (argc > kStatusIdx)
  3123. status = CI->getArgOperand(kStatusIdx);
  3124. }
  3125. }
  3126. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  3127. DxilResourceBase::Class RC, Value *hdl, Value *mip)
  3128. : handle(hdl), offset(nullptr), status(nullptr) {
  3129. DXASSERT(RK != DxilResource::Kind::RawBuffer &&
  3130. RK != DxilResource::Kind::TypedBuffer &&
  3131. RK != DxilResource::Kind::Invalid,
  3132. "invalid resource kind");
  3133. opcode = OP::OpCode::TextureLoad;
  3134. retVal = CI;
  3135. mipLevel = mip;
  3136. const unsigned kAddrIdx = HLOperandIndex::kMipLoadAddrOpIdx;
  3137. addr = CI->getArgOperand(kAddrIdx);
  3138. unsigned argc = CI->getNumArgOperands();
  3139. const unsigned kOffsetIdx = HLOperandIndex::kMipLoadOffsetOpIdx;
  3140. const unsigned kStatusIdx = HLOperandIndex::kMipLoadStatusOpIdx;
  3141. if (argc > kOffsetIdx)
  3142. offset = CI->getArgOperand(kOffsetIdx);
  3143. if (argc > kStatusIdx)
  3144. status = CI->getArgOperand(kStatusIdx);
  3145. }
// Forward declaration; used by TranslateLoad below for structured-buffer
// subscript lowering.
void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
                                 hlsl::OP *OP, HLResource::Kind RK, const DataLayout &DL);
  3148. // Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi }
  3149. void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
  3150. unsigned size, MutableArrayRef<Value *> resultElts,
  3151. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3152. Type *i64Ty = Builder.getInt64Ty();
  3153. Type *doubleTy = Builder.getDoubleTy();
  3154. if (EltTy == doubleTy) {
  3155. Function *makeDouble =
  3156. hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy);
  3157. Value *makeDoubleOpArg =
  3158. Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
  3159. for (unsigned i = 0; i < size; i++) {
  3160. Value *lo = resultElts32[2 * i];
  3161. Value *hi = resultElts32[2 * i + 1];
  3162. Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
  3163. resultElts[i] = V;
  3164. }
  3165. } else {
  3166. for (unsigned i = 0; i < size; i++) {
  3167. Value *lo = resultElts32[2 * i];
  3168. Value *hi = resultElts32[2 * i + 1];
  3169. lo = Builder.CreateZExt(lo, i64Ty);
  3170. hi = Builder.CreateZExt(hi, i64Ty);
  3171. hi = Builder.CreateShl(hi, 32);
  3172. resultElts[i] = Builder.CreateOr(lo, hi);
  3173. }
  3174. }
  3175. }
  3176. static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) {
  3177. unsigned mask = 0;
  3178. switch (NumComponents) {
  3179. case 0:
  3180. break;
  3181. case 1:
  3182. mask = DXIL::kCompMask_X;
  3183. break;
  3184. case 2:
  3185. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y;
  3186. break;
  3187. case 3:
  3188. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
  3189. break;
  3190. case 4:
  3191. mask = DXIL::kCompMask_All;
  3192. break;
  3193. default:
  3194. DXASSERT(false, "Cannot load more than 2 components for 64bit types.");
  3195. }
  3196. return OP->GetI8Const(mask);
  3197. }
// Forward declaration; used by TranslateLoad below for the structured-buffer
// basic-type load path.
Value *GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
                           Value *status, Type *EltTy,
                           MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
                           IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment);
// Emits the DXIL load (TextureLoad / BufferLoad / RawBufferLoad) described by
// `helper`, scalarizes the result back into the HL call's type, and replaces
// all uses of helper.retVal with the new value. Handles 64-bit elements on
// typed resources (loaded as dword pairs and recombined) and bool elements
// (loaded as i32 memory representation, then compared back to i1).
void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                   IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {

  Type *Ty = helper.retVal->getType();
  if (Ty->isPointerTy()) {
    // Pointer result means a structured-buffer subscript; lower it separately.
    DXASSERT(!DxilResource::IsAnyTexture(RK), "Textures should not be treated as structured buffers.");
    TranslateStructBufSubscript(cast<CallInst>(helper.retVal), helper.handle,
                                helper.status, OP, RK, DL);
    return;
  }

  OP::OpCode opcode = helper.opcode;

  Type *i32Ty = Builder.getInt32Ty();
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  Type *EltTy = Ty->getScalarType();
  // If RawBuffer load of 64-bit value, don't set alignment to 8,
  // since buffer alignment isn't known to be anything over 4.
  unsigned alignValue = OP->GetAllocSizeForType(EltTy);
  if (RK == HLResource::Kind::RawBuffer && alignValue > 4)
    alignValue = 4;
  Constant *Alignment = OP->GetI32Const(alignValue);
  unsigned numComponents = 1;
  if (Ty->isVectorTy()) {
    numComponents = Ty->getVectorNumElements();
  }

  if (DXIL::IsStructuredBuffer(RK)) {
    // Basic type case for StructuredBuffer::Load()
    Value *ResultElts[4];
    Value *StructBufLoad = GenerateStructBufLd(helper.handle, helper.addr, OP->GetU32Const(0),
      helper.status, EltTy, ResultElts, OP, Builder, numComponents, Alignment);
    dxilutil::MigrateDebugValue(helper.retVal, StructBufLoad);
    Value *retValNew = ScalarizeElements(Ty, ResultElts, Builder);
    helper.retVal->replaceAllUsesWith(retValNew);
    helper.retVal = retValNew;
    return;
  }

  bool isTyped = opcode == OP::OpCode::TextureLoad ||
                 RK == DxilResource::Kind::TypedBuffer;
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  if (is64 && isTyped) {
    // Typed resources load 64-bit elements as pairs of i32 dwords.
    EltTy = i32Ty;
  }

  bool isBool = EltTy->isIntegerTy(1);
  if (isBool) {
    // Value will be loaded in its memory representation.
    EltTy = i32Ty;
    if (Ty->isVectorTy()) Ty = VectorType::get(EltTy, numComponents);
  }

  Function *F = OP->GetOpFunc(opcode, EltTy);
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);

  llvm::Value *undefI = llvm::UndefValue::get(i32Ty);

  SmallVector<Value *, 12> loadArgs;
  loadArgs.emplace_back(opArg);         // opcode
  loadArgs.emplace_back(helper.handle); // resource handle

  if (opcode == OP::OpCode::TextureLoad) {
    // set mip level
    loadArgs.emplace_back(helper.mipLevel);
  }

  if (opcode == OP::OpCode::TextureLoad) {
    // texture coord: up to 3 components, undef-padded past the coord rank.
    unsigned coordSize = DxilResource::GetNumCoords(RK);
    bool isVectorAddr = helper.addr->getType()->isVectorTy();
    for (unsigned i = 0; i < 3; i++) {
      if (i < coordSize) {
        loadArgs.emplace_back(
          isVectorAddr ? Builder.CreateExtractElement(helper.addr, i) : helper.addr);
      }
      else
        loadArgs.emplace_back(undefI);
    }
  } else {
    // Buffer loads take a single scalar index/offset.
    if (helper.addr->getType()->isVectorTy()) {
      Value *scalarOffset =
          Builder.CreateExtractElement(helper.addr, (uint64_t)0);

      // TODO: calculate the real address based on opcode

      loadArgs.emplace_back(scalarOffset); // offset
    } else {
      // TODO: calculate the real address based on opcode

      loadArgs.emplace_back(helper.addr); // offset
    }
  }

  // offset 0: texel offsets (undef when absent or unused for this rank).
  if (opcode == OP::OpCode::TextureLoad) {
    if (helper.offset && !isa<llvm::UndefValue>(helper.offset)) {
      unsigned offsetSize = DxilResource::GetNumOffsets(RK);
      for (unsigned i = 0; i < 3; i++) {
        if (i < offsetSize)
          loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i));
        else
          loadArgs.emplace_back(undefI);
      }
    } else {
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
    }
  }

  // Offset 1
  if (RK == DxilResource::Kind::RawBuffer) {
    // elementOffset, mask, alignment
    loadArgs.emplace_back(undefI);
    Type *rtnTy = helper.retVal->getType();
    loadArgs.emplace_back(GetRawBufferMaskForETy(rtnTy, numComponents, OP));
    loadArgs.emplace_back(Alignment);
  }
  else if (RK == DxilResource::Kind::TypedBuffer) {
    loadArgs.emplace_back(undefI);
  }

  Value *ResRet =
      Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode));
  dxilutil::MigrateDebugValue(helper.retVal, ResRet);

  Value *retValNew = nullptr;
  if (!is64 || !isTyped) {
    retValNew = ScalarizeResRet(Ty, ResRet, Builder);
  } else {
    // Recombine the dword pairs into 64-bit elements.
    unsigned size = numComponents;
    DXASSERT(size <= 2, "typed buffer only allow 4 dwords");
    EltTy = Ty->getScalarType();
    Value *Elts[2];

    Make64bitResultForLoad(Ty->getScalarType(),
                           {
                               Builder.CreateExtractValue(ResRet, 0),
                               Builder.CreateExtractValue(ResRet, 1),
                               Builder.CreateExtractValue(ResRet, 2),
                               Builder.CreateExtractValue(ResRet, 3),
                           },
                           size, Elts, OP, Builder);

    retValNew = ScalarizeElements(Ty, Elts, Builder);
  }

  if (isBool) {
    // Convert result back to register representation.
    retValNew = Builder.CreateICmpNE(retValNew, Constant::getNullValue(retValNew->getType()));
  }

  // replace
  helper.retVal->replaceAllUsesWith(retValNew);
  // Save new ret val.
  helper.retVal = retValNew;
  // get status
  UpdateStatus(ResRet, helper.status, Builder, OP);
}
  3341. Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3342. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3343. hlsl::OP *hlslOP = &helper.hlslOP;
  3344. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3345. IRBuilder<> Builder(CI);
  3346. DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  3347. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  3348. ResLoadHelper loadHelper(CI, RK, RC, handle, IOP);
  3349. TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout);
  3350. // CI is replaced in TranslateLoad.
  3351. return nullptr;
  3352. }
  3353. // Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
  3354. void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
  3355. MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
  3356. IRBuilder<> &Builder) {
  3357. Type *i32Ty = Builder.getInt32Ty();
  3358. Type *doubleTy = Builder.getDoubleTy();
  3359. Value *undefI32 = UndefValue::get(i32Ty);
  3360. if (EltTy == doubleTy) {
  3361. Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
  3362. Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
  3363. for (unsigned i = 0; i < size; i++) {
  3364. if (isa<UndefValue>(vals[i])) {
  3365. vals32[2 * i] = undefI32;
  3366. vals32[2 * i + 1] = undefI32;
  3367. } else {
  3368. Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
  3369. Value *lo = Builder.CreateExtractValue(retVal, 0);
  3370. Value *hi = Builder.CreateExtractValue(retVal, 1);
  3371. vals32[2 * i] = lo;
  3372. vals32[2 * i + 1] = hi;
  3373. }
  3374. }
  3375. } else {
  3376. for (unsigned i = 0; i < size; i++) {
  3377. if (isa<UndefValue>(vals[i])) {
  3378. vals32[2 * i] = undefI32;
  3379. vals32[2 * i + 1] = undefI32;
  3380. } else {
  3381. Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
  3382. Value *hi = Builder.CreateLShr(vals[i], 32);
  3383. hi = Builder.CreateTrunc(hi, i32Ty);
  3384. vals32[2 * i] = lo;
  3385. vals32[2 * i + 1] = hi;
  3386. }
  3387. }
  3388. }
  3389. }
  3390. void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
  3391. Value *offset, IRBuilder<> &Builder, hlsl::OP *OP) {
  3392. Type *Ty = val->getType();
  3393. OP::OpCode opcode = OP::OpCode::NumOpCodes;
  3394. switch (RK) {
  3395. case DxilResource::Kind::RawBuffer:
  3396. case DxilResource::Kind::StructuredBuffer:
  3397. case DxilResource::Kind::StructuredBufferWithCounter:
  3398. opcode = OP::OpCode::RawBufferStore;
  3399. break;
  3400. case DxilResource::Kind::TypedBuffer:
  3401. opcode = OP::OpCode::BufferStore;
  3402. break;
  3403. case DxilResource::Kind::Invalid:
  3404. DXASSERT(0, "invalid resource kind");
  3405. break;
  3406. default:
  3407. opcode = OP::OpCode::TextureStore;
  3408. break;
  3409. }
  3410. bool isTyped = opcode == OP::OpCode::TextureStore ||
  3411. RK == DxilResource::Kind::TypedBuffer;
  3412. Type *i32Ty = Builder.getInt32Ty();
  3413. Type *i64Ty = Builder.getInt64Ty();
  3414. Type *doubleTy = Builder.getDoubleTy();
  3415. Type *EltTy = Ty->getScalarType();
  3416. if (EltTy->isIntegerTy(1)) {
  3417. // Since we're going to memory, convert bools to their memory representation.
  3418. EltTy = i32Ty;
  3419. if (Ty->isVectorTy()) Ty = VectorType::get(EltTy, Ty->getVectorNumElements());
  3420. else Ty = EltTy;
  3421. val = Builder.CreateZExt(val, Ty);
  3422. }
  3423. // If RawBuffer store of 64-bit value, don't set alignment to 8,
  3424. // since buffer alignment isn't known to be anything over 4.
  3425. unsigned alignValue = OP->GetAllocSizeForType(EltTy);
  3426. if (RK == HLResource::Kind::RawBuffer && alignValue > 4)
  3427. alignValue = 4;
  3428. Constant *Alignment = OP->GetI32Const(alignValue);
  3429. bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  3430. if (is64 && isTyped) {
  3431. EltTy = i32Ty;
  3432. }
  3433. Function *F = OP->GetOpFunc(opcode, EltTy);
  3434. llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
  3435. llvm::Value *undefI =
  3436. llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext()));
  3437. llvm::Value *undefVal = llvm::UndefValue::get(Ty->getScalarType());
  3438. SmallVector<Value *, 13> storeArgs;
  3439. storeArgs.emplace_back(opArg); // opcode
  3440. storeArgs.emplace_back(handle); // resource handle
  3441. if (RK == DxilResource::Kind::RawBuffer ||
  3442. RK == DxilResource::Kind::TypedBuffer) {
  3443. // Offset 0
  3444. if (offset->getType()->isVectorTy()) {
  3445. Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0);
  3446. storeArgs.emplace_back(scalarOffset); // offset
  3447. } else {
  3448. storeArgs.emplace_back(offset); // offset
  3449. }
  3450. // Offset 1
  3451. storeArgs.emplace_back(undefI);
  3452. } else {
  3453. // texture store
  3454. unsigned coordSize = DxilResource::GetNumCoords(RK);
  3455. // Set x first.
  3456. if (offset->getType()->isVectorTy())
  3457. storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0));
  3458. else
  3459. storeArgs.emplace_back(offset);
  3460. for (unsigned i = 1; i < 3; i++) {
  3461. if (i < coordSize)
  3462. storeArgs.emplace_back(Builder.CreateExtractElement(offset, i));
  3463. else
  3464. storeArgs.emplace_back(undefI);
  3465. }
  3466. // TODO: support mip for texture ST
  3467. }
  3468. // values
  3469. uint8_t mask = 0;
  3470. if (Ty->isVectorTy()) {
  3471. unsigned vecSize = Ty->getVectorNumElements();
  3472. Value *emptyVal = undefVal;
  3473. if (isTyped) {
  3474. mask = DXIL::kCompMask_All;
  3475. emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
  3476. }
  3477. for (unsigned i = 0; i < 4; i++) {
  3478. if (i < vecSize) {
  3479. storeArgs.emplace_back(Builder.CreateExtractElement(val, i));
  3480. mask |= (1<<i);
  3481. } else {
  3482. storeArgs.emplace_back(emptyVal);
  3483. }
  3484. }
  3485. } else {
  3486. if (isTyped) {
  3487. mask = DXIL::kCompMask_All;
  3488. storeArgs.emplace_back(val);
  3489. storeArgs.emplace_back(val);
  3490. storeArgs.emplace_back(val);
  3491. storeArgs.emplace_back(val);
  3492. } else {
  3493. storeArgs.emplace_back(val);
  3494. storeArgs.emplace_back(undefVal);
  3495. storeArgs.emplace_back(undefVal);
  3496. storeArgs.emplace_back(undefVal);
  3497. mask = DXIL::kCompMask_X;
  3498. }
  3499. }
  3500. if (is64 && isTyped) {
  3501. unsigned size = 1;
  3502. if (Ty->isVectorTy()) {
  3503. size = Ty->getVectorNumElements();
  3504. }
  3505. DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords");
  3506. unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore
  3507. ? DXIL::OperandIndex::kTextureStoreVal0OpIdx
  3508. : DXIL::OperandIndex::kBufferStoreVal0OpIdx;
  3509. Value *V0 = storeArgs[val0OpIdx];
  3510. Value *V1 = storeArgs[val0OpIdx+1];
  3511. Value *vals32[4];
  3512. EltTy = Ty->getScalarType();
  3513. Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder);
  3514. // Fill the uninit vals.
  3515. if (size == 1) {
  3516. vals32[2] = vals32[0];
  3517. vals32[3] = vals32[1];
  3518. }
  3519. // Change valOp to 32 version.
  3520. for (unsigned i = 0; i < 4; i++) {
  3521. storeArgs[val0OpIdx + i] = vals32[i];
  3522. }
  3523. // change mask for double
  3524. if (opcode == DXIL::OpCode::RawBufferStore) {
  3525. mask = size == 1 ?
  3526. DXIL::kCompMask_X | DXIL::kCompMask_Y : DXIL::kCompMask_All;
  3527. }
  3528. }
  3529. storeArgs.emplace_back(OP->GetU8Const(mask)); // mask
  3530. if (opcode == DXIL::OpCode::RawBufferStore)
  3531. storeArgs.emplace_back(Alignment); // alignment only for raw buffer
  3532. Builder.CreateCall(F, storeArgs);
  3533. }
  3534. Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3535. HLOperationLowerHelper &helper,
  3536. HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3537. hlsl::OP *hlslOP = &helper.hlslOP;
  3538. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3539. IRBuilder<> Builder(CI);
  3540. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  3541. Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx);
  3542. Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx);
  3543. TranslateStore(RK, handle, val, offset, Builder, hlslOP);
  3544. return nullptr;
  3545. }
  3546. }
  3547. // Atomic intrinsics.
  3548. namespace {
  3549. // Atomic intrinsics.
// Gathers the operands of an HL Interlocked* call (resource handle,
// destination address, value operands, optional original-value out-pointer)
// so the DXIL translation helpers below can consume them uniformly.
struct AtomicHelper {
  // MOP (method) form: destination address is an intrinsic operand.
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h);
  // IOP (free-function) form on structured buffers: explicit buffer index
  // plus base offset.
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
               Value *baseOffset);
  OP::OpCode opcode;    // DXIL opcode: AtomicBinOp or AtomicCompareExchange.
  Value *handle;        // Resource handle.
  Value *addr;          // Coordinate / buffer index.
  Value *offset;        // Offset for structured buffer; null otherwise.
  Value *value;         // Value operand of the atomic operation.
  Value *originalValue; // Optional out-pointer for the pre-op value.
  Value *compareValue;  // Comparand (AtomicCompareExchange only).
};
  3562. // For MOP version of Interlocked*.
  3563. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h)
  3564. : opcode(op), handle(h), offset(nullptr), originalValue(nullptr) {
  3565. addr = CI->getArgOperand(HLOperandIndex::kObjectInterlockedDestOpIndex);
  3566. if (op == OP::OpCode::AtomicCompareExchange) {
  3567. compareValue = CI->getArgOperand(
  3568. HLOperandIndex::kObjectInterlockedCmpCompareValueOpIndex);
  3569. value =
  3570. CI->getArgOperand(HLOperandIndex::kObjectInterlockedCmpValueOpIndex);
  3571. if (CI->getNumArgOperands() ==
  3572. (HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex + 1))
  3573. originalValue = CI->getArgOperand(
  3574. HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex);
  3575. } else {
  3576. value = CI->getArgOperand(HLOperandIndex::kObjectInterlockedValueOpIndex);
  3577. if (CI->getNumArgOperands() ==
  3578. (HLOperandIndex::kObjectInterlockedOriginalValueOpIndex + 1))
  3579. originalValue = CI->getArgOperand(
  3580. HLOperandIndex::kObjectInterlockedOriginalValueOpIndex);
  3581. }
  3582. }
  3583. // For IOP version of Interlocked*.
  3584. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
  3585. Value *baseOffset)
  3586. : opcode(op), handle(h), addr(bufIdx),
  3587. offset(baseOffset), originalValue(nullptr) {
  3588. if (op == OP::OpCode::AtomicCompareExchange) {
  3589. compareValue =
  3590. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3591. value = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3592. if (CI->getNumArgOperands() ==
  3593. (HLOperandIndex::kInterlockedCmpOriginalValueOpIndex + 1))
  3594. originalValue = CI->getArgOperand(
  3595. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex);
  3596. } else {
  3597. value = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3598. if (CI->getNumArgOperands() ==
  3599. (HLOperandIndex::kInterlockedOriginalValueOpIndex + 1))
  3600. originalValue =
  3601. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex);
  3602. }
  3603. }
// Emit a DXIL AtomicBinOp call for the resource atomic described by `helper`.
// Coordinate slots not covered by `addr` are left undef; for structured
// buffers the second coordinate slot carries the byte offset.
void TranslateAtomicBinaryOperation(AtomicHelper &helper,
                                    DXIL::AtomicBinOpCode atomicOp,
                                    IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  Value *handle = helper.handle;
  Value *addr = helper.addr;
  Value *val = helper.value;
  Type *Ty = val->getType();
  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
  // The DXIL op is overloaded on the value's scalar type.
  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  Value *atomicOpArg = hlslOP->GetU32Const(static_cast<unsigned>(atomicOp));
  // NOTE: this initializer layout must stay in sync with the
  // DXIL::OperandIndex::kAtomicBinOp* indices used to patch slots below.
  Value *args[] = {opArg,  handle, atomicOpArg,
                   undefI, undefI, undefI, // coordinates
                   val};
  // Setup coordinates.
  if (addr->getType()->isVectorTy()) {
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
    DXASSERT(vectorNumElements <= 3, "up to 3 elements for atomic binary op");
    _Analysis_assume_(vectorNumElements <= 3);
    for (unsigned i = 0; i < vectorNumElements; i++) {
      Value *Elt = Builder.CreateExtractElement(addr, i);
      args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx + i] = Elt;
    }
  } else
    args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx] = addr;
  // Set offset for structured buffer.
  if (helper.offset)
    args[DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx] = helper.offset;
  Value *origVal =
      Builder.CreateCall(dxilAtomic, args, hlslOP->GetAtomicOpName(atomicOp));
  // If the intrinsic supplied an out-pointer, store the pre-op value to it.
  if (helper.originalValue) {
    Builder.CreateStore(origVal, helper.originalValue);
  }
}
  3638. Value *TranslateMopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3639. OP::OpCode opcode,
  3640. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3641. hlsl::OP *hlslOP = &helper.hlslOP;
  3642. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3643. IRBuilder<> Builder(CI);
  3644. switch (IOP) {
  3645. case IntrinsicOp::MOP_InterlockedAdd: {
  3646. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3647. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add, Builder,
  3648. hlslOP);
  3649. } break;
  3650. case IntrinsicOp::MOP_InterlockedAnd: {
  3651. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3652. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And, Builder,
  3653. hlslOP);
  3654. } break;
  3655. case IntrinsicOp::MOP_InterlockedExchange: {
  3656. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3657. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
  3658. Builder, hlslOP);
  3659. } break;
  3660. case IntrinsicOp::MOP_InterlockedMax: {
  3661. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3662. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax, Builder,
  3663. hlslOP);
  3664. } break;
  3665. case IntrinsicOp::MOP_InterlockedMin: {
  3666. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3667. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin, Builder,
  3668. hlslOP);
  3669. } break;
  3670. case IntrinsicOp::MOP_InterlockedUMax: {
  3671. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3672. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax, Builder,
  3673. hlslOP);
  3674. } break;
  3675. case IntrinsicOp::MOP_InterlockedUMin: {
  3676. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3677. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin, Builder,
  3678. hlslOP);
  3679. } break;
  3680. case IntrinsicOp::MOP_InterlockedOr: {
  3681. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3682. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or, Builder,
  3683. hlslOP);
  3684. } break;
  3685. case IntrinsicOp::MOP_InterlockedXor: {
  3686. default:
  3687. DXASSERT(IOP == IntrinsicOp::MOP_InterlockedXor,
  3688. "invalid MOP atomic intrinsic");
  3689. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3690. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor, Builder,
  3691. hlslOP);
  3692. } break;
  3693. }
  3694. return nullptr;
  3695. }
// Emit a DXIL AtomicCompareExchange for the resource atomic described by
// `helper`. Coordinate slots not covered by `addr` are left undef; for
// structured buffers the second coordinate slot carries the byte offset.
void TranslateAtomicCmpXChg(AtomicHelper &helper, IRBuilder<> &Builder,
                            hlsl::OP *hlslOP) {
  Value *handle = helper.handle;
  Value *addr = helper.addr;
  Value *val = helper.value;
  Value *cmpVal = helper.compareValue;
  Type *Ty = val->getType();
  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
  // The DXIL op is overloaded on the value's scalar type.
  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  // NOTE: this initializer layout must stay in sync with the
  // DXIL::OperandIndex::kAtomicCmpExchange* indices used below.
  Value *args[] = {opArg,  handle, undefI, undefI, undefI, // coordinates
                   cmpVal, val};
  // Setup coordinates.
  if (addr->getType()->isVectorTy()) {
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
    DXASSERT(vectorNumElements <= 3, "up to 3 elements in atomic op");
    _Analysis_assume_(vectorNumElements <= 3);
    for (unsigned i = 0; i < vectorNumElements; i++) {
      Value *Elt = Builder.CreateExtractElement(addr, i);
      args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx + i] = Elt;
    }
  } else
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx] = addr;
  // Set offset for structured buffer.
  if (helper.offset)
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx] = helper.offset;
  Value *origVal = Builder.CreateCall(dxilAtomic, args);
  // If the intrinsic supplied an out-pointer, store the pre-op value to it.
  if (helper.originalValue) {
    Builder.CreateStore(origVal, helper.originalValue);
  }
}
  3727. Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3728. OP::OpCode opcode,
  3729. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3730. hlsl::OP *hlslOP = &helper.hlslOP;
  3731. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3732. IRBuilder<> Builder(CI);
  3733. AtomicHelper atomicHelper(CI, OP::OpCode::AtomicCompareExchange, handle);
  3734. TranslateAtomicCmpXChg(atomicHelper, Builder, hlslOP);
  3735. return nullptr;
  3736. }
  3737. void TranslateSharedMemAtomicBinOp(CallInst *CI, IntrinsicOp IOP, Value *addr) {
  3738. AtomicRMWInst::BinOp Op;
  3739. switch (IOP) {
  3740. case IntrinsicOp::IOP_InterlockedAdd:
  3741. Op = AtomicRMWInst::BinOp::Add;
  3742. break;
  3743. case IntrinsicOp::IOP_InterlockedAnd:
  3744. Op = AtomicRMWInst::BinOp::And;
  3745. break;
  3746. case IntrinsicOp::IOP_InterlockedExchange:
  3747. Op = AtomicRMWInst::BinOp::Xchg;
  3748. break;
  3749. case IntrinsicOp::IOP_InterlockedMax:
  3750. Op = AtomicRMWInst::BinOp::Max;
  3751. break;
  3752. case IntrinsicOp::IOP_InterlockedUMax:
  3753. Op = AtomicRMWInst::BinOp::UMax;
  3754. break;
  3755. case IntrinsicOp::IOP_InterlockedMin:
  3756. Op = AtomicRMWInst::BinOp::Min;
  3757. break;
  3758. case IntrinsicOp::IOP_InterlockedUMin:
  3759. Op = AtomicRMWInst::BinOp::UMin;
  3760. break;
  3761. case IntrinsicOp::IOP_InterlockedOr:
  3762. Op = AtomicRMWInst::BinOp::Or;
  3763. break;
  3764. case IntrinsicOp::IOP_InterlockedXor:
  3765. default:
  3766. DXASSERT(IOP == IntrinsicOp::IOP_InterlockedXor, "Invalid Intrinsic");
  3767. Op = AtomicRMWInst::BinOp::Xor;
  3768. break;
  3769. }
  3770. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3771. IRBuilder<> Builder(CI);
  3772. Value *Result = Builder.CreateAtomicRMW(
  3773. Op, addr, val, AtomicOrdering::SequentiallyConsistent);
  3774. if (CI->getNumArgOperands() >
  3775. HLOperandIndex::kInterlockedOriginalValueOpIndex)
  3776. Builder.CreateStore(
  3777. Result,
  3778. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex));
  3779. }
  3780. static Value* SkipAddrSpaceCast(Value* Ptr) {
  3781. if (AddrSpaceCastInst *CastInst = dyn_cast<AddrSpaceCastInst>(Ptr))
  3782. return CastInst->getOperand(0);
  3783. else if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Ptr)) {
  3784. if (ConstExpr->getOpcode() == Instruction::AddrSpaceCast) {
  3785. return ConstExpr->getOperand(0);
  3786. }
  3787. }
  3788. return Ptr;
  3789. }
  3790. Value *TranslateIopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3791. DXIL::OpCode opcode,
  3792. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3793. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3794. addr = SkipAddrSpaceCast(addr);
  3795. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3796. if (addressSpace == DXIL::kTGSMAddrSpace)
  3797. TranslateSharedMemAtomicBinOp(CI, IOP, addr);
  3798. else {
  3799. // buffer atomic translated in TranslateSubscript.
  3800. // Do nothing here.
  3801. // Mark not translated.
  3802. Translated = false;
  3803. }
  3804. return nullptr;
  3805. }
  3806. void TranslateSharedMemAtomicCmpXChg(CallInst *CI, Value *addr) {
  3807. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3808. Value *cmpVal =
  3809. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3810. IRBuilder<> Builder(CI);
  3811. Value *Result = Builder.CreateAtomicCmpXchg(
  3812. addr, cmpVal, val, AtomicOrdering::SequentiallyConsistent,
  3813. AtomicOrdering::SequentiallyConsistent);
  3814. if (CI->getNumArgOperands() >
  3815. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex) {
  3816. Value *originVal = Builder.CreateExtractValue(Result, 0);
  3817. Builder.CreateStore(
  3818. originVal,
  3819. CI->getArgOperand(HLOperandIndex::kInterlockedCmpOriginalValueOpIndex));
  3820. }
  3821. }
  3822. Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3823. DXIL::OpCode opcode,
  3824. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3825. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3826. addr = SkipAddrSpaceCast(addr);
  3827. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3828. if (addressSpace == DXIL::kTGSMAddrSpace)
  3829. TranslateSharedMemAtomicCmpXChg(CI, addr);
  3830. else {
  3831. // buffer atomic translated in TranslateSubscript.
  3832. // Do nothing here.
  3833. // Mark not translated.
  3834. Translated = false;
  3835. }
  3836. return nullptr;
  3837. }
  3838. }
  3839. // Process Tess Factor.
  3840. namespace {
  3841. // Clamp to [0.0f..1.0f], NaN->0.0f.
  3842. Value *CleanupTessFactorScale(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3843. float fMin = 0;
  3844. float fMax = 1;
  3845. Type *f32Ty = input->getType()->getScalarType();
  3846. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3847. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3848. Type *Ty = input->getType();
  3849. if (Ty->isVectorTy())
  3850. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3851. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3852. if (Ty->isVectorTy())
  3853. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3854. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3855. }
  3856. // Clamp to [1.0f..Inf], NaN->1.0f.
  3857. Value *CleanupTessFactor(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder)
  3858. {
  3859. float fMin = 1.0;
  3860. Type *f32Ty = input->getType()->getScalarType();
  3861. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3862. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3863. return TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3864. }
  3865. // Do partitioning-specific clamping.
  3866. Value *ClampTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3867. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3868. const unsigned kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64;
  3869. const unsigned kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63;
  3870. const unsigned kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2;
  3871. const unsigned kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1;
  3872. const unsigned kTESSELLATOR_MAX_TESSELLATION_FACTOR = 64;
  3873. float fMin;
  3874. float fMax;
  3875. switch (partitionMode) {
  3876. case DXIL::TessellatorPartitioning::Integer:
  3877. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3878. fMax = kTESSELLATOR_MAX_TESSELLATION_FACTOR;
  3879. break;
  3880. case DXIL::TessellatorPartitioning::Pow2:
  3881. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3882. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3883. break;
  3884. case DXIL::TessellatorPartitioning::FractionalOdd:
  3885. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3886. fMax = kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
  3887. break;
  3888. case DXIL::TessellatorPartitioning::FractionalEven:
  3889. default:
  3890. DXASSERT(partitionMode == DXIL::TessellatorPartitioning::FractionalEven,
  3891. "invalid partition mode");
  3892. fMin = kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
  3893. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3894. break;
  3895. }
  3896. Type *f32Ty = input->getType()->getScalarType();
  3897. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3898. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3899. Type *Ty = input->getType();
  3900. if (Ty->isVectorTy())
  3901. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3902. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3903. if (Ty->isVectorTy())
  3904. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3905. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3906. }
  3907. // round up for integer/pow2 partitioning
  3908. // note that this code assumes the inputs should be in the range [1, inf),
  3909. // which should be enforced by the clamp above.
  3910. Value *RoundUpTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3911. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3912. switch (partitionMode) {
  3913. case DXIL::TessellatorPartitioning::Integer:
  3914. return TrivialDxilUnaryOperation(DXIL::OpCode::Round_pi, input, hlslOP, Builder);
  3915. case DXIL::TessellatorPartitioning::Pow2: {
  3916. const unsigned kExponentMask = 0x7f800000;
  3917. const unsigned kExponentLSB = 0x00800000;
  3918. const unsigned kMantissaMask = 0x007fffff;
  3919. Type *Ty = input->getType();
  3920. // (val = (asuint(val) & mantissamask) ?
  3921. // (asuint(val) & exponentmask) + exponentbump :
  3922. // asuint(val) & exponentmask;
  3923. Type *uintTy = Type::getInt32Ty(Ty->getContext());
  3924. if (Ty->isVectorTy())
  3925. uintTy = VectorType::get(uintTy, Ty->getVectorNumElements());
  3926. Value *uintVal = Builder.CreateCast(Instruction::CastOps::FPToUI, input, uintTy);
  3927. Value *mantMask = ConstantInt::get(uintTy->getScalarType(), kMantissaMask);
  3928. mantMask = SplatToVector(mantMask, uintTy, Builder);
  3929. Value *manVal = Builder.CreateAnd(uintVal, mantMask);
  3930. Value *expMask = ConstantInt::get(uintTy->getScalarType(), kExponentMask);
  3931. expMask = SplatToVector(expMask, uintTy, Builder);
  3932. Value *expVal = Builder.CreateAnd(uintVal, expMask);
  3933. Value *expLSB = ConstantInt::get(uintTy->getScalarType(), kExponentLSB);
  3934. expLSB = SplatToVector(expLSB, uintTy, Builder);
  3935. Value *newExpVal = Builder.CreateAdd(expVal, expLSB);
  3936. Value *manValNotZero = Builder.CreateICmpEQ(manVal, ConstantAggregateZero::get(uintTy));
  3937. Value *factors = Builder.CreateSelect(manValNotZero, newExpVal, expVal);
  3938. return Builder.CreateUIToFP(factors, Ty);
  3939. } break;
  3940. case DXIL::TessellatorPartitioning::FractionalEven:
  3941. case DXIL::TessellatorPartitioning::FractionalOdd:
  3942. return input;
  3943. default:
  3944. DXASSERT(0, "invalid partition mode");
  3945. return nullptr;
  3946. }
  3947. }
  3948. Value *TranslateProcessIsolineTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3949. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3950. hlsl::OP *hlslOP = &helper.hlslOP;
  3951. // Get partition mode
  3952. DXASSERT_NOMSG(helper.functionProps);
  3953. DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
  3954. DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;
  3955. IRBuilder<> Builder(CI);
  3956. Value *rawDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDetailFactor);
  3957. rawDetailFactor = Builder.CreateExtractElement(rawDetailFactor, (uint64_t)0);
  3958. Value *rawDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDensityFactor);
  3959. rawDensityFactor = Builder.CreateExtractElement(rawDensityFactor, (uint64_t)0);
  3960. Value *init = UndefValue::get(VectorType::get(helper.f32Ty, 2));
  3961. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)0);
  3962. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)1);
  3963. Value *clamped = ClampTessFactor(init, partition, hlslOP, Builder);
  3964. Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
  3965. Value *roundedDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDetailFactor);
  3966. Value *temp = UndefValue::get(VectorType::get(helper.f32Ty, 1));
  3967. Value *roundedX = Builder.CreateExtractElement(rounded, (uint64_t)0);
  3968. temp = Builder.CreateInsertElement(temp, roundedX, (uint64_t)0);
  3969. Builder.CreateStore(temp, roundedDetailFactor);
  3970. Value *roundedDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDensityFactor);
  3971. Value *roundedY = Builder.CreateExtractElement(rounded, 1);
  3972. temp = Builder.CreateInsertElement(temp, roundedY, (uint64_t)0);
  3973. Builder.CreateStore(temp, roundedDensityFactor);
  3974. return nullptr;
  3975. }
  3976. // 3 inputs, 1 result
  3977. Value *ApplyTriTessFactorOp(Value *input, DXIL::OpCode opcode, hlsl::OP *hlslOP,
  3978. IRBuilder<> &Builder) {
  3979. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3980. Value *input1 = Builder.CreateExtractElement(input, 1);
  3981. Value *input2 = Builder.CreateExtractElement(input, 2);
  3982. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3983. Value *temp =
  3984. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3985. Value *combined =
  3986. TrivialDxilBinaryOperation(opcode, temp, input2, hlslOP, Builder);
  3987. return combined;
  3988. } else {
  3989. // Avg.
  3990. Value *temp = Builder.CreateFAdd(input0, input1);
  3991. Value *combined = Builder.CreateFAdd(temp, input2);
  3992. Value *rcp = ConstantFP::get(input0->getType(), 1.0 / 3.0);
  3993. combined = Builder.CreateFMul(combined, rcp);
  3994. return combined;
  3995. }
  3996. }
  3997. // 4 inputs, 1 result
  3998. Value *ApplyQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  3999. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  4000. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  4001. Value *input1 = Builder.CreateExtractElement(input, 1);
  4002. Value *input2 = Builder.CreateExtractElement(input, 2);
  4003. Value *input3 = Builder.CreateExtractElement(input, 3);
  4004. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  4005. Value *temp0 =
  4006. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  4007. Value *temp1 =
  4008. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  4009. Value *combined =
  4010. TrivialDxilBinaryOperation(opcode, temp0, temp1, hlslOP, Builder);
  4011. return combined;
  4012. } else {
  4013. // Avg.
  4014. Value *temp0 = Builder.CreateFAdd(input0, input1);
  4015. Value *temp1 = Builder.CreateFAdd(input2, input3);
  4016. Value *combined = Builder.CreateFAdd(temp0, temp1);
  4017. Value *rcp = ConstantFP::get(input0->getType(), 0.25);
  4018. combined = Builder.CreateFMul(combined, rcp);
  4019. return combined;
  4020. }
  4021. }
  4022. // 4 inputs, 2 result
  4023. Value *Apply2DQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  4024. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  4025. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  4026. Value *input1 = Builder.CreateExtractElement(input, 1);
  4027. Value *input2 = Builder.CreateExtractElement(input, 2);
  4028. Value *input3 = Builder.CreateExtractElement(input, 3);
  4029. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  4030. Value *temp0 =
  4031. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  4032. Value *temp1 =
  4033. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  4034. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  4035. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  4036. combined = Builder.CreateInsertElement(combined, temp1, 1);
  4037. return combined;
  4038. } else {
  4039. // Avg.
  4040. Value *temp0 = Builder.CreateFAdd(input0, input1);
  4041. Value *temp1 = Builder.CreateFAdd(input2, input3);
  4042. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  4043. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  4044. combined = Builder.CreateInsertElement(combined, temp1, 1);
  4045. Constant *rcp = ConstantFP::get(input0->getType(), 0.5);
  4046. rcp = ConstantVector::getSplat(2, rcp);
  4047. combined = Builder.CreateFMul(combined, rcp);
  4048. return combined;
  4049. }
  4050. }
// Apply the "small tess factor" rule: wherever the rounded scaled factors
// fall below `cutoffVal`, substitute the (clamped/rounded) unscaled average
// instead. *pClampedResult is updated in place with the selected clamped
// value; the selected rounded value is returned.
Value *ResolveSmallValue(Value **pClampedResult, Value *rounded,
                         Value *averageUnscaled, float cutoffVal,
                         DXIL::TessellatorPartitioning partitionMode,
                         hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *clampedResult = *pClampedResult;
  Value *clampedVal = clampedResult;
  Value *roundedVal = rounded;
  // Do partitioning-specific clamping.
  Value *clampedAvg =
      ClampTessFactor(averageUnscaled, partitionMode, hlslOP, Builder);
  Constant *cutoffVals =
      ConstantFP::get(Type::getFloatTy(rounded->getContext()), cutoffVal);
  if (clampedAvg->getType()->isVectorTy())
    cutoffVals = ConstantVector::getSplat(
        clampedAvg->getType()->getVectorNumElements(), cutoffVals);
  // Limit the average to the cutoff.
  clampedAvg = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, clampedAvg,
                                          cutoffVals, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *roundedAvg =
      RoundUpTessFactor(clampedAvg, partitionMode, hlslOP, Builder);
  // Re-splat the cutoff when `rounded` is a wider vector than the average.
  if (rounded->getType() != cutoffVals->getType())
    cutoffVals = ConstantVector::getSplat(
        rounded->getType()->getVectorNumElements(), cutoffVals);
  // If the scaled value is less than the cutoff, take the unscaled average.
  Value *lt = Builder.CreateFCmpOLT(rounded, cutoffVals);
  // Widen the average to match before the lane-wise selects.
  if (clampedAvg->getType() != clampedVal->getType())
    clampedAvg = SplatToVector(clampedAvg, clampedVal->getType(), Builder);
  *pClampedResult = Builder.CreateSelect(lt, clampedAvg, clampedVal);
  if (roundedAvg->getType() != roundedVal->getType())
    roundedAvg = SplatToVector(roundedAvg, roundedVal->getType(), Builder);
  Value *result = Builder.CreateSelect(lt, roundedAvg, roundedVal);
  return result;
}
// For 2D quad tess factors: on axes whose clamped factor dropped below
// `cutoffVal`, replace both the clamped and final (rounded) values with a
// capped per-result maximum so neither axis falls under the threshold.
// Both *pFinalResult and *pClampedResult are 2-wide vectors updated in place.
void ResolveQuadAxes(Value **pFinalResult, Value **pClampedResult,
                     float cutoffVal,
                     DXIL::TessellatorPartitioning partitionMode,
                     hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *finalResult = *pFinalResult;
  Value *clampedResult = *pClampedResult;
  Value *clampR = clampedResult;
  Value *finalR = finalResult;
  Type *f32Ty = Type::getFloatTy(finalR->getContext());
  Constant *cutoffVals = ConstantFP::get(f32Ty, cutoffVal);
  // Replacement floors: the raw cutoff for the clamped result, and the
  // partition-rounded cutoff for the final (rounded) result.
  Value *minValsX = cutoffVals;
  Value *minValsY =
      RoundUpTessFactor(cutoffVals, partitionMode, hlslOP, Builder);
  // Max across the two axes of each result.
  Value *clampRX = Builder.CreateExtractElement(clampR, (uint64_t)0);
  Value *clampRY = Builder.CreateExtractElement(clampR, 1);
  Value *maxValsX = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, clampRX,
                                               clampRY, hlslOP, Builder);
  Value *finalRX = Builder.CreateExtractElement(finalR, (uint64_t)0);
  Value *finalRY = Builder.CreateExtractElement(finalR, 1);
  Value *maxValsY = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, finalRX,
                                               finalRY, hlslOP, Builder);
  // Don't go over our threshold ("final" one is rounded).
  Value *optionX = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsX,
                                              minValsX, hlslOP, Builder);
  Value *optionY = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsY,
                                              minValsY, hlslOP, Builder);
  Value *clampL = SplatToVector(optionX, clampR->getType(), Builder);
  Value *finalL = SplatToVector(optionY, finalR->getType(), Builder);
  // Per-lane: where the clamped factor is below the cutoff, use the
  // replacement; otherwise keep the original value.
  cutoffVals = ConstantVector::getSplat(2, cutoffVals);
  Value *lt = Builder.CreateFCmpOLT(clampedResult, cutoffVals);
  *pClampedResult = Builder.CreateSelect(lt, clampL, clampR);
  *pFinalResult = Builder.CreateSelect(lt, finalL, finalR);
}
// Lowers the ProcessQuadTessFactors* / Process2DQuadTessFactors* /
// ProcessTriTessFactors* HLSL intrinsics for hull shaders.
// Scales, clamps and rounds the raw edge tess factors per the shader's
// tessellator partitioning mode, then stores through the intrinsic's
// output pointer operands:
//   - the rounded edge factors,
//   - the unrounded (clamped/scaled) inside factor(s),
//   - the rounded inside factor(s).
// Returns nullptr: the intrinsic produces no value.
Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Get partition mode
  DXASSERT_NOMSG(helper.functionProps);
  DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
  DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;
  IRBuilder<> Builder(CI);
  // Reduction used by the Apply*TessFactorOp helpers to derive the inside
  // factor from the edges: FMax/FMin for the Max/Min variants; NumOpCodes
  // signals the Avg variants.
  DXIL::OpCode tessFactorOp = DXIL::OpCode::NumOpCodes;
  switch (IOP) {
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
    tessFactorOp = DXIL::OpCode::FMax;
    break;
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
    tessFactorOp = DXIL::OpCode::FMin;
    break;
  default:
    // Default is Avg.
    break;
  }
  Value *rawEdgeFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawEdgeFactor);
  Value *insideScale = CI->getArgOperand(HLOperandIndex::kProcessTessFactorInsideScale);
  // Clamp to [0.0f..1.0f], NaN->0.0f.
  Value *scales = CleanupTessFactorScale(insideScale, hlslOP, Builder);
  // Do partitioning-specific clamping.
  Value *clamped = ClampTessFactor(rawEdgeFactor, partition, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
  // Store the output.
  Value *roundedEdgeFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedEdgeFactor);
  Builder.CreateStore(rounded, roundedEdgeFactor);
  // Clamp to [1.0f..Inf], NaN->1.0f.
  bool isQuad = false;
  Value *clean = CleanupTessFactor(rawEdgeFactor, hlslOP, Builder);
  // Reduce the cleaned edge factors into the candidate inside factor(s).
  Value *factors = nullptr;
  switch (IOP) {
  case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
    factors = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
    break;
  case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
    factors = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
    isQuad = true;
    break;
  case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
    factors = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
    break;
  default:
    DXASSERT(0, "invalid opcode for ProcessTessFactor");
    break;
  }
  // Apply the caller-provided inside scale; splat the scalar reduction
  // result to a vector first when the types differ.
  Value *scaledI = nullptr;
  if (scales->getType() == factors->getType())
    scaledI = Builder.CreateFMul(factors, scales);
  else {
    Value *vecFactors = SplatToVector(factors, scales->getType(), Builder);
    scaledI = Builder.CreateFMul(vecFactors, scales);
  }
  // Do partitioning-specific clamping.
  Value *clampedI = ClampTessFactor(scaledI, partition, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *roundedI = RoundUpTessFactor(clampedI, partition, hlslOP, Builder);
  Value *finalI = roundedI;
  // Fractional-odd partitioning needs special handling near the minimum
  // tess factor: small values are resolved against the averaged factors.
  if (partition == DXIL::TessellatorPartitioning::FractionalOdd) {
    // If not max, set to AVG.
    if (tessFactorOp != DXIL::OpCode::FMax)
      tessFactorOp = DXIL::OpCode::NumOpCodes;
    bool b2D = false;
    Value *avgFactorsI = nullptr;
    switch (IOP) {
    case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
    case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
    case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
      avgFactorsI = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
      b2D = true;
      break;
    case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
    case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
    case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
      avgFactorsI = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
      break;
    case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
    case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
    case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
      avgFactorsI = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
      break;
    default:
      DXASSERT(0, "invalid opcode for ProcessTessFactor");
      break;
    }
    finalI =
        ResolveSmallValue(/*inout*/&clampedI, roundedI, avgFactorsI, /*cutoff*/ 3.0,
            partition, hlslOP, Builder);
    if (b2D)
      ResolveQuadAxes(/*inout*/&finalI, /*inout*/&clampedI, /*cutoff*/3.0, partition, hlslOP, Builder);
  }
  // Store the unrounded inside factor; quad variants compute a scalar, so
  // write the single channel splatted to the output's vector type.
  Value *unroundedInsideFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorUnRoundedInsideFactor);
  Type *outFactorTy = unroundedInsideFactor->getType()->getPointerElementType();
  if (outFactorTy != clampedI->getType()) {
    DXASSERT(isQuad, "quad only write one channel of out factor");
    (void)isQuad;
    clampedI = Builder.CreateExtractElement(clampedI, (uint64_t)0);
    // Splat clampedI to float2.
    clampedI = SplatToVector(clampedI, outFactorTy, Builder);
  }
  Builder.CreateStore(clampedI, unroundedInsideFactor);
  // Store the rounded inside factor, with the same quad splat handling.
  Value *roundedInsideFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedInsideFactor);
  if (outFactorTy != finalI->getType()) {
    DXASSERT(isQuad, "quad only write one channel of out factor");
    finalI = Builder.CreateExtractElement(finalI, (uint64_t)0);
    // Splat finalI to float2.
    finalI = SplatToVector(finalI, outFactorTy, Builder);
  }
  Builder.CreateStore(finalI, roundedInsideFactor);
  return nullptr;
}
  4228. }
  4229. // Ray Tracing.
  4230. namespace {
  4231. Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP,
  4232. OP::OpCode opcode,
  4233. HLOperationLowerHelper &helper,
  4234. HLObjectOperationLowerHelper *pObjHelper,
  4235. bool &Translated) {
  4236. hlsl::OP *hlslOP = &helper.hlslOP;
  4237. Value *THit = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4238. Value *HitKind = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4239. Value *Attr = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4240. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4241. Type *Ty = Attr->getType();
  4242. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  4243. IRBuilder<> Builder(CI);
  4244. return Builder.CreateCall(F, {opArg, THit, HitKind, Attr});
  4245. }
  4246. Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP,
  4247. OP::OpCode opcode,
  4248. HLOperationLowerHelper &helper,
  4249. HLObjectOperationLowerHelper *pObjHelper,
  4250. bool &Translated) {
  4251. hlsl::OP *hlslOP = &helper.hlslOP;
  4252. Value *ShaderIndex = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  4253. Value *Parameter = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  4254. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4255. Type *Ty = Parameter->getType();
  4256. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  4257. IRBuilder<> Builder(CI);
  4258. return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter});
  4259. }
  4260. Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4261. HLOperationLowerHelper &helper,
  4262. HLObjectOperationLowerHelper *pObjHelper,
  4263. bool &Translated) {
  4264. hlsl::OP *hlslOP = &helper.hlslOP;
  4265. Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx);
  4266. Value *payLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx);
  4267. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4268. Value *Args[DXIL::OperandIndex::kTraceRayNumOp];
  4269. Args[0] = opArg;
  4270. for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) {
  4271. Args[i] = CI->getArgOperand(i);
  4272. }
  4273. IRBuilder<> Builder(CI);
  4274. // struct RayDesc
  4275. //{
  4276. // float3 Origin;
  4277. // float TMin;
  4278. // float3 Direction;
  4279. // float TMax;
  4280. //};
  4281. Value *zeroIdx = hlslOP->GetU32Const(0);
  4282. Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx});
  4283. origin = Builder.CreateLoad(origin);
  4284. unsigned index = DXIL::OperandIndex::kTraceRayRayDescOpIdx;
  4285. Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
  4286. Args[index++] = Builder.CreateExtractElement(origin, 1);
  4287. Args[index++] = Builder.CreateExtractElement(origin, 2);
  4288. Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)});
  4289. tmin = Builder.CreateLoad(tmin);
  4290. Args[index++] = tmin;
  4291. Value *direction = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)});
  4292. direction = Builder.CreateLoad(direction);
  4293. Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
  4294. Args[index++] = Builder.CreateExtractElement(direction, 1);
  4295. Args[index++] = Builder.CreateExtractElement(direction, 2);
  4296. Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)});
  4297. tmax = Builder.CreateLoad(tmax);
  4298. Args[index++] = tmax;
  4299. Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = payLoad;
  4300. Type *Ty = payLoad->getType();
  4301. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  4302. return Builder.CreateCall(F, Args);
  4303. }
  4304. // RayQuery methods
  4305. Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4306. HLOperationLowerHelper &helper,
  4307. HLObjectOperationLowerHelper *pObjHelper,
  4308. bool &Translated) {
  4309. hlsl::OP *hlslOP = &helper.hlslOP;
  4310. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  4311. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  4312. }
  4313. Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4314. HLOperationLowerHelper &helper,
  4315. HLObjectOperationLowerHelper *pObjHelper,
  4316. bool &Translated) {
  4317. hlsl::OP *hlslOP = &helper.hlslOP;
  4318. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4319. Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp];
  4320. Args[0] = opArg;
  4321. for (unsigned i = 1; i < HLOperandIndex::kTraceRayInlineRayDescOpIdx; i++) {
  4322. Args[i] = CI->getArgOperand(i);
  4323. }
  4324. IRBuilder<> Builder(CI);
  4325. unsigned hlIndex = HLOperandIndex::kTraceRayInlineRayDescOpIdx;
  4326. unsigned index = DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx;
  4327. // struct RayDesc
  4328. //{
  4329. // float3 Origin;
  4330. Value *origin = CI->getArgOperand(hlIndex++);
  4331. Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
  4332. Args[index++] = Builder.CreateExtractElement(origin, 1);
  4333. Args[index++] = Builder.CreateExtractElement(origin, 2);
  4334. // float TMin;
  4335. Args[index++] = CI->getArgOperand(hlIndex++);
  4336. // float3 Direction;
  4337. Value *direction = CI->getArgOperand(hlIndex++);
  4338. Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
  4339. Args[index++] = Builder.CreateExtractElement(direction, 1);
  4340. Args[index++] = Builder.CreateExtractElement(direction, 2);
  4341. // float TMax;
  4342. Args[index++] = CI->getArgOperand(hlIndex++);
  4343. //};
  4344. DXASSERT_NOMSG(index == DXIL::OperandIndex::kTraceRayInlineNumOp);
  4345. Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());
  4346. return Builder.CreateCall(F, Args);
  4347. }
  4348. Value *TranslateCommitProceduralPrimitiveHit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4349. HLOperationLowerHelper &helper,
  4350. HLObjectOperationLowerHelper *pObjHelper,
  4351. bool &Translated) {
  4352. hlsl::OP *hlslOP = &helper.hlslOP;
  4353. Value *THit = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  4354. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4355. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4356. Value *Args[] = {opArg, handle, THit};
  4357. IRBuilder<> Builder(CI);
  4358. Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());
  4359. return Builder.CreateCall(F, Args);
  4360. }
  4361. Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4362. HLOperationLowerHelper &helper,
  4363. HLObjectOperationLowerHelper *pObjHelper,
  4364. bool &Translated) {
  4365. hlsl::OP *hlslOP = &helper.hlslOP;
  4366. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4367. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4368. IRBuilder<> Builder(CI);
  4369. Function *F = hlslOP->GetOpFunc(opcode, CI->getType());
  4370. return Builder.CreateCall(F, {opArg, handle});
  4371. }
  4372. Value *TranslateRayQueryMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4373. HLOperationLowerHelper &helper,
  4374. HLObjectOperationLowerHelper *pObjHelper,
  4375. bool &Translated) {
  4376. hlsl::OP *hlslOP = &helper.hlslOP;
  4377. VectorType *Ty = cast<VectorType>(CI->getType());
  4378. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4379. uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
  4380. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4381. uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
  4382. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4383. Value *retVal =
  4384. TrivialDxilOperation(opcode, {nullptr, handle, rows, cols}, Ty, CI, hlslOP);
  4385. return retVal;
  4386. }
  4387. Value *TranslateRayQueryTransposedMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4388. HLOperationLowerHelper &helper,
  4389. HLObjectOperationLowerHelper *pObjHelper,
  4390. bool &Translated) {
  4391. hlsl::OP *hlslOP = &helper.hlslOP;
  4392. VectorType *Ty = cast<VectorType>(CI->getType());
  4393. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4394. uint32_t rVals[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 };
  4395. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4396. uint8_t cVals[] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
  4397. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4398. Value *retVal =
  4399. TrivialDxilOperation(opcode, {nullptr, handle, rows, cols}, Ty, CI, hlslOP);
  4400. return retVal;
  4401. }
  4402. Value *TranslateRayQueryFloat2Getter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4403. HLOperationLowerHelper &helper,
  4404. HLObjectOperationLowerHelper *pObjHelper,
  4405. bool &Translated) {
  4406. hlsl::OP *hlslOP = &helper.hlslOP;
  4407. VectorType *Ty = cast<VectorType>(CI->getType());
  4408. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4409. uint8_t elementVals[] = {0, 1};
  4410. Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
  4411. Value *retVal =
  4412. TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP);
  4413. return retVal;
  4414. }
  4415. Value *TranslateRayQueryFloat3Getter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4416. HLOperationLowerHelper &helper,
  4417. HLObjectOperationLowerHelper *pObjHelper,
  4418. bool &Translated) {
  4419. hlsl::OP *hlslOP = &helper.hlslOP;
  4420. VectorType *Ty = cast<VectorType>(CI->getType());
  4421. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4422. uint8_t elementVals[] = {0, 1, 2};
  4423. Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
  4424. Value *retVal =
  4425. TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP);
  4426. return retVal;
  4427. }
  4428. Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4429. HLOperationLowerHelper &helper,
  4430. HLObjectOperationLowerHelper *pObjHelper,
  4431. bool &Translated) {
  4432. hlsl::OP *hlslOP = &helper.hlslOP;
  4433. VectorType *Ty = cast<VectorType>(CI->getType());
  4434. uint8_t vals[] = {0,1,2,3};
  4435. Constant *src = ConstantDataVector::get(CI->getContext(), vals);
  4436. Value *retVal = TrivialDxilOperation(opcode, {nullptr, src}, Ty, CI, hlslOP);
  4437. return retVal;
  4438. }
  4439. Value *TranslateNoArgMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4440. HLOperationLowerHelper &helper,
  4441. HLObjectOperationLowerHelper *pObjHelper,
  4442. bool &Translated) {
  4443. hlsl::OP *hlslOP = &helper.hlslOP;
  4444. VectorType *Ty = cast<VectorType>(CI->getType());
  4445. uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
  4446. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4447. uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
  4448. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4449. Value *retVal =
  4450. TrivialDxilOperation(opcode, {nullptr, rows, cols}, Ty, CI, hlslOP);
  4451. return retVal;
  4452. }
  4453. Value *TranslateNoArgTransposedMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4454. HLOperationLowerHelper &helper,
  4455. HLObjectOperationLowerHelper *pObjHelper,
  4456. bool &Translated) {
  4457. hlsl::OP *hlslOP = &helper.hlslOP;
  4458. VectorType *Ty = cast<VectorType>(CI->getType());
  4459. uint32_t rVals[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 };
  4460. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4461. uint8_t cVals[] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
  4462. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4463. Value *retVal =
  4464. TrivialDxilOperation(opcode, { nullptr, rows, cols }, Ty, CI, hlslOP);
  4465. return retVal;
  4466. }
  4467. Value *TranslateNoArgNoReturnPreserveOutput(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4468. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4469. Instruction *pResult = cast<Instruction>(
  4470. TrivialNoArgOperation(CI, IOP, opcode, helper, pObjHelper, Translated));
  4471. // HL intrinsic must have had a return injected just after the call.
  4472. // SROA_Parameter_HLSL will copy from alloca to output just before each return.
  4473. // Now move call after the copy and just before the return.
  4474. if (isa<ReturnInst>(pResult->getNextNode()))
  4475. return pResult;
  4476. ReturnInst *RetI = cast<ReturnInst>(pResult->getParent()->getTerminator());
  4477. pResult->removeFromParent();
  4478. pResult->insertBefore(RetI);
  4479. return pResult;
  4480. }
  4481. // Special half dot2 with accumulate to float
  4482. Value *TranslateDot2Add(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4483. HLOperationLowerHelper &helper,
  4484. HLObjectOperationLowerHelper *pObjHelper,
  4485. bool &Translated) {
  4486. hlsl::OP *hlslOP = &helper.hlslOP;
  4487. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4488. const unsigned vecSize = 2;
  4489. DXASSERT(src0->getType()->isVectorTy() &&
  4490. vecSize == src0->getType()->getVectorNumElements() &&
  4491. src0->getType()->getScalarType()->isHalfTy(),
  4492. "otherwise, unexpected input dimension or component type");
  4493. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4494. DXASSERT(src0->getType() == src1->getType(),
  4495. "otherwise, mismatched argument types");
  4496. Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4497. Type *accTy = accArg->getType();
  4498. DXASSERT(!accTy->isVectorTy() && accTy->isFloatTy(),
  4499. "otherwise, unexpected accumulator type");
  4500. IRBuilder<> Builder(CI);
  4501. Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
  4502. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  4503. SmallVector<Value *, 6> args;
  4504. args.emplace_back(opArg);
  4505. args.emplace_back(accArg);
  4506. for (unsigned i = 0; i < vecSize; i++)
  4507. args.emplace_back(Builder.CreateExtractElement(src0, i));
  4508. for (unsigned i = 0; i < vecSize; i++)
  4509. args.emplace_back(Builder.CreateExtractElement(src1, i));
  4510. return Builder.CreateCall(dxilFunc, args);
  4511. }
  4512. Value *TranslateDot4AddPacked(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4513. HLOperationLowerHelper &helper,
  4514. HLObjectOperationLowerHelper *pObjHelper,
  4515. bool &Translated) {
  4516. hlsl::OP *hlslOP = &helper.hlslOP;
  4517. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4518. DXASSERT(
  4519. !src0->getType()->isVectorTy() && src0->getType()->isIntegerTy(32),
  4520. "otherwise, unexpected vector support in high level intrinsic tempalte");
  4521. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4522. DXASSERT(src0->getType() == src1->getType(), "otherwise, mismatched argument types");
  4523. Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4524. Type *accTy = accArg->getType();
  4525. DXASSERT(!accTy->isVectorTy() && accTy->isIntegerTy(32),
  4526. "otherwise, unexpected vector support in high level intrinsic tempalte");
  4527. IRBuilder<> Builder(CI);
  4528. Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
  4529. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  4530. return Builder.CreateCall(dxilFunc, { opArg, accArg, src0, src1 });
  4531. }
  4532. } // namespace
  4533. // Resource Handle.
  4534. namespace {
  4535. Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP,
  4536. DXIL::OpCode opcode,
  4537. HLOperationLowerHelper &helper,
  4538. HLObjectOperationLowerHelper *pObjHelper,
  4539. bool &Translated) {
  4540. hlsl::OP &hlslOP = helper.hlslOP;
  4541. Function *dxilFunc = hlslOP.GetOpFunc(opcode, helper.voidTy);
  4542. IRBuilder<> Builder(CI);
  4543. Value *opArg = ConstantInt::get(helper.i32Ty, (unsigned)opcode);
  4544. return Builder.CreateCall(
  4545. dxilFunc, {opArg, CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx),
  4546. // TODO: update nonUniformIndex later.
  4547. Builder.getInt1(false)});
  4548. }
  4549. }
  4550. // Lower table.
  4551. namespace {
  4552. Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
  4553. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4554. Translated = false;
  4555. dxilutil::EmitErrorOnInstruction(CI, "Unsupported intrinsic.");
  4556. return nullptr;
  4557. }
  4558. // SPIRV change starts
  4559. #ifdef ENABLE_SPIRV_CODEGEN
  4560. Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP,
  4561. DXIL::OpCode opcode,
  4562. HLOperationLowerHelper &helper,
  4563. HLObjectOperationLowerHelper *pObjHelper,
  4564. bool &Translated) {
  4565. Translated = false;
  4566. dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic.");
  4567. return nullptr;
  4568. }
  4569. #endif // ENABLE_SPIRV_CODEGEN
  4570. // SPIRV change ends
// Stream-output intrinsics (Append/RestartStrip) are not lowered here;
// they are translated in DxilGenerationPass::GenerateStreamOutputOperation.
// Do nothing here.
// Mark not translated so the call survives for that pass.
Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
                         HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  Translated = false;
  return nullptr;
}
  4579. // This table has to match IntrinsicOp orders
  4580. IntrinsicLower gLowerTable[] = {
  4581. {IntrinsicOp::IOP_AcceptHitAndEndSearch, TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::AcceptHitAndEndSearch},
  4582. {IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc},
  4583. {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  4584. {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  4585. {IntrinsicOp::IOP_AllocateRayQuery, TranslateAllocateRayQuery, DXIL::OpCode::AllocateRayQuery},
  4586. {IntrinsicOp::IOP_CallShader, TranslateCallShader, DXIL::OpCode::CallShader},
  4587. {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped},
  4588. {IntrinsicOp::IOP_CreateResourceFromHeap, TranslateGetHandleFromHeap, DXIL::OpCode::CreateHandleFromHeap},
  4589. {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, DXIL::OpCode::NumOpCodes},
  4590. {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  4591. {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  4592. {IntrinsicOp::IOP_DispatchMesh, TrivialDispatchMesh, DXIL::OpCode::DispatchMesh },
  4593. {IntrinsicOp::IOP_DispatchRaysDimensions, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysDimensions},
  4594. {IntrinsicOp::IOP_DispatchRaysIndex, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysIndex},
  4595. {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, DXIL::OpCode::NumOpCodes},
  4596. {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, DXIL::OpCode::EvalCentroid},
  4597. {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, DXIL::OpCode::NumOpCodes},
  4598. {IntrinsicOp::IOP_GeometryIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::GeometryIndex},
  4599. {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex, DXIL::OpCode::AttributeAtVertex},
  4600. {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, DXIL::OpCode::RenderTargetGetSampleCount},
  4601. {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, DXIL::OpCode::NumOpCodes},
  4602. {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  4603. {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  4604. {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation, DXIL::OpCode::HitKind},
  4605. {IntrinsicOp::IOP_IgnoreHit, TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::IgnoreHit},
  4606. {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceID},
  4607. {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceIndex},
  4608. {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4609. {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4610. {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4611. {IntrinsicOp::IOP_InterlockedCompareStore, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4612. {IntrinsicOp::IOP_InterlockedExchange, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4613. {IntrinsicOp::IOP_InterlockedMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4614. {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4615. {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4616. {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4617. {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, DXIL::OpCode::NumOpCodes},
  4618. {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayDirection},
  4619. {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayOrigin},
  4620. {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
  4621. {IntrinsicOp::IOP_ObjectToWorld3x4, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
  4622. {IntrinsicOp::IOP_ObjectToWorld4x3, TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
  4623. {IntrinsicOp::IOP_PrimitiveIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::PrimitiveIndex},
  4624. {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4625. {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4626. {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4627. {IntrinsicOp::IOP_ProcessIsolineTessFactors, TranslateProcessIsolineTessFactors, DXIL::OpCode::NumOpCodes},
  4628. {IntrinsicOp::IOP_ProcessQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4629. {IntrinsicOp::IOP_ProcessQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4630. {IntrinsicOp::IOP_ProcessQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4631. {IntrinsicOp::IOP_ProcessTriTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4632. {IntrinsicOp::IOP_ProcessTriTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4633. {IntrinsicOp::IOP_ProcessTriTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4634. {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4635. {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4636. {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4637. {IntrinsicOp::IOP_QuadReadLaneAt, TranslateQuadReadLaneAt, DXIL::OpCode::NumOpCodes},
  4638. {IntrinsicOp::IOP_RayFlags, TrivialNoArgWithRetOperation, DXIL::OpCode::RayFlags},
  4639. {IntrinsicOp::IOP_RayTCurrent, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTCurrent},
  4640. {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTMin},
  4641. {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection, DXIL::OpCode::ReportHit},
  4642. {IntrinsicOp::IOP_SetMeshOutputCounts, TrivialSetMeshOutputCounts, DXIL::OpCode::SetMeshOutputCounts},
  4643. {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay},
  4644. {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual, DXIL::OpCode::WaveActiveAllEqual},
  4645. {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B, DXIL::OpCode::WaveAllTrue},
  4646. {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B, DXIL::OpCode::WaveAnyTrue},
  4647. {IntrinsicOp::IOP_WaveActiveBallot, TranslateWaveBallot, DXIL::OpCode::WaveActiveBallot},
  4648. {IntrinsicOp::IOP_WaveActiveBitAnd, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4649. {IntrinsicOp::IOP_WaveActiveBitOr, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4650. {IntrinsicOp::IOP_WaveActiveBitXor, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4651. {IntrinsicOp::IOP_WaveActiveCountBits, TranslateWaveA2B, DXIL::OpCode::WaveAllBitCount},
  4652. {IntrinsicOp::IOP_WaveActiveMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4653. {IntrinsicOp::IOP_WaveActiveMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4654. {IntrinsicOp::IOP_WaveActiveProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4655. {IntrinsicOp::IOP_WaveActiveSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4656. {IntrinsicOp::IOP_WaveGetLaneCount, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneCount},
  4657. {IntrinsicOp::IOP_WaveGetLaneIndex, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneIndex},
  4658. {IntrinsicOp::IOP_WaveIsFirstLane, TranslateWaveToVal, DXIL::OpCode::WaveIsFirstLane},
  4659. {IntrinsicOp::IOP_WaveMatch, TranslateWaveMatch, DXIL::OpCode::WaveMatch},
  4660. {IntrinsicOp::IOP_WaveMultiPrefixBitAnd, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4661. {IntrinsicOp::IOP_WaveMultiPrefixBitOr, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4662. {IntrinsicOp::IOP_WaveMultiPrefixBitXor, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4663. {IntrinsicOp::IOP_WaveMultiPrefixCountBits, TranslateWaveMultiPrefixBitCount, DXIL::OpCode::WaveMultiPrefixBitCount},
  4664. {IntrinsicOp::IOP_WaveMultiPrefixProduct, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4665. {IntrinsicOp::IOP_WaveMultiPrefixSum, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4666. {IntrinsicOp::IOP_WavePrefixCountBits, TranslateWaveA2B, DXIL::OpCode::WavePrefixBitCount},
  4667. {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
  4668. {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
  4669. {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt, DXIL::OpCode::WaveReadLaneAt},
  4670. {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, DXIL::OpCode::WaveReadLaneFirst},
  4671. {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayDirection},
  4672. {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayOrigin},
  4673. {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::WorldToObject},
  4674. {IntrinsicOp::IOP_WorldToObject3x4, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::WorldToObject},
  4675. {IntrinsicOp::IOP_WorldToObject4x3, TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::WorldToObject},
  4676. {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
  4677. {IntrinsicOp::IOP_abs, TranslateAbs, DXIL::OpCode::NumOpCodes},
  4678. {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
  4679. {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes},
  4680. {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes},
  4681. {IntrinsicOp::IOP_asdouble, TranslateAsDouble, DXIL::OpCode::MakeDouble},
  4682. {IntrinsicOp::IOP_asfloat, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4683. {IntrinsicOp::IOP_asfloat16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4684. {IntrinsicOp::IOP_asin, TrivialUnaryOperation, DXIL::OpCode::Asin},
  4685. {IntrinsicOp::IOP_asint, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4686. {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4687. {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble},
  4688. {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes},
  4689. {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan},
  4690. {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes},
  4691. {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi},
  4692. {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
  4693. {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes},
  4694. {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos},
  4695. {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos},
  4696. {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, DXIL::OpCode::Countbits},
  4697. {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes},
  4698. {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
  4699. {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
  4700. {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineX},
  4701. {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
  4702. {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
  4703. {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineY},
  4704. {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes},
  4705. {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes},
  4706. {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes},
  4707. {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes},
  4708. {IntrinsicOp::IOP_dot2add, TranslateDot2Add, DXIL::OpCode::Dot2AddHalf},
  4709. {IntrinsicOp::IOP_dot4add_i8packed, TranslateDot4AddPacked, DXIL::OpCode::Dot4AddI8Packed},
  4710. {IntrinsicOp::IOP_dot4add_u8packed, TranslateDot4AddPacked, DXIL::OpCode::Dot4AddU8Packed},
  4711. {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes},
  4712. {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes},
  4713. {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp},
  4714. {IntrinsicOp::IOP_f16tof32, TranslateF16ToF32, DXIL::OpCode::LegacyF16ToF32},
  4715. {IntrinsicOp::IOP_f32tof16, TranslateF32ToF16, DXIL::OpCode::LegacyF32ToF16},
  4716. {IntrinsicOp::IOP_faceforward, TranslateFaceforward, DXIL::OpCode::NumOpCodes},
  4717. {IntrinsicOp::IOP_firstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitSHi},
  4718. {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo, DXIL::OpCode::FirstbitLo},
  4719. {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni},
  4720. {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma},
  4721. {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes},
  4722. {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc},
  4723. {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes},
  4724. {IntrinsicOp::IOP_fwidth, TranslateFWidth, DXIL::OpCode::NumOpCodes},
  4725. {IntrinsicOp::IOP_isfinite, TrivialIsSpecialFloat, DXIL::OpCode::IsFinite},
  4726. {IntrinsicOp::IOP_isinf, TrivialIsSpecialFloat, DXIL::OpCode::IsInf},
  4727. {IntrinsicOp::IOP_isnan, TrivialIsSpecialFloat, DXIL::OpCode::IsNaN},
  4728. {IntrinsicOp::IOP_ldexp, TranslateLdExp, DXIL::OpCode::NumOpCodes},
  4729. {IntrinsicOp::IOP_length, TranslateLength, DXIL::OpCode::NumOpCodes},
  4730. {IntrinsicOp::IOP_lerp, TranslateLerp, DXIL::OpCode::NumOpCodes},
  4731. {IntrinsicOp::IOP_lit, TranslateLit, DXIL::OpCode::NumOpCodes},
  4732. {IntrinsicOp::IOP_log, TranslateLog, DXIL::OpCode::NumOpCodes},
  4733. {IntrinsicOp::IOP_log10, TranslateLog10, DXIL::OpCode::NumOpCodes},
  4734. {IntrinsicOp::IOP_log2, TrivialUnaryOperation, DXIL::OpCode::Log},
  4735. {IntrinsicOp::IOP_mad, TranslateFUITrinary, DXIL::OpCode::IMad},
  4736. {IntrinsicOp::IOP_max, TranslateFUIBinary, DXIL::OpCode::IMax},
  4737. {IntrinsicOp::IOP_min, TranslateFUIBinary, DXIL::OpCode::IMin},
  4738. {IntrinsicOp::IOP_modf, TranslateModF, DXIL::OpCode::NumOpCodes},
  4739. {IntrinsicOp::IOP_msad4, TranslateMSad4, DXIL::OpCode::NumOpCodes},
  4740. {IntrinsicOp::IOP_mul, TranslateMul, DXIL::OpCode::NumOpCodes},
  4741. {IntrinsicOp::IOP_normalize, TranslateNormalize, DXIL::OpCode::NumOpCodes},
  4742. {IntrinsicOp::IOP_pow, TranslatePow, DXIL::OpCode::NumOpCodes},
  4743. {IntrinsicOp::IOP_printf, TranslatePrintf, DXIL::OpCode::NumOpCodes},
  4744. {IntrinsicOp::IOP_radians, TranslateRadians, DXIL::OpCode::NumOpCodes},
  4745. {IntrinsicOp::IOP_rcp, TranslateRCP, DXIL::OpCode::NumOpCodes},
  4746. {IntrinsicOp::IOP_reflect, TranslateReflect, DXIL::OpCode::NumOpCodes},
  4747. {IntrinsicOp::IOP_refract, TranslateRefract, DXIL::OpCode::NumOpCodes},
  4748. {IntrinsicOp::IOP_reversebits, TrivialUnaryOperation, DXIL::OpCode::Bfrev},
  4749. {IntrinsicOp::IOP_round, TrivialUnaryOperation, DXIL::OpCode::Round_ne},
  4750. {IntrinsicOp::IOP_rsqrt, TrivialUnaryOperation, DXIL::OpCode::Rsqrt},
  4751. {IntrinsicOp::IOP_saturate, TrivialUnaryOperation, DXIL::OpCode::Saturate},
  4752. {IntrinsicOp::IOP_sign, TranslateSign, DXIL::OpCode::NumOpCodes},
  4753. {IntrinsicOp::IOP_sin, TrivialUnaryOperation, DXIL::OpCode::Sin},
  4754. {IntrinsicOp::IOP_sincos, EmptyLower, DXIL::OpCode::NumOpCodes},
  4755. {IntrinsicOp::IOP_sinh, TrivialUnaryOperation, DXIL::OpCode::Hsin},
  4756. {IntrinsicOp::IOP_smoothstep, TranslateSmoothStep, DXIL::OpCode::NumOpCodes},
  4757. {IntrinsicOp::IOP_source_mark, EmptyLower, DXIL::OpCode::NumOpCodes},
  4758. {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt},
  4759. {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes},
  4760. {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan},
  4761. {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan},
  4762. {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4763. {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4764. {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4765. {IntrinsicOp::IOP_tex1Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4766. {IntrinsicOp::IOP_tex1Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4767. {IntrinsicOp::IOP_tex2D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4768. {IntrinsicOp::IOP_tex2Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4769. {IntrinsicOp::IOP_tex2Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4770. {IntrinsicOp::IOP_tex2Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4771. {IntrinsicOp::IOP_tex2Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4772. {IntrinsicOp::IOP_tex3D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4773. {IntrinsicOp::IOP_tex3Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4774. {IntrinsicOp::IOP_tex3Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4775. {IntrinsicOp::IOP_tex3Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4776. {IntrinsicOp::IOP_tex3Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4777. {IntrinsicOp::IOP_texCUBE, EmptyLower, DXIL::OpCode::NumOpCodes},
  4778. {IntrinsicOp::IOP_texCUBEbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4779. {IntrinsicOp::IOP_texCUBEgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4780. {IntrinsicOp::IOP_texCUBElod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4781. {IntrinsicOp::IOP_texCUBEproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4782. {IntrinsicOp::IOP_transpose, EmptyLower, DXIL::OpCode::NumOpCodes},
  4783. {IntrinsicOp::IOP_trunc, TrivialUnaryOperation, DXIL::OpCode::Round_z},
  4784. {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream},
  4785. {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream},
  4786. {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
  4787. {IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
  4788. {IntrinsicOp::MOP_GetDimensions, TranslateGetDimensions, DXIL::OpCode::NumOpCodes},
  4789. {IntrinsicOp::MOP_Load, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4790. {IntrinsicOp::MOP_Sample, TranslateSample, DXIL::OpCode::Sample},
  4791. {IntrinsicOp::MOP_SampleBias, TranslateSample, DXIL::OpCode::SampleBias},
  4792. {IntrinsicOp::MOP_SampleCmp, TranslateSample, DXIL::OpCode::SampleCmp},
  4793. {IntrinsicOp::MOP_SampleCmpLevelZero, TranslateSample, DXIL::OpCode::SampleCmpLevelZero},
  4794. {IntrinsicOp::MOP_SampleGrad, TranslateSample, DXIL::OpCode::SampleGrad},
  4795. {IntrinsicOp::MOP_SampleLevel, TranslateSample, DXIL::OpCode::SampleLevel},
  4796. {IntrinsicOp::MOP_Gather, TranslateGather, DXIL::OpCode::TextureGather},
  4797. {IntrinsicOp::MOP_GatherAlpha, TranslateGather, DXIL::OpCode::TextureGather},
  4798. {IntrinsicOp::MOP_GatherBlue, TranslateGather, DXIL::OpCode::TextureGather},
  4799. {IntrinsicOp::MOP_GatherCmp, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4800. {IntrinsicOp::MOP_GatherCmpAlpha, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4801. {IntrinsicOp::MOP_GatherCmpBlue, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4802. {IntrinsicOp::MOP_GatherCmpGreen, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4803. {IntrinsicOp::MOP_GatherCmpRed, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4804. {IntrinsicOp::MOP_GatherGreen, TranslateGather, DXIL::OpCode::TextureGather},
  4805. {IntrinsicOp::MOP_GatherRed, TranslateGather, DXIL::OpCode::TextureGather},
  4806. {IntrinsicOp::MOP_GetSamplePosition, TranslateGetSamplePosition, DXIL::OpCode::NumOpCodes},
  4807. {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4808. {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4809. {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4810. {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4811. {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4812. {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4813. {IntrinsicOp::MOP_InterlockedCompareStore, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4814. {IntrinsicOp::MOP_InterlockedExchange, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4815. {IntrinsicOp::MOP_InterlockedMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4816. {IntrinsicOp::MOP_InterlockedMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4817. {IntrinsicOp::MOP_InterlockedOr, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4818. {IntrinsicOp::MOP_InterlockedXor, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4819. {IntrinsicOp::MOP_Store, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4820. {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4821. {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4822. {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  4823. {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
  4824. {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
  4825. {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes},
  4826. {IntrinsicOp::MOP_WriteSamplerFeedback, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedback},
  4827. {IntrinsicOp::MOP_WriteSamplerFeedbackBias, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedbackBias},
  4828. {IntrinsicOp::MOP_WriteSamplerFeedbackGrad, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedbackGrad},
  4829. {IntrinsicOp::MOP_WriteSamplerFeedbackLevel, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedbackLevel},
  4830. {IntrinsicOp::MOP_Abort, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_Abort},
  4831. {IntrinsicOp::MOP_CandidateGeometryIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateGeometryIndex},
  4832. {IntrinsicOp::MOP_CandidateInstanceContributionToHitGroupIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateInstanceContributionToHitGroupIndex},
  4833. {IntrinsicOp::MOP_CandidateInstanceID, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateInstanceID},
  4834. {IntrinsicOp::MOP_CandidateInstanceIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateInstanceIndex},
  4835. {IntrinsicOp::MOP_CandidateObjectRayDirection, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CandidateObjectRayDirection},
  4836. {IntrinsicOp::MOP_CandidateObjectRayOrigin, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CandidateObjectRayOrigin},
  4837. {IntrinsicOp::MOP_CandidateObjectToWorld3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4},
  4838. {IntrinsicOp::MOP_CandidateObjectToWorld4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4},
  4839. {IntrinsicOp::MOP_CandidatePrimitiveIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidatePrimitiveIndex},
  4840. {IntrinsicOp::MOP_CandidateProceduralPrimitiveNonOpaque, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateProceduralPrimitiveNonOpaque},
  4841. {IntrinsicOp::MOP_CandidateTriangleBarycentrics, TranslateRayQueryFloat2Getter, DXIL::OpCode::RayQuery_CandidateTriangleBarycentrics},
  4842. {IntrinsicOp::MOP_CandidateTriangleFrontFace, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateTriangleFrontFace},
  4843. {IntrinsicOp::MOP_CandidateTriangleRayT, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateTriangleRayT},
  4844. {IntrinsicOp::MOP_CandidateType, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateType},
  4845. {IntrinsicOp::MOP_CandidateWorldToObject3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateWorldToObject3x4},
  4846. {IntrinsicOp::MOP_CandidateWorldToObject4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateWorldToObject3x4},
  4847. {IntrinsicOp::MOP_CommitNonOpaqueTriangleHit, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommitNonOpaqueTriangleHit},
  4848. {IntrinsicOp::MOP_CommitProceduralPrimitiveHit, TranslateCommitProceduralPrimitiveHit, DXIL::OpCode::RayQuery_CommitProceduralPrimitiveHit},
  4849. {IntrinsicOp::MOP_CommittedGeometryIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedGeometryIndex},
  4850. {IntrinsicOp::MOP_CommittedInstanceContributionToHitGroupIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex},
  4851. {IntrinsicOp::MOP_CommittedInstanceID, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedInstanceID},
  4852. {IntrinsicOp::MOP_CommittedInstanceIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedInstanceIndex},
  4853. {IntrinsicOp::MOP_CommittedObjectRayDirection, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CommittedObjectRayDirection},
  4854. {IntrinsicOp::MOP_CommittedObjectRayOrigin, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CommittedObjectRayOrigin},
  4855. {IntrinsicOp::MOP_CommittedObjectToWorld3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4},
  4856. {IntrinsicOp::MOP_CommittedObjectToWorld4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4},
  4857. {IntrinsicOp::MOP_CommittedPrimitiveIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedPrimitiveIndex},
  4858. {IntrinsicOp::MOP_CommittedRayT, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedRayT},
  4859. {IntrinsicOp::MOP_CommittedStatus, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedStatus},
  4860. {IntrinsicOp::MOP_CommittedTriangleBarycentrics, TranslateRayQueryFloat2Getter, DXIL::OpCode::RayQuery_CommittedTriangleBarycentrics},
  4861. {IntrinsicOp::MOP_CommittedTriangleFrontFace, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedTriangleFrontFace},
  4862. {IntrinsicOp::MOP_CommittedWorldToObject3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedWorldToObject3x4},
  4863. {IntrinsicOp::MOP_CommittedWorldToObject4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedWorldToObject3x4},
  4864. {IntrinsicOp::MOP_Proceed, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_Proceed},
  4865. {IntrinsicOp::MOP_RayFlags, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_RayFlags},
  4866. {IntrinsicOp::MOP_RayTMin, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_RayTMin},
  4867. {IntrinsicOp::MOP_TraceRayInline, TranslateTraceRayInline, DXIL::OpCode::RayQuery_TraceRayInline},
  4868. {IntrinsicOp::MOP_WorldRayDirection, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_WorldRayDirection},
  4869. {IntrinsicOp::MOP_WorldRayOrigin, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_WorldRayOrigin},
  4870. // SPIRV change starts
  4871. #ifdef ENABLE_SPIRV_CODEGEN
  4872. {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes},
  4873. #endif // ENABLE_SPIRV_CODEGEN
  4874. // SPIRV change ends
4875. // Manually added part.
  4876. { IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4877. { IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4878. { IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4879. { IntrinsicOp::IOP_WaveActiveUMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4880. { IntrinsicOp::IOP_WaveActiveUProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4881. { IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  4882. { IntrinsicOp::IOP_WaveMultiPrefixUProduct, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp },
  4883. { IntrinsicOp::IOP_WaveMultiPrefixUSum, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp },
  4884. { IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
  4885. { IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
  4886. { IntrinsicOp::IOP_uabs, TranslateUAbs, DXIL::OpCode::NumOpCodes },
  4887. { IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes },
  4888. { IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitHi },
  4889. { IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},
  4890. { IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax},
  4891. { IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin },
  4892. { IntrinsicOp::IOP_umul, TranslateMul, DXIL::OpCode::UMul },
  4893. { IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax },
  4894. { IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4895. { IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  4896. };
  4897. }
  4898. static_assert(sizeof(gLowerTable) / sizeof(gLowerTable[0]) == static_cast<size_t>(IntrinsicOp::Num_Intrinsics),
  4899. "Intrinsic lowering table must be updated to account for new intrinsics.");
  4900. static void TranslateBuiltinIntrinsic(CallInst *CI,
  4901. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4902. unsigned opcode = hlsl::GetHLOpcode(CI);
  4903. const IntrinsicLower &lower = gLowerTable[opcode];
  4904. Value *Result =
  4905. lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode, helper, pObjHelper, Translated);
  4906. if (Result)
  4907. CI->replaceAllUsesWith(Result);
  4908. }
  4909. // SharedMem.
  4910. namespace {
  4911. bool IsSharedMemPtr(Value *Ptr) {
  4912. return Ptr->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace;
  4913. }
  4914. bool IsLocalVariablePtr(Value *Ptr) {
  4915. while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
  4916. Ptr = GEP->getPointerOperand();
  4917. }
  4918. bool isAlloca = isa<AllocaInst>(Ptr);
  4919. if (isAlloca) return true;
  4920. GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
  4921. if (!GV) return false;
  4922. return GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage;
  4923. }
  4924. }
  4925. // Constant buffer.
  4926. namespace {
  4927. unsigned GetEltTypeByteSizeForConstBuf(Type *EltType, const DataLayout &DL) {
  4928. DXASSERT(EltType->isIntegerTy() || EltType->isFloatingPointTy(),
  4929. "not an element type");
  4930. // TODO: Use real size after change constant buffer into linear layout.
  4931. if (DL.getTypeSizeInBits(EltType) <= 32) {
  4932. // Constant buffer is 4 bytes align.
  4933. return 4;
  4934. } else
  4935. return 8;
  4936. }
  4937. Value *GenerateCBLoad(Value *handle, Value *offset, Type *EltTy, OP *hlslOP,
  4938. IRBuilder<> &Builder) {
  4939. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoad);
  4940. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  4941. // Align to 8 bytes for now.
  4942. Constant *align = hlslOP->GetU32Const(8);
  4943. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, EltTy);
  4944. return Builder.CreateCall(CBLoad, {OpArg, handle, offset, align});
  4945. }
  4946. Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset,
  4947. bool colMajor, OP *OP, const DataLayout &DL,
  4948. IRBuilder<> &Builder) {
  4949. HLMatrixType MatTy = HLMatrixType::cast(matType);
  4950. Type *EltTy = MatTy.getElementTypeForMem();
  4951. unsigned matSize = MatTy.getNumElements();
  4952. std::vector<Value *> elts(matSize);
  4953. Value *EltByteSize = ConstantInt::get(
  4954. offset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  4955. // TODO: use real size after change constant buffer into linear layout.
  4956. Value *baseOffset = offset;
  4957. for (unsigned i = 0; i < matSize; i++) {
  4958. elts[i] = GenerateCBLoad(handle, baseOffset, EltTy, OP, Builder);
  4959. baseOffset = Builder.CreateAdd(baseOffset, EltByteSize);
  4960. }
  4961. Value* Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  4962. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  4963. return Vec;
  4964. }
  4965. void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
  4966. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  4967. DxilFieldAnnotation *prevFieldAnnotation,
  4968. const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
  4969. HLObjectOperationLowerHelper *pObjHelper);
  4970. Value *GenerateVecEltFromGEP(Value *ldData, GetElementPtrInst *GEP,
  4971. IRBuilder<> &Builder, bool bInsertLdNextToGEP) {
  4972. DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
  4973. Value *baseIdx = (GEP->idx_begin())->get();
  4974. Value *zeroIdx = Builder.getInt32(0);
  4975. DXASSERT_LOCALVAR(baseIdx && zeroIdx, baseIdx == zeroIdx,
  4976. "base index must be 0");
  4977. Value *idx = (GEP->idx_begin() + 1)->get();
  4978. if (dyn_cast<ConstantInt>(idx)) {
  4979. return Builder.CreateExtractElement(ldData, idx);
  4980. } else {
  4981. // Dynamic indexing.
  4982. // Copy vec to array.
  4983. Type *Ty = ldData->getType();
  4984. Type *EltTy = Ty->getVectorElementType();
  4985. unsigned vecSize = Ty->getVectorNumElements();
  4986. ArrayType *AT = ArrayType::get(EltTy, vecSize);
  4987. IRBuilder<> AllocaBuilder(
  4988. GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
  4989. Value *tempArray = AllocaBuilder.CreateAlloca(AT);
  4990. Value *zero = Builder.getInt32(0);
  4991. for (unsigned int i = 0; i < vecSize; i++) {
  4992. Value *Elt = Builder.CreateExtractElement(ldData, Builder.getInt32(i));
  4993. Value *Ptr =
  4994. Builder.CreateInBoundsGEP(tempArray, {zero, Builder.getInt32(i)});
  4995. Builder.CreateStore(Elt, Ptr);
  4996. }
  4997. // Load from temp array.
  4998. if (bInsertLdNextToGEP) {
  4999. // Insert the new GEP just before the old and to-be-deleted GEP
  5000. Builder.SetInsertPoint(GEP);
  5001. }
  5002. Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
  5003. return Builder.CreateLoad(EltGEP);
  5004. }
  5005. }
// Rewrite a load of a resource that lives inside a cbuffer so it reads from
// a standalone resource global instead: create (or reuse) a resource for the
// cbuffer member, lower the cbuffer GEP to a pointer based on that global,
// and replace the original load. Dead loads are simply erased.
void TranslateResourceInCB(LoadInst *LI,
                           HLObjectOperationLowerHelper *pObjHelper,
                           GlobalVariable *CbGV) {
  if (LI->user_empty()) {
    LI->eraseFromParent();
    return;
  }
  // NOTE(review): the casts below assume a fixed shape — the load's pointer
  // operand is a GEP into the cbuffer global, the load's last user is a
  // handle-creating call, and that call's last user is an annotate-handle
  // call carrying the resource properties. Verify against callers.
  GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand());
  CallInst *CI = cast<CallInst>(LI->user_back());
  CallInst *Anno = cast<CallInst>(CI->user_back());
  DxilResourceProperties RP = pObjHelper->GetResPropsFromAnnotateHandle(Anno);
  Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, RP);
  // Lower Ptr to GV base Ptr.
  Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr);
  IRBuilder<> Builder(LI);
  Value *GvLd = Builder.CreateLoad(GvPtr);
  LI->replaceAllUsesWith(GvLd);
  LI->eraseFromParent();
}
// Lower one user of a cbuffer address (resource handle + byte offset in
// baseOffset) into DXIL CBufferLoad operations. The user is one of:
//  - an HL matrix-load intrinsic  -> per-element matrix load,
//  - an HL subscript intrinsic    -> loads at per-element computed offsets,
//  - a plain scalar/vector load   -> one CBufferLoad per component,
//  - a GEP                        -> recurse through TranslateCBGep.
// Replaced instructions are erased as they are handled.
// prevFieldAnnotation carries layout information from an enclosing struct
// field for the GEP path.
void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset,
                            hlsl::OP *hlslOP,
                            DxilFieldAnnotation *prevFieldAnnotation,
                            DxilTypeSystem &dxilTypeSys, const DataLayout &DL,
                            HLObjectOperationLowerHelper *pObjHelper) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      // cbuffers are read-only, so only matrix loads can reach here.
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
                          ->getType()
                          ->getPointerElementType();
      Value *newLd = TranslateConstBufMatLd(matType, handle, baseOffset,
                                            colMajor, hlslOP, DL, Builder);
      CI->replaceAllUsesWith(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      HLMatrixType MatTy =
          HLMatrixType::cast(basePtr->getType()->getPointerElementType());
      Type *EltTy = MatTy.getElementTypeForReg();
      // Byte stride between consecutive elements in cbuffer layout.
      Value *EltByteSize = ConstantInt::get(
          baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      // Byte offset into the cbuffer for each element of the result.
      Value *idxList[16];
      switch (subOp) {
      case HLSubscriptOpcode::ColMatSubscript:
      case HLSubscriptOpcode::RowMatSubscript: {
        // Dynamic subscript: one index operand per result element.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *idx =
              CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          Value *offset = Builder.CreateMul(idx, EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      case HLSubscriptOpcode::RowMatElement:
      case HLSubscriptOpcode::ColMatElement: {
        // Element access: the indices are a constant aggregate.
        Constant *EltIdxs = cast<Constant>(idx);
        for (unsigned i = 0; i < resultSize; i++) {
          Value *offset =
              Builder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      default:
        DXASSERT(0, "invalid operation on const buffer");
        break;
      }
      // Load each addressed element and assemble the scalar/vector result.
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *eltData =
              GenerateCBLoad(handle, idxList[i], EltTy, hlslOP, Builder);
          ldData = Builder.CreateInsertElement(ldData, eltData, i);
        }
      } else {
        ldData = GenerateCBLoad(handle, idxList[0], EltTy, hlslOP, Builder);
      }
      // Forward the loaded value to the subscript's users (loads, possibly
      // reached through a vector-element GEP), erasing them as we go.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder,
                                                 /*bInsertLdNextToGEP*/ true);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be a load here.
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be a load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
    if (dxilutil::IsHLSLObjectType(Ty)) {
      CallInst *CI = cast<CallInst>(handle);
      GlobalVariable *CbGV = cast<GlobalVariable>(
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
      return;
    }
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
    Value *newLd = GenerateCBLoad(handle, baseOffset, EltTy, hlslOP, Builder);
    if (Ty->isVectorTy()) {
      // Vector load: one CBufferLoad per component, advancing the offset by
      // the element byte stride (4 or 8 bytes) each time.
      Value *result = UndefValue::get(Ty);
      result = Builder.CreateInsertElement(result, newLd, (uint64_t)0);
      Value *offset =
          Builder.CreateAdd(baseOffset, hlslOP->GetU32Const(EltByteSize));
      for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
        Value *elt = GenerateCBLoad(handle, offset, EltTy, hlslOP, Builder);
        result = Builder.CreateInsertElement(result, elt, i);
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(EltByteSize));
      }
      newLd = result;
    }
    ldInst->replaceAllUsesWith(newLd);
    ldInst->eraseFromParent();
  } else {
    // Must be a GEP here.
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGep(GEP, handle, baseOffset, hlslOP, Builder,
                   prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
    GEP->eraseFromParent();
  }
}
// Translate a GEP into a (non-legacy layout) cbuffer into a byte offset,
// then recursively lower every user of the GEP at the accumulated offset.
//   handle              - dxil cbuffer handle.
//   baseOffset          - byte offset (i32 Value) accumulated so far.
//   prevFieldAnnotation - annotation for the field addressed so far; required
//                         to size arrays with legacy 16-byte-aligned packing.
void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
                    hlsl::OP *hlslOP, IRBuilder<> &Builder,
                    DxilFieldAnnotation *prevFieldAnnotation,
                    const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
                    HLObjectOperationLowerHelper *pObjHelper) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *offset = baseOffset;
  // update offset
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; GEPIt++) {
    Value *idx = GEPIt.getOperand();
    unsigned immIdx = 0;
    bool bImmIdx = false;
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
      bImmIdx = true;
    }
    if (GEPIt->isPointerTy()) {
      // Leading pointer index: scale by the size of the pointee type.
      Type *EltTy = GEPIt->getPointerElementType();
      unsigned size = 0;
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
        // Struct size comes from its cbuffer annotation, not the DataLayout.
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
        size = annotation->GetCBufferSize();
      } else {
        DXASSERT(fieldAnnotation, "must be a field");
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
              *fieldAnnotation, EltTy, dxilTypeSys);
          // Decide the nested array size.
          unsigned nestedArraySize = 1;
          Type *EltTy = AT->getArrayElementType();
          // support multi level of array
          while (EltTy->isArrayTy()) {
            ArrayType *EltAT = cast<ArrayType>(EltTy);
            nestedArraySize *= EltAT->getNumElements();
            EltTy = EltAT->getElementType();
          }
          // Align to 4 * 4 bytes.
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
          size = nestedArraySize * alignedSize;
        } else {
          size = DL.getTypeAllocSize(EltTy);
        }
      }
      // Align to 4 * 4 bytes.
      size = (size + 15) & 0xfffffff0;
      if (bImmIdx) {
        // Constant index: fold size * idx at compile time.
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        // Dynamic index: emit the multiply at runtime.
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else if (GEPIt->isStructTy()) {
      // Struct field: add the field's cbuffer offset from its annotation and
      // remember the annotation for nested array sizing below.
      StructType *ST = cast<StructType>(*GEPIt);
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
      unsigned structOffset = fieldAnnotation->GetCBufferOffset();
      offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(structOffset));
    } else if (GEPIt->isArrayTy()) {
      DXASSERT(fieldAnnotation != nullptr, "must a field");
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
          *fieldAnnotation, *GEPIt, dxilTypeSys);
      // Decide the nested array size.
      unsigned nestedArraySize = 1;
      Type *EltTy = GEPIt->getArrayElementType();
      // support multi level of array
      while (EltTy->isArrayTy()) {
        ArrayType *EltAT = cast<ArrayType>(EltTy);
        nestedArraySize *= EltAT->getNumElements();
        EltTy = EltAT->getElementType();
      }
      // Align to 4 * 4 bytes.
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
      unsigned size = nestedArraySize * alignedSize;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else if (GEPIt->isVectorTy()) {
      // Vector component: scale by the element byte size.
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else {
      // Scalar element type may only appear as the final GEP index.
      gep_type_iterator temp = GEPIt;
      temp++;
      DXASSERT(temp == E, "scalar type must be the last");
    }
  }
  // Lower every user at the final offset. Advance the iterator before
  // translating because translation erases the user.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
    Instruction *user = cast<Instruction>(*(U++));
    TranslateCBAddressUser(user, handle, offset, hlslOP, fieldAnnotation,
                           dxilTypeSys, DL, pObjHelper);
  }
}
  5261. void TranslateCBOperations(Value *handle, Value *ptr, Value *offset, OP *hlslOP,
  5262. DxilTypeSystem &dxilTypeSys, const DataLayout &DL,
  5263. HLObjectOperationLowerHelper *pObjHelper) {
  5264. auto User = ptr->user_begin();
  5265. auto UserE = ptr->user_end();
  5266. for (; User != UserE;) {
  5267. // Must be Instruction.
  5268. Instruction *I = cast<Instruction>(*(User++));
  5269. TranslateCBAddressUser(I, handle, offset, hlslOP,
  5270. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL,
  5271. pObjHelper);
  5272. }
  5273. }
  5274. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  5275. unsigned channelOffset, Type *EltTy, OP *hlslOP,
  5276. IRBuilder<> &Builder) {
  5277. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  5278. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  5279. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  5280. Type *halfTy = Type::getHalfTy(EltTy->getContext());
  5281. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  5282. Type *i16Ty = Type::getInt16Ty(EltTy->getContext());
  5283. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  5284. bool is16 = (EltTy == halfTy || EltTy == i16Ty) && !hlslOP->UseMinPrecision();
  5285. DXASSERT_LOCALVAR(is16, (is16 && channelOffset < 8) || channelOffset < 4,
  5286. "legacy cbuffer don't across 16 bytes register.");
  5287. if (is64) {
  5288. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5289. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  5290. DXASSERT((channelOffset&1)==0,"channel offset must be even for double");
  5291. unsigned eltIdx = channelOffset>>1;
  5292. Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx);
  5293. return Result;
  5294. } else {
  5295. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5296. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  5297. return Builder.CreateExtractValue(loadLegacy, channelOffset);
  5298. }
  5299. }
  5300. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  5301. unsigned channelOffset, Type *EltTy,
  5302. unsigned vecSize, OP *hlslOP,
  5303. IRBuilder<> &Builder) {
  5304. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  5305. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  5306. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  5307. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  5308. Type *halfTy = Type::getHalfTy(EltTy->getContext());
  5309. Type *shortTy = Type::getInt16Ty(EltTy->getContext());
  5310. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  5311. bool is16 = (EltTy == shortTy || EltTy == halfTy) && !hlslOP->UseMinPrecision();
  5312. DXASSERT((is16 && channelOffset + vecSize <= 8) ||
  5313. (channelOffset + vecSize) <= 4,
  5314. "legacy cbuffer don't across 16 bytes register.");
  5315. if (is16) {
  5316. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5317. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  5318. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  5319. for (unsigned i = 0; i < vecSize; ++i) {
  5320. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
  5321. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5322. }
  5323. return Result;
  5324. } else if (is64) {
  5325. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5326. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  5327. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  5328. unsigned smallVecSize = 2;
  5329. if (vecSize < smallVecSize)
  5330. smallVecSize = vecSize;
  5331. for (unsigned i = 0; i < smallVecSize; ++i) {
  5332. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
  5333. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5334. }
  5335. if (vecSize > 2) {
  5336. // Got to next cb register.
  5337. legacyIdx = Builder.CreateAdd(legacyIdx, hlslOP->GetU32Const(1));
  5338. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  5339. for (unsigned i = 2; i < vecSize; ++i) {
  5340. Value *NewElt =
  5341. Builder.CreateExtractValue(loadLegacy, i-2);
  5342. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5343. }
  5344. }
  5345. return Result;
  5346. } else {
  5347. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5348. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  5349. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  5350. for (unsigned i = 0; i < vecSize; ++i) {
  5351. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
  5352. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5353. }
  5354. return Result;
  5355. }
  5356. }
  5357. Value *TranslateConstBufMatLdLegacy(HLMatrixType MatTy, Value *handle,
  5358. Value *legacyIdx, bool colMajor, OP *OP,
  5359. bool memElemRepr, const DataLayout &DL,
  5360. IRBuilder<> &Builder) {
  5361. Type *EltTy = MatTy.getElementTypeForMem();
  5362. unsigned matSize = MatTy.getNumElements();
  5363. std::vector<Value *> elts(matSize);
  5364. unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
  5365. if (colMajor) {
  5366. unsigned colByteSize = 4 * EltByteSize;
  5367. unsigned colRegSize = (colByteSize + 15) >> 4;
  5368. for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
  5369. Value *col = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
  5370. EltTy, MatTy.getNumRows(), OP, Builder);
  5371. for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
  5372. unsigned matIdx = MatTy.getColumnMajorIndex(r, c);
  5373. elts[matIdx] = Builder.CreateExtractElement(col, r);
  5374. }
  5375. // Update offset for a column.
  5376. legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(colRegSize));
  5377. }
  5378. } else {
  5379. unsigned rowByteSize = 4 * EltByteSize;
  5380. unsigned rowRegSize = (rowByteSize + 15) >> 4;
  5381. for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
  5382. Value *row = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
  5383. EltTy, MatTy.getNumColumns(), OP, Builder);
  5384. for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
  5385. unsigned matIdx = MatTy.getRowMajorIndex(r, c);
  5386. elts[matIdx] = Builder.CreateExtractElement(row, c);
  5387. }
  5388. // Update offset for a row.
  5389. legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(rowRegSize));
  5390. }
  5391. }
  5392. Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  5393. if (!memElemRepr)
  5394. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  5395. return Vec;
  5396. }
  5397. void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
  5398. Value *legacyIdx, unsigned channelOffset,
  5399. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  5400. DxilFieldAnnotation *prevFieldAnnotation,
  5401. const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
  5402. HLObjectOperationLowerHelper *pObjHelper);
// Lower one user of a legacy-layout cbuffer pointer into dxil
// cbufferLoadLegacy operations. The address is the pair (legacyIdx,
// channelOffset): legacyIdx is the 16-byte register index (i32 Value),
// channelOffset the component slot within that register. The translated user
// is erased. Handled users:
//   - HLMatLoadStore matrix loads,
//   - HLSubscript matrix element/subscript reads (constant or dynamic index),
//   - scalar/vector LoadInst (including resources inside cbuffers),
//   - BitCastInst (recurse into its users),
//   - GetElementPtrInst (recurse via TranslateCBGepLegacy).
void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
                                  Value *legacyIdx, unsigned channelOffset,
                                  hlsl::OP *hlslOP,
                                  DxilFieldAnnotation *prevFieldAnnotation,
                                  DxilTypeSystem &dxilTypeSys,
                                  const DataLayout &DL,
                                  HLObjectOperationLowerHelper *pObjHelper) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
               matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      HLMatrixType MatTy = HLMatrixType::cast(
          CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
          ->getType()->getPointerElementType());
      // This will replace a call, so we should use the register representation of elements
      Value *newLd = TranslateConstBufMatLdLegacy(
          MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/false, DL, Builder);
      CI->replaceAllUsesWith(newLd);
      dxilutil::TryScatterDebugValueToVectorElements(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      HLMatrixType MatTy = HLMatrixType::cast(basePtr->getType()->getPointerElementType());
      Type *EltTy = MatTy.getElementTypeForReg();
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      Value *idxList[16];
      bool colMajor = subOp == HLSubscriptOpcode::ColMatSubscript ||
                      subOp == HLSubscriptOpcode::ColMatElement;
      // Dynamic indexing means the subscript cannot be folded to constants.
      bool dynamicIndexing = !isa<ConstantInt>(idx) &&
                             !isa<ConstantAggregateZero>(idx) &&
                             !isa<ConstantDataSequential>(idx);
      Value *ldData = UndefValue::get(resultType);
      if (!dynamicIndexing) {
        // Constant index path: load the whole matrix, then pick elements.
        // This will replace a load or GEP, so we should use the memory representation of elements
        Value *matLd = TranslateConstBufMatLdLegacy(
            MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/true, DL, Builder);
        // The matLd is keep original layout, just use the idx calc in
        // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript.
        switch (subOp) {
        case HLSubscriptOpcode::RowMatSubscript:
        case HLSubscriptOpcode::ColMatSubscript: {
          // Subscript: element indices are trailing call operands.
          for (unsigned i = 0; i < resultSize; i++) {
            idxList[i] =
                CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          }
        } break;
        case HLSubscriptOpcode::RowMatElement:
        case HLSubscriptOpcode::ColMatElement: {
          // Element access: indices are packed in one constant aggregate.
          Constant *EltIdxs = cast<Constant>(idx);
          for (unsigned i = 0; i < resultSize; i++) {
            idxList[i] = EltIdxs->getAggregateElement(i);
          }
        } break;
        default:
          DXASSERT(0, "invalid operation on const buffer");
          break;
        }
        if (resultType->isVectorTy()) {
          for (unsigned i = 0; i < resultSize; i++) {
            Value *eltData = Builder.CreateExtractElement(matLd, idxList[i]);
            ldData = Builder.CreateInsertElement(ldData, eltData, i);
          }
        } else {
          Value *eltData = Builder.CreateExtractElement(matLd, idxList[0]);
          ldData = eltData;
        }
      } else {
        // Must be matSub here.
        Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
        if (colMajor) {
          // idx is c * row + r.
          // For first col, c is 0, so idx is r.
          Value *one = Builder.getInt32(1);
          // row.x = c[0].[idx]
          // row.y = c[1].[idx]
          // row.z = c[2].[idx]
          // row.w = c[3].[idx]
          Value *Elts[4];
          ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumColumns());
          // Allocas must be created in the function entry block.
          IRBuilder<> AllocaBuilder(user->getParent()
                                    ->getParent()
                                    ->getEntryBlock()
                                    .getFirstInsertionPt());
          Value *tempArray = AllocaBuilder.CreateAlloca(AT);
          Value *zero = AllocaBuilder.getInt32(0);
          Value *cbufIdx = legacyIdx;
          for (unsigned int c = 0; c < MatTy.getNumColumns(); c++) {
            Value *ColVal =
                GenerateCBLoadLegacy(handle, cbufIdx, /*channelOffset*/ 0,
                                     EltTy, MatTy.getNumRows(), hlslOP, Builder);
            // Convert ColVal to array for indexing.
            for (unsigned int r = 0; r < MatTy.getNumRows(); r++) {
              Value *Elt =
                  Builder.CreateExtractElement(ColVal, Builder.getInt32(r));
              Value *Ptr = Builder.CreateInBoundsGEP(
                  tempArray, {zero, Builder.getInt32(r)});
              Builder.CreateStore(Elt, Ptr);
            }
            // Dynamically index the spilled column.
            Value *Ptr = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
            Elts[c] = Builder.CreateLoad(Ptr);
            // Update cbufIdx.
            cbufIdx = Builder.CreateAdd(cbufIdx, one);
          }
          if (resultType->isVectorTy()) {
            for (unsigned int c = 0; c < MatTy.getNumColumns(); c++) {
              ldData = Builder.CreateInsertElement(ldData, Elts[c], c);
            }
          } else {
            ldData = Elts[0];
          }
        } else {
          // idx is r * col + c;
          // r = idx / col;
          Value *cCol = ConstantInt::get(idx->getType(), MatTy.getNumColumns());
          idx = Builder.CreateUDiv(idx, cCol);
          idx = Builder.CreateAdd(idx, legacyIdx);
          // Just return a row; 'col' is the number of columns in the row.
          ldData = GenerateCBLoadLegacy(handle, idx, /*channelOffset*/ 0, EltTy,
                                        MatTy.getNumColumns(), hlslOP, Builder);
        }
        if (!resultType->isVectorTy()) {
          ldData = Builder.CreateExtractElement(ldData, Builder.getInt32(0));
        }
      }
      // Replace each user of the subscript call with the loaded data.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder,
                                                 /*bInsertLdNextToGEP*/ true);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be load here;
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLIntrinsic) {
      // FIXME: This case is hit when using built-in structures in constant
      // buffers passed directly to an intrinsic, such as:
      // RayDesc from cbuffer passed to TraceRay.
      DXASSERT(0, "not implemented yet");
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
    if (dxilutil::IsHLSLObjectType(Ty)) {
      CallInst *CI = cast<CallInst>(handle);
      GlobalVariable *CbGV = cast<GlobalVariable>(
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
      return;
    }
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    Value *newLd = nullptr;
    if (Ty->isVectorTy())
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
                                   Ty->getVectorNumElements(), hlslOP, Builder);
    else
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
                                   hlslOP, Builder);
    ldInst->replaceAllUsesWith(newLd);
    dxilutil::TryScatterDebugValueToVectorElements(newLd);
    ldInst->eraseFromParent();
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
    // Bitcasts are transparent: translate their users at the same address.
    for (auto it = BCI->user_begin(); it != BCI->user_end(); ) {
      Instruction *I = cast<Instruction>(*it++);
      TranslateCBAddressUserLegacy(I,
                                   handle, legacyIdx, channelOffset, hlslOP,
                                   prevFieldAnnotation, dxilTypeSys,
                                   DL, pObjHelper);
    }
    BCI->eraseFromParent();
  } else {
    // Must be GEP here
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGepLegacy(GEP, handle, legacyIdx, channelOffset, hlslOP, Builder,
                         prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
    GEP->eraseFromParent();
  }
}
// Walk a GEP into a legacy-layout cbuffer, updating the (legacyIndex, channel)
// address per index, then lower every user of the GEP at the resulting
// address. channel counts components within a 16-byte register: dwords for
// 32-bit fields, 2-byte slots (0..7) for native 16-bit fields.
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
                          Value *legacyIndex, unsigned channel,
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
                          DxilFieldAnnotation *prevFieldAnnotation,
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
                          HLObjectOperationLowerHelper *pObjHelper) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  // update offset
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; GEPIt++) {
    Value *idx = GEPIt.getOperand();
    unsigned immIdx = 0;
    bool bImmIdx = false;
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
      bImmIdx = true;
    }
    if (GEPIt->isPointerTy()) {
      // Leading pointer index: scale by the pointee's cbuffer size.
      Type *EltTy = GEPIt->getPointerElementType();
      unsigned size = 0;
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
        size = annotation->GetCBufferSize();
      } else {
        DXASSERT(fieldAnnotation, "must be a field");
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
              *fieldAnnotation, EltTy, dxilTypeSys);
          // Decide the nested array size.
          unsigned nestedArraySize = 1;
          Type *EltTy = AT->getArrayElementType();
          // support multi level of array
          while (EltTy->isArrayTy()) {
            ArrayType *EltAT = cast<ArrayType>(EltTy);
            nestedArraySize *= EltAT->getNumElements();
            EltTy = EltAT->getElementType();
          }
          // Align to 4 * 4 bytes.
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
          size = nestedArraySize * alignedSize;
        } else {
          size = DL.getTypeAllocSize(EltTy);
        }
      }
      // Skip 0 idx.
      if (bImmIdx && immIdx == 0)
        continue;
      // Align to 4 * 4 bytes.
      size = (size + 15) & 0xfffffff0;
      // Take this as array idxing.
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        unsigned idxInc = tempOffset >> 4;
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
      } else {
        // Dynamic index: each element spans size>>4 registers.
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size>>4));
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
      }
      // Array always start from x channel.
      channel = 0;
    } else if (GEPIt->isStructTy()) {
      StructType *ST = cast<StructType>(*GEPIt);
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
      unsigned idxInc = 0;
      unsigned structOffset = 0;
      if (fieldAnnotation->GetCompType().Is16Bit() &&
          !hlslOP->UseMinPrecision()) {
        // Native 16-bit field: channel counts 2-byte slots, 8 per register.
        structOffset = fieldAnnotation->GetCBufferOffset() >> 1;
        channel += structOffset;
        idxInc = channel >> 3;
        channel = channel & 0x7;
      }
      else {
        // 32-bit field: channel counts dwords, 4 per register.
        structOffset = fieldAnnotation->GetCBufferOffset() >> 2;
        channel += structOffset;
        idxInc = channel >> 2;
        channel = channel & 0x3;
      }
      if (idxInc)
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
    } else if (GEPIt->isArrayTy()) {
      DXASSERT(fieldAnnotation != nullptr, "must a field");
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
          *fieldAnnotation, *GEPIt, dxilTypeSys);
      // Decide the nested array size.
      unsigned nestedArraySize = 1;
      Type *EltTy = GEPIt->getArrayElementType();
      // support multi level of array
      while (EltTy->isArrayTy()) {
        ArrayType *EltAT = cast<ArrayType>(EltTy);
        nestedArraySize *= EltAT->getNumElements();
        EltTy = EltAT->getElementType();
      }
      // Align to 4 * 4 bytes.
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
      unsigned size = nestedArraySize * alignedSize;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        unsigned idxInc = tempOffset >> 4;
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
      } else {
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size>>4));
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
      }
      // Array always start from x channel.
      channel = 0;
    } else if (GEPIt->isVectorTy()) {
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
      // Indexing on vector.
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        if (size == 2) { // 16-bit types
          unsigned channelInc = tempOffset >> 1;
          DXASSERT((channel + channelInc) <= 8, "vector should not cross cb register (8x16bit)");
          channel += channelInc;
          if (channel == 8) {
            // Get to another row.
            // Update index and channel.
            channel = 0;
            legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
          }
        }
        else {
          unsigned channelInc = tempOffset >> 2;
          DXASSERT((channel + channelInc) <= 4, "vector should not cross cb register (8x32bit)");
          channel += channelInc;
          if (channel == 4) {
            // Get to another row.
            // Update index and channel.
            channel = 0;
            legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
          }
        }
      } else {
        // Dynamic vector index: spill the whole register to a temp array and
        // replace the GEP with a GEP into that array. Caller erases the GEP.
        Type *EltTy = GEPIt->getVectorElementType();
        // Load the whole register.
        Value *newLd = GenerateCBLoadLegacy(handle, legacyIndex,
                                            /*channelOffset*/ 0, EltTy,
                                            /*vecSize*/ 4, hlslOP, Builder);
        // Copy to array.
        IRBuilder<> AllocaBuilder(GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
        Value *tempArray = AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, 4));
        Value *zeroIdx = hlslOP->GetU32Const(0);
        for (unsigned i = 0; i < 4; i++) {
          Value *Elt = Builder.CreateExtractElement(newLd, i);
          Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, hlslOP->GetU32Const(i)});
          Builder.CreateStore(Elt, EltGEP);
        }
        // Make sure this is the end of GEP.
        gep_type_iterator temp = GEPIt;
        temp++;
        DXASSERT(temp == E, "scalar type must be the last");
        // Replace the GEP with array GEP.
        Value *ArrayGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, idx});
        GEP->replaceAllUsesWith(ArrayGEP);
        return;
      }
    } else {
      // Scalar element type may only appear as the final GEP index.
      gep_type_iterator temp = GEPIt;
      temp++;
      DXASSERT(temp == E, "scalar type must be the last");
    }
  }
  // Lower every user at the final (register, channel) address. Advance the
  // iterator before translating because translation erases the user.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
    Instruction *user = cast<Instruction>(*(U++));
    TranslateCBAddressUserLegacy(user, handle, legacyIndex, channel, hlslOP, fieldAnnotation,
                                 dxilTypeSys, DL, pObjHelper);
  }
}
  5779. void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP,
  5780. DxilTypeSystem &dxilTypeSys,
  5781. const DataLayout &DL,
  5782. HLObjectOperationLowerHelper *pObjHelper) {
  5783. auto User = ptr->user_begin();
  5784. auto UserE = ptr->user_end();
  5785. Value *zeroIdx = hlslOP->GetU32Const(0);
  5786. for (; User != UserE;) {
  5787. // Must be Instruction.
  5788. Instruction *I = cast<Instruction>(*(User++));
  5789. TranslateCBAddressUserLegacy(
  5790. I, handle, zeroIdx, /*channelOffset*/ 0, hlslOP,
  5791. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL, pObjHelper);
  5792. }
  5793. }
  5794. }
  5795. // Structured buffer.
  5796. namespace {
  5797. // Calculate offset.
// Calculate the byte offset addressed by a GEP on a raw/structured buffer
// pointer.
// Returns an i32 offset: a constant when all GEP indices are constant,
// otherwise an IR expression built in `Builder` that accumulates per-level
// contributions (index * element-size for sequential types, struct-layout
// field offsets for struct members).
Value *GEPIdxToOffset(GetElementPtrInst *GEP, IRBuilder<> &Builder,
                      hlsl::OP *OP, const DataLayout &DL) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *addr = nullptr;
  // update offset
  if (GEP->hasAllConstantIndices()) {
    // Fast path: let DataLayout fold the entire index list at once.
    unsigned gepOffset =
        DL.getIndexedOffset(GEP->getPointerOperandType(), Indices);
    addr = OP->GetU32Const(gepOffset);
  } else {
    Value *offset = OP->GetU32Const(0);
    gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
    for (; GEPIt != E; GEPIt++) {
      Value *idx = GEPIt.getOperand();
      unsigned immIdx = 0;
      if (llvm::Constant *constIdx = dyn_cast<llvm::Constant>(idx)) {
        immIdx = constIdx->getUniqueInteger().getLimitedValue();
        if (immIdx == 0) {
          // A constant zero index contributes nothing to the offset.
          continue;
        }
      }
      if (GEPIt->isPointerTy() || GEPIt->isArrayTy() || GEPIt->isVectorTy()) {
        // Sequential types: contribution is index * element allocation size.
        unsigned size = DL.getTypeAllocSize(GEPIt->getSequentialElementType());
        if (immIdx) {
          unsigned tempOffset = size * immIdx;
          offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset));
        } else {
          // Dynamic index: emit the multiply at runtime.
          Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size));
          offset = Builder.CreateAdd(offset, tempOffset);
        }
      } else if (GEPIt->isStructTy()) {
        // Struct field: use the precomputed struct layout offset.
        const StructLayout *Layout = DL.getStructLayout(cast<StructType>(*GEPIt));
        unsigned structOffset = Layout->getElementOffset(immIdx);
        offset = Builder.CreateAdd(offset, OP->GetU32Const(structOffset));
      } else {
        // A scalar can only appear as the innermost (last) GEP level.
        gep_type_iterator temp = GEPIt;
        temp++;
        DXASSERT(temp == E, "scalar type must be the last");
      }
    };
    addr = offset;
  }
  // TODO: x4 for byte address
  return addr;
}
  5843. // Load a value from a typedef buffer with an offset.
  5844. // Typed buffer do not directly support reading at offsets
  5845. // because the whole value (e.g. float4) must be read at once.
  5846. // If we are provided a non-zero offset, we need to simulate it
  5847. // by returning the correct elements.
  5848. using ResRetValueArray = std::array<Value*, 4>;
  5849. static ResRetValueArray GenerateTypedBufferLoad(
  5850. Value *Handle, Type *BufferElemTy, Value *ElemIdx, Value *StatusPtr,
  5851. OP* HlslOP, IRBuilder<> &Builder) {
  5852. OP::OpCode OpCode = OP::OpCode::BufferLoad;
  5853. Value* LoadArgs[] = { HlslOP->GetU32Const((unsigned)OpCode), Handle, ElemIdx, UndefValue::get(Builder.getInt32Ty()) };
  5854. Function* LoadFunc = HlslOP->GetOpFunc(OpCode, BufferElemTy);
  5855. Value* Load = Builder.CreateCall(LoadFunc, LoadArgs, OP::GetOpCodeName(OpCode));
  5856. ResRetValueArray ResultValues;
  5857. for (unsigned i = 0; i < ResultValues.size(); ++i) {
  5858. ResultValues[i] = cast<ExtractValueInst>(Builder.CreateExtractValue(Load, { i }));
  5859. }
  5860. UpdateStatus(Load, StatusPtr, Builder, HlslOP);
  5861. return ResultValues;
  5862. }
  5863. static AllocaInst* SpillValuesToArrayAlloca(ArrayRef<Value*> Values, IRBuilder<>& Builder) {
  5864. DXASSERT_NOMSG(!Values.empty());
  5865. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
  5866. AllocaInst* ArrayAlloca = AllocaBuilder.CreateAlloca(ArrayType::get(Values[0]->getType(), Values.size()));
  5867. for (unsigned i = 0; i < Values.size(); ++i) {
  5868. Value* ArrayElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), Builder.getInt32(i) });
  5869. Builder.CreateStore(Values[i], ArrayElemPtr);
  5870. }
  5871. return ArrayAlloca;
  5872. }
  5873. static Value* ExtractFromTypedBufferLoad(const ResRetValueArray& ResRet,
  5874. Type* ResultTy, Value* Offset, IRBuilder<>& Builder) {
  5875. unsigned ElemCount = ResultTy->isVectorTy() ? ResultTy->getVectorNumElements() : 1;
  5876. DXASSERT_NOMSG(ElemCount < ResRet.size());
  5877. unsigned ElemSizeInBytes = ResRet[0]->getType()->getScalarSizeInBits() / 8;
  5878. SmallVector<Value*, 4> Elems;
  5879. if (ConstantInt *OffsetAsConstantInt = dyn_cast<ConstantInt>(Offset)) {
  5880. // Get all elements to be returned
  5881. uint64_t FirstElemOffset = OffsetAsConstantInt->getLimitedValue();
  5882. DXASSERT_NOMSG(FirstElemOffset % ElemSizeInBytes == 0);
  5883. uint64_t FirstElemIdx = FirstElemOffset / ElemSizeInBytes;
  5884. DXASSERT_NOMSG(FirstElemIdx <= ResRet.size() - ElemCount);
  5885. for (unsigned ElemIdx = 0; ElemIdx < ElemCount; ++ElemIdx) {
  5886. Elems.emplace_back(ResRet[std::min<size_t>(FirstElemIdx + ElemIdx, ResRet.size() - 1)]);
  5887. }
  5888. }
  5889. else {
  5890. Value* ArrayAlloca = SpillValuesToArrayAlloca(
  5891. ArrayRef<Value*>(ResRet.data(), ResRet.size()), Builder);
  5892. // Get all elements to be returned through dynamic indices
  5893. Value *FirstElemIdx = Builder.CreateUDiv(Offset, Builder.getInt32(ElemSizeInBytes));
  5894. for (unsigned i = 0; i < ElemCount; ++i) {
  5895. Value *ElemIdx = Builder.CreateAdd(FirstElemIdx, Builder.getInt32(i));
  5896. Value* ElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), ElemIdx });
  5897. Elems.emplace_back(Builder.CreateLoad(ElemPtr));
  5898. }
  5899. }
  5900. return ScalarizeElements(ResultTy, Elems, Builder);
  5901. }
  5902. Value *GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
  5903. Value *status, Type *EltTy,
  5904. MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
  5905. IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment) {
  5906. OP::OpCode opcode = OP::OpCode::RawBufferLoad;
  5907. DXASSERT(resultElts.size() <= 4,
  5908. "buffer load cannot load more than 4 values");
  5909. if (bufIdx == nullptr) {
  5910. // This is actually a byte address buffer load with a struct template type.
  5911. // The call takes only one coordinates for the offset.
  5912. bufIdx = offset;
  5913. offset = UndefValue::get(offset->getType());
  5914. }
  5915. Function *dxilF = OP->GetOpFunc(opcode, EltTy);
  5916. Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents, OP);
  5917. Value *Args[] = {OP->GetU32Const((unsigned)opcode),
  5918. handle,
  5919. bufIdx,
  5920. offset,
  5921. mask,
  5922. alignment};
  5923. Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
  5924. for (unsigned i = 0; i < resultElts.size(); i++) {
  5925. resultElts[i] = Builder.CreateExtractValue(Ld, i);
  5926. }
  5927. // status
  5928. UpdateStatus(Ld, status, Builder, OP);
  5929. return Ld;
  5930. }
  5931. void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
  5932. Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder,
  5933. ArrayRef<Value *> vals, uint8_t mask, Constant *alignment) {
  5934. OP::OpCode opcode = OP::OpCode::RawBufferStore;
  5935. DXASSERT(vals.size() == 4, "buffer store need 4 values");
  5936. Value *Args[] = {OP->GetU32Const((unsigned)opcode),
  5937. handle,
  5938. bufIdx,
  5939. offset,
  5940. vals[0],
  5941. vals[1],
  5942. vals[2],
  5943. vals[3],
  5944. OP->GetU8Const(mask),
  5945. alignment};
  5946. Function *dxilF = OP->GetOpFunc(opcode, EltTy);
  5947. Builder.CreateCall(dxilF, Args);
  5948. }
  5949. Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
  5950. Value *handle, hlsl::OP *OP, Value *status,
  5951. Value *bufIdx, Value *baseOffset,
  5952. const DataLayout &DL) {
  5953. HLMatrixType MatTy = HLMatrixType::cast(matType);
  5954. Type *EltTy = MatTy.getElementTypeForMem();
  5955. unsigned EltSize = DL.getTypeAllocSize(EltTy);
  5956. Constant* alignment = OP->GetI32Const(EltSize);
  5957. Value *offset = baseOffset;
  5958. if (baseOffset == nullptr)
  5959. offset = OP->GetU32Const(0);
  5960. unsigned matSize = MatTy.getNumElements();
  5961. std::vector<Value *> elts(matSize);
  5962. unsigned rest = (matSize % 4);
  5963. if (rest) {
  5964. Value *ResultElts[4];
  5965. GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 3, alignment);
  5966. for (unsigned i = 0; i < rest; i++)
  5967. elts[i] = ResultElts[i];
  5968. offset = Builder.CreateAdd(offset, OP->GetU32Const(EltSize * rest));
  5969. }
  5970. for (unsigned i = rest; i < matSize; i += 4) {
  5971. Value *ResultElts[4];
  5972. GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 4, alignment);
  5973. elts[i] = ResultElts[0];
  5974. elts[i + 1] = ResultElts[1];
  5975. elts[i + 2] = ResultElts[2];
  5976. elts[i + 3] = ResultElts[3];
  5977. // Update offset by 4*4bytes.
  5978. offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
  5979. }
  5980. Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  5981. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  5982. return Vec;
  5983. }
// Store an HLSL matrix (given in in-register vector form as `val`) into a
// raw/structured buffer, writing up to 4 components per rawBufferStore.
// `baseOffset` may be null (treated as byte offset 0).
void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
                             hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
                             Value *val, const DataLayout &DL) {
  HLMatrixType MatTy = HLMatrixType::cast(matType);
  Type *EltTy = MatTy.getElementTypeForMem();
  // Reorder from register layout to the matrix's memory layout first.
  val = MatTy.emitLoweredRegToMem(val, Builder);
  unsigned EltSize = DL.getTypeAllocSize(EltTy);
  Constant *Alignment = OP->GetI32Const(EltSize);
  Value *offset = baseOffset;
  if (baseOffset == nullptr)
    offset = OP->GetU32Const(0);
  unsigned matSize = MatTy.getNumElements();
  Value *undefElt = UndefValue::get(EltTy);
  // Round the element list up to a multiple of 4, padded with undef, so every
  // store below can always pass exactly 4 values.
  unsigned storeSize = matSize;
  if (matSize % 4) {
    storeSize = matSize + 4 - (matSize & 3);
  }
  std::vector<Value *> elts(storeSize, undefElt);
  for (unsigned i = 0; i < matSize; i++)
    elts[i] = Builder.CreateExtractElement(val, i);
  for (unsigned i = 0; i < matSize; i += 4) {
    // Build the component mask so only real (non-padding) elements are
    // written.
    uint8_t mask = 0;
    for (unsigned j = 0; j < 4 && (i+j) < matSize; j++) {
      if (elts[i+j] != undefElt)
        mask |= (1<<j);
    }
    GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder,
                        {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask,
                        Alignment);
    // Advance by 4 elements' worth of bytes.
    offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
  }
}
  6017. void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP,
  6018. Value *status, Value *bufIdx,
  6019. Value *baseOffset, const DataLayout &DL) {
  6020. IRBuilder<> Builder(CI);
  6021. HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
  6022. unsigned opcode = GetHLOpcode(CI);
  6023. DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore,
  6024. "only translate matrix loadStore here.");
  6025. HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
  6026. // Due to the current way the initial codegen generates matrix
  6027. // orientation casts, the in-register vector matrix has already been
  6028. // reordered based on the destination's row or column-major packing orientation.
  6029. switch (matOp) {
  6030. case HLMatLoadStoreOpcode::RowMatLoad:
  6031. case HLMatLoadStoreOpcode::ColMatLoad: {
  6032. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
  6033. Value *NewLd = TranslateStructBufMatLd(
  6034. ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
  6035. bufIdx, baseOffset, DL);
  6036. CI->replaceAllUsesWith(NewLd);
  6037. } break;
  6038. case HLMatLoadStoreOpcode::RowMatStore:
  6039. case HLMatLoadStoreOpcode::ColMatStore: {
  6040. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
  6041. Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
  6042. TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
  6043. handle, OP, bufIdx, baseOffset, val,
  6044. DL);
  6045. } break;
  6046. }
  6047. CI->eraseFromParent();
  6048. }
  6049. void TranslateStructBufSubscriptUser(Instruction *user,
  6050. Value *handle, HLResource::Kind ResKind,
  6051. Value *bufIdx, Value *baseOffset, Value *status,
  6052. hlsl::OP *OP, const DataLayout &DL);
  6053. // For case like mat[i][j].
  6054. // IdxList is [i][0], [i][1], [i][2],[i][3].
  6055. // Idx is j.
  6056. // return [i][j] not mat[i][j] because resource ptr and temp ptr need different
  6057. // code gen.
// Resolve a two-level GEP (mat[i][j] style) against a precomputed list of
// per-component indices. Returns IdxList[j] directly for a constant j; for a
// dynamic j, spills IdxList into a stack array and loads the selected entry.
static Value *LowerGEPOnMatIndexListToIndex(
    llvm::GetElementPtrInst *GEP, ArrayRef<Value *> IdxList) {
  IRBuilder<> Builder(GEP);
  Value *zero = Builder.getInt32(0);
  DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
  Value *baseIdx = (GEP->idx_begin())->get();
  // Pointer comparison is valid here: i32 constants are uniqued per context.
  DXASSERT_LOCALVAR(baseIdx, baseIdx == zero, "base index must be 0");
  Value *Idx = (GEP->idx_begin() + 1)->get();
  if (ConstantInt *immIdx = dyn_cast<ConstantInt>(Idx)) {
    // Constant subscript: select the precomputed index directly.
    return IdxList[immIdx->getSExtValue()];
  }
  else {
    // Dynamic subscript: allocas must go in the function's entry block.
    IRBuilder<> AllocaBuilder(
        GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
    unsigned size = IdxList.size();
    // Store idxList to temp array.
    ArrayType *AT = ArrayType::get(IdxList[0]->getType(), size);
    Value *tempArray = AllocaBuilder.CreateAlloca(AT);
    for (unsigned i = 0; i < size; i++) {
      Value *EltPtr = Builder.CreateGEP(tempArray, { zero, Builder.getInt32(i) });
      Builder.CreateStore(IdxList[i], EltPtr);
    }
    // Load the idx.
    Value *GEPOffset = Builder.CreateGEP(tempArray, { zero, Idx });
    return Builder.CreateLoad(GEPOffset);
  }
}
  6085. // subscript operator for matrix of struct element.
// Lower a matrix-subscript call (mat[i], mat[i][j], mat._m00 style) on a
// matrix that lives inside a raw/structured buffer element.
// Computes a per-component byte-offset list for the subscript result, then
// rewrites every user of the subscript (GEP / load / store / further
// subscript users) into rawBufferLoad / rawBufferStore calls and erases the
// original call.
void TranslateStructBufMatSubscript(CallInst *CI,
                                    Value *handle, HLResource::Kind ResKind,
                                    Value *bufIdx, Value *baseOffset, Value *status,
                                    hlsl::OP* hlslOP, const DataLayout &DL) {
  unsigned opcode = GetHLOpcode(CI);
  IRBuilder<> subBuilder(CI);
  HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  HLMatrixType MatTy = HLMatrixType::cast(basePtr->getType()->getPointerElementType());
  Type *EltTy = MatTy.getElementTypeForReg();
  Constant *alignment = hlslOP->GetI32Const(DL.getTypeAllocSize(EltTy));
  Value *EltByteSize = ConstantInt::get(
      baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
  Type *resultType = CI->getType()->getPointerElementType();
  unsigned resultSize = 1;
  if (resultType->isVectorTy())
    resultSize = resultType->getVectorNumElements();
  DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
  _Analysis_assume_(resultSize <= 16);
  // idxList[i] holds the byte offset of result component i in the buffer.
  std::vector<Value *> idxList(resultSize);
  switch (subOp) {
  case HLSubscriptOpcode::ColMatSubscript:
  case HLSubscriptOpcode::RowMatSubscript: {
    // mat[i]: each component's element index is a separate call operand.
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
      offset = subBuilder.CreateMul(offset, EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  case HLSubscriptOpcode::RowMatElement:
  case HLSubscriptOpcode::ColMatElement: {
    // mat._m00 style: the element indices come as one constant aggregate.
    Constant *EltIdxs = cast<Constant>(idx);
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  default:
    DXASSERT(0, "invalid operation on const buffer");
    break;
  }
  Value *undefElt = UndefValue::get(EltTy);
  for (auto U = CI->user_begin(); U != CI->user_end();) {
    Value *subsUser = *(U++);
    if (resultSize == 1) {
      // Scalar result: lower the user directly at the single offset.
      TranslateStructBufSubscriptUser(cast<Instruction>(subsUser),
          handle, ResKind, bufIdx, idxList[0], status, hlslOP, DL);
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
      // mat[i][j]: fold the second-level GEP into one offset, then lower each
      // of the GEP's users at that offset.
      Value *GEPOffset = LowerGEPOnMatIndexListToIndex(GEP, idxList);
      for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
        Instruction *gepUserInst = cast<Instruction>(*(gepU++));
        TranslateStructBufSubscriptUser(gepUserInst,
            handle, ResKind, bufIdx, GEPOffset, status, hlslOP, DL);
      }
      GEP->eraseFromParent();
    } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
      // Store: write each component with a single-component (X-masked)
      // rawBufferStore at its own offset.
      IRBuilder<> stBuilder(stUser);
      Value *Val = stUser->getValueOperand();
      if (Val->getType()->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *EltVal = stBuilder.CreateExtractElement(Val, i);
          uint8_t mask = DXIL::kCompMask_X;
          GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
                              stBuilder, {EltVal, undefElt, undefElt, undefElt},
                              mask, alignment);
        }
      } else {
        uint8_t mask = DXIL::kCompMask_X;
        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
                            stBuilder, {Val, undefElt, undefElt, undefElt},
                            mask, alignment);
      }
      stUser->eraseFromParent();
    } else {
      // Must be load here.
      LoadInst *ldUser = cast<LoadInst>(subsUser);
      IRBuilder<> ldBuilder(ldUser);
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        // Load each component separately and rebuild the vector.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *ResultElt;
          // TODO: This can be inefficient for row major matrix load
          GenerateStructBufLd(handle, bufIdx, idxList[i],
                              /*status*/ nullptr, EltTy, ResultElt, hlslOP,
                              ldBuilder, 1, alignment);
          ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
        }
      } else {
        GenerateStructBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
                            EltTy, ldData, hlslOP, ldBuilder, 4, alignment);
      }
      ldUser->replaceAllUsesWith(ldData);
      ldUser->eraseFromParent();
    }
  }
  CI->eraseFromParent();
}
// Lower one user of a raw/structured-buffer subscript at coordinates
// (bufIdx, baseOffset). Handles:
//  - HL intrinsic calls (interlocked/atomic operations on the element),
//  - HL matrix load/store and matrix-subscript calls,
//  - direct load/store instructions (including arrays of scalars/vectors),
//  - bitcasts and GEPs, whose own users are lowered recursively at an
//    adjusted offset.
// Each handled instruction is erased once it has been rewritten.
void TranslateStructBufSubscriptUser(
    Instruction *user, Value *handle, HLResource::Kind ResKind,
    Value *bufIdx, Value *baseOffset, Value *status,
    hlsl::OP *OP, const DataLayout &DL) {
  IRBuilder<> Builder(user);
  if (CallInst *userCall = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group =
        hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
    unsigned opcode = GetHLOpcode(userCall);
    // For case element type of structure buffer is not structure type.
    if (baseOffset == nullptr)
      baseOffset = OP->GetU32Const(0);
    if (group == HLOpcodeGroup::HLIntrinsic) {
      IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
      switch (IOP) {
      case IntrinsicOp::MOP_Load: {
        if (userCall->getType()->isPointerTy()) {
          // Struct will return pointers which like []
        } else {
          // Use builtin types on structuredBuffer.
        }
        DXASSERT(0, "not implement yet");
      } break;
      // Each interlocked intrinsic lowers to a DXIL AtomicBinOp with the
      // matching binop code; AtomicHelper captures the handle plus the
      // (bufIdx, baseOffset) coordinates.
      case IntrinsicOp::IOP_InterlockedAdd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedAnd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedExchange: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedOr: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedXor: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedCompareStore:
      case IntrinsicOp::IOP_InterlockedCompareExchange: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
                            handle, bufIdx, baseOffset);
        TranslateAtomicCmpXChg(helper, Builder, OP);
      } break;
      default:
        DXASSERT(0, "invalid opcode");
        break;
      }
      userCall->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLMatLoadStore)
      TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx,
                                baseOffset, DL);
    else if (group == HLOpcodeGroup::HLSubscript) {
      TranslateStructBufMatSubscript(userCall,
          handle, ResKind, bufIdx, baseOffset, status, OP, DL);
    }
  } else if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
    LoadInst *ldInst = dyn_cast<LoadInst>(user);
    StoreInst *stInst = dyn_cast<StoreInst>(user);
    Type *Ty = isa<LoadInst>(user) ? ldInst->getType()
                                   : stInst->getValueOperand()->getType();
    Type *pOverloadTy = Ty->getScalarType();
    Value *offset = baseOffset;
    unsigned arraySize = 1;
    Value *eltSize = nullptr;
    if (pOverloadTy->isArrayTy()) {
      // Array element: lower as arraySize consecutive scalar/vector accesses
      // advancing by the array element's allocation size each step.
      arraySize = pOverloadTy->getArrayNumElements();
      eltSize = OP->GetU32Const(
          DL.getTypeAllocSize(pOverloadTy->getArrayElementType()));
      pOverloadTy = pOverloadTy->getArrayElementType()->getScalarType();
    }
    if (ldInst) {
      // Loads one scalar/vector element at `offset`.
      auto LdElement = [=](Value *offset, IRBuilder<> &Builder) -> Value * {
        unsigned numComponents = 0;
        if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
          numComponents = VTy->getNumElements();
        }
        else {
          numComponents = 1;
        }
        Constant *alignment =
            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
        if (ResKind == HLResource::Kind::TypedBuffer) {
          // Typed buffer cannot have offsets, they must be loaded all at once
          ResRetValueArray ResRet = GenerateTypedBufferLoad(
              handle, pOverloadTy, bufIdx, status, OP, Builder);
          return ExtractFromTypedBufferLoad(ResRet, Ty, offset, Builder);
        }
        else {
          Value* ResultElts[4];
          GenerateStructBufLd(handle, bufIdx, offset, status, pOverloadTy,
                              ResultElts, OP, Builder, numComponents, alignment);
          return ScalarizeElements(Ty, ResultElts, Builder);
        }
      };
      Value *newLd = LdElement(offset, Builder);
      if (arraySize > 1) {
        // Rebuild the array aggregate one element at a time.
        newLd =
            Builder.CreateInsertValue(UndefValue::get(Ty), newLd, (uint64_t)0);
        for (unsigned i = 1; i < arraySize; i++) {
          offset = Builder.CreateAdd(offset, eltSize);
          Value *eltLd = LdElement(offset, Builder);
          newLd = Builder.CreateInsertValue(newLd, eltLd, i);
        }
      }
      ldInst->replaceAllUsesWith(newLd);
    } else {
      Value *val = stInst->getValueOperand();
      // Stores one scalar/vector element at `offset`.
      auto StElement = [&](Value *offset, Value *val, IRBuilder<> &Builder) {
        Value *undefVal = llvm::UndefValue::get(pOverloadTy);
        Value *vals[] = {undefVal, undefVal, undefVal, undefVal};
        uint8_t mask = 0;
        if (Ty->isVectorTy()) {
          unsigned vectorNumElements = Ty->getVectorNumElements();
          DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector");
          _Analysis_assume_(vectorNumElements <= 4);
          for (unsigned i = 0; i < vectorNumElements; i++) {
            vals[i] = Builder.CreateExtractElement(val, i);
            mask |= (1<<i);
          }
        } else {
          vals[0] = val;
          mask = DXIL::kCompMask_X;
        }
        Constant *alignment =
            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
        GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder,
                            vals, mask, alignment);
      };
      if (arraySize > 1)
        val = Builder.CreateExtractValue(val, 0);
      StElement(offset, val, Builder);
      if (arraySize > 1) {
        val = stInst->getValueOperand();
        for (unsigned i = 1; i < arraySize; i++) {
          offset = Builder.CreateAdd(offset, eltSize);
          Value *eltVal = Builder.CreateExtractValue(val, i);
          StElement(offset, eltVal, Builder);
        }
      }
    }
    user->eraseFromParent();
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
    // Recurse users
    for (auto U = BCI->user_begin(); U != BCI->user_end();) {
      Value *BCIUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(BCIUser),
          handle, ResKind, bufIdx, baseOffset, status, OP, DL);
    }
    BCI->eraseFromParent();
  } else {
    // should only used by GEP
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    Type *Ty = GEP->getType()->getPointerElementType();
    // Fold the GEP into a byte offset and lower its users at the new offset.
    Value *offset = GEPIdxToOffset(GEP, Builder, OP, DL);
    DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()),
                      "else bitness is wrong");
    offset = Builder.CreateAdd(offset, baseOffset);
    for (auto U = GEP->user_begin(); U != GEP->user_end();) {
      Value *GEPUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(GEPUser),
          handle, ResKind, bufIdx, offset, status, OP, DL);
    }
    // delete the inst
    GEP->eraseFromParent();
  }
}
  6394. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  6395. hlsl::OP *OP, HLResource::Kind ResKind, const DataLayout &DL) {
  6396. Value *subscriptIndex = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
  6397. Value* bufIdx = nullptr;
  6398. Value *offset = nullptr;
  6399. if (ResKind == HLResource::Kind::RawBuffer) {
  6400. offset = subscriptIndex;
  6401. }
  6402. else {
  6403. // StructuredBuffer, TypedBuffer, etc.
  6404. bufIdx = subscriptIndex;
  6405. offset = OP->GetU32Const(0);
  6406. }
  6407. for (auto U = CI->user_begin(); U != CI->user_end();) {
  6408. Value *user = *(U++);
  6409. TranslateStructBufSubscriptUser(cast<Instruction>(user),
  6410. handle, ResKind, bufIdx, offset, status, OP, DL);
  6411. }
  6412. }
  6413. }
  6414. // HLSubscript.
  6415. namespace {
  6416. Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK,
  6417. DXIL::ResourceClass RC, Value *handle,
  6418. LoadInst *ldInst, IRBuilder<> &Builder,
  6419. hlsl::OP *hlslOP, const DataLayout &DL) {
  6420. ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, /*bForSubscript*/ true);
  6421. // Default sampleIdx for 2DMS textures.
  6422. if (RK == DxilResource::Kind::Texture2DMS ||
  6423. RK == DxilResource::Kind::Texture2DMSArray)
  6424. ldHelper.mipLevel = hlslOP->GetU32Const(0);
  6425. // use ldInst as retVal
  6426. ldHelper.retVal = ldInst;
  6427. TranslateLoad(ldHelper, RK, Builder, hlslOP, DL);
  6428. // delete the ld
  6429. ldInst->eraseFromParent();
  6430. return ldHelper.retVal;
  6431. }
// Insert `EltVal` into vector `VecVal` at lane `EltIdx`.
// For a constant index this is a single insertelement. For a dynamic index
// it builds a switch over all lanes: the block containing InsertPt is split,
// one case block per lane performs the insertelement, and a phi in the
// continuation block merges the results (the default edge keeps the original
// vector for out-of-range indices).
Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx,
                       unsigned vectorSize, Instruction *InsertPt) {
  IRBuilder<> Builder(InsertPt);
  if (ConstantInt *CEltIdx = dyn_cast<ConstantInt>(EltIdx)) {
    VecVal =
        Builder.CreateInsertElement(VecVal, EltVal, CEltIdx->getLimitedValue());
  } else {
    BasicBlock *BB = InsertPt->getParent();
    BasicBlock *EndBB = BB->splitBasicBlock(InsertPt);
    // Replace the unconditional branch created by the split with a switch on
    // the element index.
    TerminatorInst *TI = BB->getTerminator();
    IRBuilder<> SwitchBuilder(TI);
    LLVMContext &Ctx = InsertPt->getContext();
    SwitchInst *Switch = SwitchBuilder.CreateSwitch(EltIdx, EndBB, vectorSize);
    TI->eraseFromParent();
    Function *F = EndBB->getParent();
    IRBuilder<> endSwitchBuilder(EndBB->begin());
    Type *Ty = VecVal->getType();
    // One incoming value per lane case, plus the default edge from BB.
    PHINode *VecPhi = endSwitchBuilder.CreatePHI(Ty, vectorSize + 1);
    for (unsigned i = 0; i < vectorSize; i++) {
      BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case", F, EndBB);
      Switch->addCase(SwitchBuilder.getInt32(i), CaseBB);
      IRBuilder<> CaseBuilder(CaseBB);
      Value *CaseVal = CaseBuilder.CreateInsertElement(VecVal, EltVal, i);
      VecPhi->addIncoming(CaseVal, CaseBB);
      CaseBuilder.CreateBr(EndBB);
    }
    VecPhi->addIncoming(VecVal, BB);
    VecVal = VecPhi;
  }
  return VecVal;
}
  6463. void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  6464. Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  6465. hlsl::OP *hlslOP = &helper.hlslOP;
  6466. // Resource ptr.
  6467. Value *handle = ptr;
  6468. DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  6469. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  6470. Type *Ty = CI->getType()->getPointerElementType();
  6471. for (auto It = CI->user_begin(); It != CI->user_end(); ) {
  6472. User *user = *(It++);
  6473. Instruction *I = cast<Instruction>(user);
  6474. IRBuilder<> Builder(I);
  6475. if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
  6476. TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.dataLayout);
  6477. } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
  6478. Value *val = stInst->getValueOperand();
  6479. TranslateStore(RK, handle, val,
  6480. CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
  6481. Builder, hlslOP);
  6482. // delete the st
  6483. stInst->eraseFromParent();
  6484. } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
  6485. // Must be vector type here.
  6486. unsigned vectorSize = Ty->getVectorNumElements();
  6487. DXASSERT_NOMSG(GEP->getNumIndices() == 2);
  6488. Use *GEPIdx = GEP->idx_begin();
  6489. GEPIdx++;
  6490. Value *EltIdx = *GEPIdx;
  6491. for (auto GEPIt = GEP->user_begin(); GEPIt != GEP->user_end();) {
  6492. User *GEPUser = *(GEPIt++);
  6493. if (StoreInst *SI = dyn_cast<StoreInst>(GEPUser)) {
  6494. IRBuilder<> StBuilder(SI);
  6495. // Generate Ld.
  6496. LoadInst *tmpLd = StBuilder.CreateLoad(CI);
  6497. Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, StBuilder,
  6498. hlslOP, helper.dataLayout);
  6499. // Update vector.
  6500. ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx,
  6501. vectorSize, SI);
  6502. // Generate St.
  6503. // Reset insert point, UpdateVectorElt may move SI to different block.
  6504. StBuilder.SetInsertPoint(SI);
  6505. TranslateStore(RK, handle, ldVal,
  6506. CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
  6507. StBuilder, hlslOP);
  6508. SI->eraseFromParent();
  6509. continue;
  6510. }
  6511. if (LoadInst *LI = dyn_cast<LoadInst>(GEPUser)) {
  6512. IRBuilder<> LdBuilder(LI);
  6513. // Generate tmp vector load with vector type & translate it
  6514. LoadInst *tmpLd = LdBuilder.CreateLoad(CI);
  6515. Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, LdBuilder,
  6516. hlslOP, helper.dataLayout);
  6517. // get the single element
  6518. ldVal = GenerateVecEltFromGEP(ldVal, GEP, LdBuilder,
  6519. /*bInsertLdNextToGEP*/ false);
  6520. LI->replaceAllUsesWith(ldVal);
  6521. LI->eraseFromParent();
  6522. continue;
  6523. }
  6524. if (!isa<CallInst>(GEPUser)) {
  6525. // Invalid operations.
  6526. Translated = false;
  6527. dxilutil::EmitErrorOnInstruction(GEP, "Invalid operation on typed buffer.");
  6528. return;
  6529. }
  6530. CallInst *userCall = cast<CallInst>(GEPUser);
  6531. HLOpcodeGroup group =
  6532. hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
  6533. if (group != HLOpcodeGroup::HLIntrinsic) {
  6534. // Invalid operations.
  6535. Translated = false;
  6536. dxilutil::EmitErrorOnInstruction(userCall, "Invalid operation on typed buffer.");
  6537. return;
  6538. }
  6539. unsigned opcode = hlsl::GetHLOpcode(userCall);
  6540. IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
  6541. switch (IOP) {
  6542. case IntrinsicOp::IOP_InterlockedAdd:
  6543. case IntrinsicOp::IOP_InterlockedAnd:
  6544. case IntrinsicOp::IOP_InterlockedExchange:
  6545. case IntrinsicOp::IOP_InterlockedMax:
  6546. case IntrinsicOp::IOP_InterlockedMin:
  6547. case IntrinsicOp::IOP_InterlockedUMax:
  6548. case IntrinsicOp::IOP_InterlockedUMin:
  6549. case IntrinsicOp::IOP_InterlockedOr:
  6550. case IntrinsicOp::IOP_InterlockedXor:
  6551. case IntrinsicOp::IOP_InterlockedCompareStore:
  6552. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  6553. // Invalid operations.
  6554. Translated = false;
  6555. dxilutil::EmitErrorOnInstruction(
  6556. userCall, "Atomic operation on typed buffer is not supported.");
  6557. return;
  6558. } break;
  6559. default:
  6560. // Invalid operations.
  6561. Translated = false;
  6562. dxilutil::EmitErrorOnInstruction(userCall, "Invalid operation on typed buffer.");
  6563. return;
  6564. break;
  6565. }
  6566. }
  6567. GEP->eraseFromParent();
  6568. } else {
  6569. CallInst *userCall = cast<CallInst>(user);
  6570. HLOpcodeGroup group =
  6571. hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
  6572. unsigned opcode = hlsl::GetHLOpcode(userCall);
  6573. if (group == HLOpcodeGroup::HLIntrinsic) {
  6574. IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
  6575. if (RC == DXIL::ResourceClass::SRV) {
  6576. // Invalid operations.
  6577. Translated = false;
  6578. switch (IOP) {
  6579. case IntrinsicOp::IOP_InterlockedAdd:
  6580. case IntrinsicOp::IOP_InterlockedAnd:
  6581. case IntrinsicOp::IOP_InterlockedExchange:
  6582. case IntrinsicOp::IOP_InterlockedMax:
  6583. case IntrinsicOp::IOP_InterlockedMin:
  6584. case IntrinsicOp::IOP_InterlockedUMax:
  6585. case IntrinsicOp::IOP_InterlockedUMin:
  6586. case IntrinsicOp::IOP_InterlockedOr:
  6587. case IntrinsicOp::IOP_InterlockedXor:
  6588. case IntrinsicOp::IOP_InterlockedCompareStore:
  6589. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  6590. dxilutil::EmitErrorOnInstruction(
  6591. userCall, "Atomic operation targets must be groupshared on UAV.");
  6592. return;
  6593. } break;
  6594. default:
  6595. dxilutil::EmitErrorOnInstruction(userCall, "Invalid operation on typed buffer.");
  6596. return;
  6597. break;
  6598. }
  6599. }
  6600. switch (IOP) {
  6601. case IntrinsicOp::IOP_InterlockedAdd: {
  6602. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAdd);
  6603. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6604. helper.addr, /*offset*/ nullptr);
  6605. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add,
  6606. Builder, hlslOP);
  6607. } break;
  6608. case IntrinsicOp::IOP_InterlockedAnd: {
  6609. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAnd);
  6610. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6611. helper.addr, /*offset*/ nullptr);
  6612. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And,
  6613. Builder, hlslOP);
  6614. } break;
  6615. case IntrinsicOp::IOP_InterlockedExchange: {
  6616. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedExchange);
  6617. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6618. helper.addr, /*offset*/ nullptr);
  6619. TranslateAtomicBinaryOperation(
  6620. atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP);
  6621. } break;
  6622. case IntrinsicOp::IOP_InterlockedMax: {
  6623. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMax);
  6624. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6625. helper.addr, /*offset*/ nullptr);
  6626. TranslateAtomicBinaryOperation(
  6627. atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP);
  6628. } break;
  6629. case IntrinsicOp::IOP_InterlockedMin: {
  6630. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMin);
  6631. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6632. helper.addr, /*offset*/ nullptr);
  6633. TranslateAtomicBinaryOperation(
  6634. atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP);
  6635. } break;
  6636. case IntrinsicOp::IOP_InterlockedUMax: {
  6637. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMax);
  6638. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6639. helper.addr, /*offset*/ nullptr);
  6640. TranslateAtomicBinaryOperation(
  6641. atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP);
  6642. } break;
  6643. case IntrinsicOp::IOP_InterlockedUMin: {
  6644. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMin);
  6645. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6646. helper.addr, /*offset*/ nullptr);
  6647. TranslateAtomicBinaryOperation(
  6648. atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP);
  6649. } break;
  6650. case IntrinsicOp::IOP_InterlockedOr: {
  6651. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedOr);
  6652. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6653. helper.addr, /*offset*/ nullptr);
  6654. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or,
  6655. Builder, hlslOP);
  6656. } break;
  6657. case IntrinsicOp::IOP_InterlockedXor: {
  6658. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedXor);
  6659. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
  6660. helper.addr, /*offset*/ nullptr);
  6661. TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor,
  6662. Builder, hlslOP);
  6663. } break;
  6664. case IntrinsicOp::IOP_InterlockedCompareStore:
  6665. case IntrinsicOp::IOP_InterlockedCompareExchange: {
  6666. ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedCompareExchange);
  6667. AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
  6668. handle, helper.addr, /*offset*/ nullptr);
  6669. TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
  6670. } break;
  6671. default:
  6672. DXASSERT(0, "invalid opcode");
  6673. break;
  6674. }
  6675. } else {
  6676. DXASSERT(0, "invalid group");
  6677. }
  6678. userCall->eraseFromParent();
  6679. }
  6680. }
  6681. }
// Lowers one HL subscript call (operator[] on cbuffers, resources, and
// matrices) to DXIL operations.
// CI         - the HL subscript call to lower.
// opcode     - which subscript flavor (cbuffer, double-subscript, default).
// helper     - shared lowering state (hlslOP, DXIL type system, data layout).
// pObjHelper - resolves resource handles to kind / resource-type info.
// Translated - out: true when CI was fully lowered (caller erases CI);
//              false when lowering failed or must happen in another pass.
void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
                          HLOperationLowerHelper &helper,
                          HLObjectOperationLowerHelper *pObjHelper,
                          bool &Translated) {
  // Nothing uses the subscript result; report success so the dead call is
  // deleted by the caller.
  if (CI->user_empty()) {
    Translated = true;
    return;
  }
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  if (opcode == HLSubscriptOpcode::CBufferSubscript) {
    // Collapse GEP chains on the subscript result before lowering cbuffer
    // accesses.
    HLModule::MergeGepUse(CI);
    // Resource ptr.
    Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
    if (helper.bLegacyCBufferLoad)
      TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys,
                                  helper.dataLayout, pObjHelper);
    else {
      TranslateCBOperations(handle, CI, /*offset*/ hlslOP->GetU32Const(0),
                            hlslOP, helper.dxilTypeSys,
                            CI->getModule()->getDataLayout(), pObjHelper);
    }
    Translated = true;
    return;
  } else if (opcode == HLSubscriptOpcode::DoubleSubscript) {
    // Double subscript, e.g. tex.mips[mip][coord]: lowered as a load with an
    // explicit mip level. Only a single load user is supported (see TODO).
    // Resource ptr.
    Value *handle = ptr;
    DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
    Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
    Value *mipLevel =
        CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx);

    auto U = CI->user_begin();
    DXASSERT(CI->hasOneUse(), "subscript should only has one use");
    // TODO: support store.
    Instruction *ldInst = cast<Instruction>(*U);
    ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel);
    IRBuilder<> Builder(CI);
    TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout);
    ldInst->eraseFromParent();
    Translated = true;
    return;
  } else {
    Type *HandleTy = hlslOP->GetHandleType();
    if (ptr->getType() == HandleTy) {
      // Resource ptr.
      Value *handle = ptr;
      DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
      if (RK == DxilResource::Kind::Invalid) {
        Translated = false;
        return;
      }
      Translated = true;
      Type *ObjTy = pObjHelper->GetResourceType(handle);
      Type *RetTy = ObjTy->getStructElementType(0);
      if (DXIL::IsStructuredBuffer(RK)) {
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK,
                                    helper.dataLayout);
      } else if (RetTy->isAggregateType() &&
                 RK == DxilResource::Kind::TypedBuffer) {
        // Typed buffer with an aggregate element: lower as if structured,
        // then patch the emitted DXIL calls below, because typed-buffer ops
        // take no second coordinate/byte offset.
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK,
                                    helper.dataLayout);
        // Clear offset for typed buf.
        // NOTE: inner CI intentionally shadows the outer CI; the iterator is
        // advanced before the current call may be erased.
        for (auto User = handle->user_begin(); User != handle->user_end(); ) {
          CallInst *CI = cast<CallInst>(*(User++));
          // Skip not lowered HL functions.
          if (hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()) != HLOpcodeGroup::NotHL)
            continue;
          switch (hlslOP->GetDxilOpFuncCallInst(CI)) {
          case DXIL::OpCode::BufferLoad: {
            CI->setArgOperand(DXIL::OperandIndex::kBufferLoadCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::BufferStore: {
            CI->setArgOperand(DXIL::OperandIndex::kBufferStoreCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicBinOp: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicCompareExchange: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::RawBufferLoad: {
            // Structured buffer inside a typed buffer must be converted to typed buffer load.
            // Typed buffer load is equivalent to raw buffer load, except there is no mask.
            StructType *STy = cast<StructType>(CI->getFunctionType()->getReturnType());
            Type *ETy = STy->getElementType(0);
            SmallVector<Value *, 4> Args;
            Args.emplace_back(hlslOP->GetI32Const((unsigned)DXIL::OpCode::BufferLoad));
            Args.emplace_back(CI->getArgOperand(1)); // handle
            Args.emplace_back(CI->getArgOperand(2)); // index
            Args.emplace_back(UndefValue::get(helper.i32Ty)); // offset
            IRBuilder<> builder(CI);
            Function *newFunction = hlslOP->GetOpFunc(DXIL::OpCode::BufferLoad, ETy);
            CallInst *newCall = builder.CreateCall(newFunction, Args);
            CI->replaceAllUsesWith(newCall);
            CI->eraseFromParent();
          } break;
          default:
            DXASSERT(0, "Invalid operation on resource handle");
            break;
          }
        }
      } else {
        TranslateDefaultSubscript(CI, helper, pObjHelper, Translated);
      }
      return;
    }
  }

  // Matrix subscript on local/groupshared memory should already have been
  // rewritten by HLMatrixLowerPass; reaching here is a compiler bug.
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  if (IsLocalVariablePtr(basePtr) || IsSharedMemPtr(basePtr)) {
    // Translate matrix into vector of array for share memory or local
    // variable should be done in HLMatrixLowerPass
    DXASSERT_NOMSG(0);
    Translated = true;
    return;
  }

  // Other case should be take care in TranslateStructBufSubscript or
  // TranslateCBOperations.
  Translated = false;
  return;
}
  6804. }
  6805. void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper) {
  6806. for (auto U = F->user_begin(); U != F->user_end();) {
  6807. Value *user = *(U++);
  6808. if (!isa<Instruction>(user))
  6809. continue;
  6810. // must be call inst
  6811. CallInst *CI = cast<CallInst>(user);
  6812. unsigned opcode = GetHLOpcode(CI);
  6813. bool Translated = true;
  6814. TranslateHLSubscript(
  6815. CI, static_cast<HLSubscriptOpcode>(opcode), helper, pObjHelper, Translated);
  6816. if (Translated) {
  6817. // delete the call
  6818. DXASSERT(CI->use_empty(),
  6819. "else TranslateHLSubscript didn't replace/erase uses");
  6820. CI->eraseFromParent();
  6821. }
  6822. }
  6823. }
  6824. // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast of alloca, and return load from alloca
  6825. // If bOrigAllocaTy is true: create alloca of old type instead, write to alloca, and return load from bitcast of alloca
  6826. static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, bool bOrigAllocaTy = false, const Twine &Name = "") {
  6827. IRBuilder<> Builder(Insert);
  6828. if (Ty->isPointerTy()) {
  6829. // If pointer, we can bitcast directly
  6830. return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
  6831. } else {
  6832. // If value, we have to alloca, store to bitcast ptr, and load
  6833. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
  6834. Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
  6835. Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
  6836. Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
  6837. Instruction *bitCast = cast<Instruction>(Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
  6838. Builder.CreateStore(V, bOrigAllocaTy ? allocaInst : bitCast);
  6839. return Builder.CreateLoad(bOrigAllocaTy ? bitCast : allocaInst, Name);
  6840. }
  6841. }
  6842. static Instruction *CreateTransposeShuffle(IRBuilder<> &Builder, Value *vecVal, unsigned toRows, unsigned toCols) {
  6843. SmallVector<int, 16> castMask(toCols * toRows);
  6844. unsigned idx = 0;
  6845. for (unsigned r = 0; r < toRows; r++)
  6846. for (unsigned c = 0; c < toCols; c++)
  6847. castMask[idx++] = c * toRows + r;
  6848. return cast<Instruction>(
  6849. Builder.CreateShuffleVector(vecVal, vecVal, castMask));
  6850. }
// Lowers all calls to one HL builtin function F according to its opcode
// group: HLIntrinsic calls go through TranslateBuiltinIntrinsic; matrix
// load/store and cast calls in the default address space are lowered to
// plain vector load/store/bitcast/shuffle; subscripts dispatch to
// TranslateSubscriptOperation. Other groups are left untouched here.
void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
                                 hlsl::HLOpcodeGroup group,
                                 HLObjectOperationLowerHelper *pObjHelper) {
  if (group == HLOpcodeGroup::HLIntrinsic) {
    // map to dxil operations
    for (auto U = F->user_begin(); U != F->user_end();) {
      // Advance the iterator before translation may erase the call.
      Value *User = *(U++);
      if (!isa<Instruction>(User))
        continue;
      // must be call inst
      CallInst *CI = cast<CallInst>(User);

      // Keep the instruction to lower by other function.
      bool Translated = true;

      TranslateBuiltinIntrinsic(CI, helper, pObjHelper, Translated);

      if (Translated) {
        // delete the call
        DXASSERT(CI->use_empty(),
                 "else TranslateBuiltinIntrinsic didn't replace/erase uses");
        CI->eraseFromParent();
      }
    }
  } else {
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Both ld/st use arg1 for the pointer.
      Type *PtrTy =
          F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx);

      if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
        // Translate matrix into vector of array for shared memory
        // variable should be done in HLMatrixLowerPass.
        if (!F->user_empty())
          F->getContext().emitError("Fail to lower matrix load/store.");
      } else if (PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) {
        // Default address space may be function argument in lib target
        if (!F->user_empty()) {
          for (auto U = F->user_begin(); U != F->user_end();) {
            Value *User = *(U++);
            if (!isa<Instruction>(User))
              continue;
            // must be call inst
            CallInst *CI = cast<CallInst>(User);
            IRBuilder<> Builder(CI);
            HLMatLoadStoreOpcode opcode =
                static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
            // No default case: any other opcode is deliberately left alone.
            switch (opcode) {
            case HLMatLoadStoreOpcode::ColMatStore:
            case HLMatLoadStoreOpcode::RowMatStore: {
              // Store: view the matrix pointer as a vector pointer and store
              // the vector value directly.
              Value *vecVal = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
              Value *matPtr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
              Value *castPtr = Builder.CreateBitCast(matPtr, vecVal->getType()->getPointerTo());
              Builder.CreateStore(vecVal, castPtr);
              CI->eraseFromParent();
            } break;
            case HLMatLoadStoreOpcode::ColMatLoad:
            case HLMatLoadStoreOpcode::RowMatLoad: {
              // Load: view the matrix pointer as a pointer to the call's
              // vector return type and load from it.
              Value *matPtr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
              Value *castPtr = Builder.CreateBitCast(matPtr, CI->getType()->getPointerTo());
              Value *vecVal = Builder.CreateLoad(castPtr);
              CI->replaceAllUsesWith(vecVal);
              CI->eraseFromParent();
            } break;
            }
          }
        }
      }
    } else if (group == HLOpcodeGroup::HLCast) {
      // HLCast may be used on matrix value function argument in lib target
      if (!F->user_empty()) {
        for (auto U = F->user_begin(); U != F->user_end();) {
          Value *User = *(U++);
          if (!isa<Instruction>(User))
            continue;
          // must be call inst
          CallInst *CI = cast<CallInst>(User);
          IRBuilder<> Builder(CI);
          HLCastOpcode opcode = static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI));
          bool bTranspose = false;
          bool bColDest = false;
          switch (opcode) {
          case HLCastOpcode::RowMatrixToColMatrix:
            bColDest = true;
            // Intentional fall-through: row->col also transposes.
          case HLCastOpcode::ColMatrixToRowMatrix:
            bTranspose = true;
            // Intentional fall-through: all four casts share the bitcast.
          case HLCastOpcode::ColMatrixToVecCast:
          case HLCastOpcode::RowMatrixToVecCast: {
            Value *matVal = CI->getArgOperand(HLOperandIndex::kInitFirstArgOpIdx);
            Value *vecVal = BitCastValueOrPtr(matVal, CI, CI->getType(),
                                              /*bOrigAllocaTy*/false,
                                              matVal->getName());
            if (bTranspose) {
              // Row<->col conversion reorders elements: emit a transpose
              // shuffle sized by the destination orientation.
              HLMatrixType MatTy = HLMatrixType::cast(matVal->getType());
              unsigned row = MatTy.getNumRows();
              unsigned col = MatTy.getNumColumns();
              if (bColDest) std::swap(row, col);
              vecVal = CreateTransposeShuffle(Builder, vecVal, row, col);
            }
            CI->replaceAllUsesWith(vecVal);
            CI->eraseFromParent();
          } break;
          }
        }
      }
    } else if (group == HLOpcodeGroup::HLSubscript) {
      TranslateSubscriptOperation(F, helper, pObjHelper);
    }
    // map to math function or llvm ir
  }
}
// Maps an instruction to an associated handle value.
// NOTE(review): not referenced anywhere in this portion of the file —
// confirm it is still used elsewhere before removing.
typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
  6957. static void TranslateHLExtension(Function *F,
  6958. HLSLExtensionsCodegenHelper *helper,
  6959. OP& hlslOp,
  6960. HLObjectOperationLowerHelper &objHelper) {
  6961. // Find all calls to the function F.
  6962. // Store the calls in a vector for now to be replaced the loop below.
  6963. // We use a two step "find then replace" to avoid removing uses while
  6964. // iterating.
  6965. SmallVector<CallInst *, 8> CallsToReplace;
  6966. for (User *U : F->users()) {
  6967. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  6968. CallsToReplace.push_back(CI);
  6969. }
  6970. }
  6971. // Get the lowering strategy to use for this intrinsic.
  6972. llvm::StringRef LowerStrategy = GetHLLowerStrategy(F);
  6973. HLObjectExtensionLowerHelper extObjHelper(objHelper);
  6974. ExtensionLowering lower(LowerStrategy, helper, hlslOp, extObjHelper);
  6975. // Replace all calls that were successfully translated.
  6976. for (CallInst *CI : CallsToReplace) {
  6977. Value *Result = lower.Translate(CI);
  6978. if (Result && Result != CI) {
  6979. CI->replaceAllUsesWith(Result);
  6980. CI->eraseFromParent();
  6981. }
  6982. }
  6983. }
namespace hlsl {

// Module-level entry point: lowers every high-level (HL) operation
// declaration in the module to DXIL operations.
// HLM              - the HL module being lowered.
// extCodegenHelper - hook used to lower vendor extension intrinsics.
// UpdateCounterSet - set threaded through objHelper; populated during
//                    lowering (semantics defined by the helper types).
void TranslateBuiltinOperations(
    HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper,
    std::unordered_set<LoadInst *> &UpdateCounterSet) {
  HLOperationLowerHelper helper(HLM);

  HLObjectOperationLowerHelper objHelper = {HLM, UpdateCounterSet};

  Module *M = HLM.GetModule();

  SmallVector<Function *, 4> NonUniformResourceIndexIntrinsics;

  // generate dxil operation
  // NOTE: the loop variable is an ilist iterator built from each Function& —
  // presumably to stay valid while lowering inserts new DXIL op functions
  // into the module; confirm before restructuring this loop.
  for (iplist<Function>::iterator F : M->getFunctionList()) {
    if (F->user_empty())
      continue;
    // Only declarations are HL operation stubs; defined functions are
    // skipped.
    if (!F->isDeclaration()) {
      continue;
    }
    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
    if (group == HLOpcodeGroup::NotHL) {
      // Nothing to do.
      continue;
    }
    if (group == HLOpcodeGroup::HLExtIntrinsic) {
      TranslateHLExtension(F, extCodegenHelper, helper.hlslOP, objHelper);
      continue;
    }
    if (group == HLOpcodeGroup::HLIntrinsic) {
      // Only the first user is inspected — assumes all calls to the same HL
      // function carry the same opcode.
      CallInst *CI = cast<CallInst>(*F->user_begin()); // must be call inst
      unsigned opcode = hlsl::GetHLOpcode(CI);
      if (opcode == (unsigned)IntrinsicOp::IOP_NonUniformResourceIndex) {
        // Deferred to the second loop below.
        NonUniformResourceIndexIntrinsics.push_back(F);
        continue;
      }
    }
    TranslateHLBuiltinOperation(F, helper, group, &objHelper);
  }

  // Translate last so value placed in NonUniformSet is still valid.
  if (!NonUniformResourceIndexIntrinsics.empty()) {
    for (auto F : NonUniformResourceIndexIntrinsics) {
      TranslateHLBuiltinOperation(F, helper, HLOpcodeGroup::HLIntrinsic,
                                  &objHelper);
    }
  }
}

} // namespace hlsl