HLOperationLower.cpp 338 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
7777877797780778177827783778477857786778777887789779077917792779377947795779677977798779978007801780278037804780578067807780878097810781178127813781478157816781778187819782078217822782378247825782678277828782978307831783278337834783578367837783878397840784178427843784478457846784778487849785078517852785378547855785678577858785978607861786278637864786578667867786878697870787178727873787478757876787778787879788078817882788378847885788678877888788978907891789278937894789578967897789878997900790179027903790479057906790779087909791079117912791379147915791679177918791979207921792279237924792579267927792879297930793179327933793479357936793779387939794079417942794379447945794679477948794979507951795279537954795579567957795879597960796179627963796479657966796779687969797079717972797379747975797679777978797979807981798279837984798579867987798879897990799179927993799479957996799779987999800080018002800380048005800680078008800980108011801280138014801580168017801880198020802180228023802480258026802780288029803080318032803380348035803680378038803980408041804280438044804580468047804880498050805180528053805480558056805780588059806080618062806380648065806680678068806980708071807280738074807580768077807880798080808180828083808480858086808780888089809080918092809380948095
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // HLOperationLower.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // Lower functions to lower HL operations to DXIL operations. //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #define _USE_MATH_DEFINES
  12. #include <array>
  13. #include <cmath>
  14. #include <unordered_set>
  15. #include <functional>
  16. #include "dxc/DXIL/DxilModule.h"
  17. #include "dxc/DXIL/DxilOperations.h"
  18. #include "dxc/HLSL/HLMatrixLowerHelper.h"
  19. #include "dxc/HLSL/HLMatrixType.h"
  20. #include "dxc/HLSL/HLModule.h"
  21. #include "dxc/DXIL/DxilUtil.h"
  22. #include "dxc/HLSL/HLOperationLower.h"
  23. #include "dxc/HLSL/HLOperationLowerExtension.h"
  24. #include "dxc/HLSL/HLOperations.h"
  25. #include "dxc/HlslIntrinsicOp.h"
  26. #include "dxc/DXIL/DxilResourceProperties.h"
  27. #include "llvm/IR/GetElementPtrTypeIterator.h"
  28. #include "llvm/IR/IRBuilder.h"
  29. #include "llvm/IR/Instructions.h"
  30. #include "llvm/IR/IntrinsicInst.h"
  31. #include "llvm/IR/Module.h"
  32. #include "llvm/ADT/APSInt.h"
  33. using namespace llvm;
  34. using namespace hlsl;
// Shared, per-module state handed to every intrinsic lowering function:
// the DXIL operation table, commonly-used LLVM types, and module-level
// lowering options, cached once so lowerings don't re-query the module.
struct HLOperationLowerHelper {
  OP &hlslOP;             // DXIL operation table for the module.
  Type *voidTy;           // Cached LLVM types, initialized in the ctor.
  Type *f32Ty;
  Type *i32Ty;
  Type *i16Ty;
  llvm::Type *i1Ty;
  Type *i8Ty;
  DxilTypeSystem &dxilTypeSys;
  DxilFunctionProps *functionProps; // Entry-point props; nullptr if the
                                    // entry has none.
  bool bLegacyCBufferLoad;          // Use legacy cbuffer load lowering.
  DataLayout dataLayout;            // Legacy or new layout string, chosen
                                    // from the module's min-precision option.
  HLOperationLowerHelper(HLModule &HLM);
};
  49. HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
  50. : hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
  51. dataLayout(DataLayout(HLM.GetHLOptions().bUseMinPrecision
  52. ? hlsl::DXIL::kLegacyLayoutString
  53. : hlsl::DXIL::kNewLayoutString)) {
  54. llvm::LLVMContext &Ctx = HLM.GetCtx();
  55. voidTy = Type::getVoidTy(Ctx);
  56. f32Ty = Type::getFloatTy(Ctx);
  57. i32Ty = Type::getInt32Ty(Ctx);
  58. i16Ty = Type::getInt16Ty(Ctx);
  59. i1Ty = Type::getInt1Ty(Ctx);
  60. i8Ty = Type::getInt8Ty(Ctx);
  61. Function *EntryFunc = HLM.GetEntryFunction();
  62. functionProps = nullptr;
  63. if (HLM.HasDxilFunctionProps(EntryFunc))
  64. functionProps = &HLM.GetDxilFunctionProps(EntryFunc);
  65. bLegacyCBufferLoad = HLM.GetHLOptions().bLegacyCBufferLoad;
  66. }
// Helper for lowering object (resource) intrinsics. It resolves handle
// values back to their resource class/kind/type via annotate-handle
// calls, records which resource loads need a UAV counter, and
// materializes standalone global resources for resources that were
// declared inside cbuffers.
struct HLObjectOperationLowerHelper {
private:
  // For object intrinsics.
  HLModule &HLM;
  // Resource traits cached per handle value.
  struct ResAttribute {
    DXIL::ResourceClass RC;
    DXIL::ResourceKind RK;
    Type *ResourceType;
  };
  std::unordered_map<Value *, ResAttribute> HandleMetaMap;
  // Caller-owned set of resource loads whose UAV counter is used.
  std::unordered_set<LoadInst *> &UpdateCounterSet;
  // Map from pointer of cbuffer to pointer of resource.
  // For cbuffer like this:
  //   cbuffer A {
  //     Texture2D T;
  //   };
  // A global resource Texture2D T2 will be created for Texture2D T.
  // CBPtrToResourceMap[T] will return T2.
  std::unordered_map<Value *, Value *> CBPtrToResourceMap;

public:
  HLObjectOperationLowerHelper(HLModule &HLM,
                               std::unordered_set<LoadInst *> &UpdateCounter)
      : HLM(HLM), UpdateCounterSet(UpdateCounter) {}
  // Resource class (SRV/UAV/CBuffer/Sampler) for the handle.
  DXIL::ResourceClass GetRC(Value *Handle) {
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
    return Res.RC;
  }
  // Resource kind (Texture2D, StructuredBuffer, ...) for the handle.
  DXIL::ResourceKind GetRK(Value *Handle) {
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
    return Res.RK;
  }
  // HL resource type the handle was annotated with.
  Type *GetResourceType(Value *Handle) {
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
    return Res.ResourceType;
  }
  // Flags the handle's resource as using its UAV counter: updates the
  // annotate-handle properties in place and records the underlying
  // resource load(s) in UpdateCounterSet.
  void MarkHasCounter(Value *handle, Type *i8Ty) {
    CallInst *CIHandle = cast<CallInst>(handle);
    DXASSERT(hlsl::GetHLOpcodeGroup(CIHandle->getCalledFunction()) ==
                 HLOpcodeGroup::HLAnnotateHandle,
             "else invalid handle");
    // Mark has counter for the input handle.
    Value *counterHandle =
        CIHandle->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx);
    // Change kind into StructuredBufferWithCounter by rewriting the
    // resource-properties constant on the annotate-handle call.
    Constant *Props = cast<Constant>(CIHandle->getArgOperand(
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
    DxilResourceProperties RP = resource_helper::loadPropsFromConstant(*Props);
    RP.Basic.SamplerCmpOrHasCounter = true;
    CIHandle->setArgOperand(
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx,
        resource_helper::getAsConstant(
            RP,
            HLM.GetOP()->GetResourcePropertiesType(),
            *HLM.GetShaderModel()));
    // Counters only exist on UAVs.
    DXIL::ResourceClass RC = GetRC(handle);
    DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV,
                      "must UAV for counter");
    std::unordered_set<Value *> resSet;
    MarkHasCounterOnCreateHandle(counterHandle, resSet);
  }
  // Returns (creating on first use) the global resource that stands in
  // for a resource declared inside a cbuffer, keyed by a uniformed GEP.
  Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr,
                                     GlobalVariable *CbGV,
                                     DxilResourceProperties &RP) {
    // Change array idx to 0 to make sure all array ptr share same key.
    Value *Key = UniformCbPtr(CbPtr, CbGV);
    if (CBPtrToResourceMap.count(Key))
      return CBPtrToResourceMap[Key];
    Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, RP);
    CBPtrToResourceMap[Key] = Resource;
    return Resource;
  }
  // Rewrites a cbuffer-member pointer into a pointer to the standalone
  // resource global, flattening any intermediate array indexing.
  Value *LowerCbResourcePtr(GetElementPtrInst *CbPtr, Value *ResPtr) {
    // Simple case.
    if (ResPtr->getType() == CbPtr->getType())
      return ResPtr;
    // Array case.
    DXASSERT_NOMSG(ResPtr->getType()->getPointerElementType()->isArrayTy());
    IRBuilder<> Builder(CbPtr);
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    Value *arrayIdx = GEPIt.getOperand();
    // Only calc array idx and size.
    // Ignore struct type part.
    for (; GEPIt != E; ++GEPIt) {
      if (GEPIt->isArrayTy()) {
        // Linearize nested array indices: idx = idx * dim + nextIdx.
        arrayIdx = Builder.CreateMul(
            arrayIdx, Builder.getInt32(GEPIt->getArrayNumElements()));
        arrayIdx = Builder.CreateAdd(arrayIdx, GEPIt.getOperand());
      }
    }
    return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx});
  }
  // Reads the resource-properties constant off an annotate-handle call.
  DxilResourceProperties GetResPropsFromAnnotateHandle(CallInst *Anno) {
    Constant *Props = cast<Constant>(Anno->getArgOperand(
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
    DxilResourceProperties RP = resource_helper::loadPropsFromConstant(
        *Props);
    return RP;
  }

private:
  // Resolves and caches the resource traits for a handle; emits an
  // error (and returns the cached Invalid entry) when the handle is not
  // produced by an annotate-handle call.
  ResAttribute &FindCreateHandleResourceBase(Value *Handle) {
    if (HandleMetaMap.count(Handle))
      return HandleMetaMap[Handle];
    // Add invalid first to avoid dead loop.
    HandleMetaMap[Handle] = {DXIL::ResourceClass::Invalid,
                             DXIL::ResourceKind::Invalid,
                             StructType::get(Type::getVoidTy(HLM.GetCtx()), nullptr)};
    if (CallInst *CI = dyn_cast<CallInst>(Handle)) {
      hlsl::HLOpcodeGroup group =
          hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
      if (group == HLOpcodeGroup::HLAnnotateHandle) {
        Constant *Props = cast<Constant>(CI->getArgOperand(
            HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
        DxilResourceProperties RP =
            resource_helper::loadPropsFromConstant(*Props);
        Type *ResTy =
            CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceTypeOpIdx)
                ->getType();
        ResAttribute Attrib = {RP.getResourceClass(), RP.getResourceKind(),
                               ResTy};
        HandleMetaMap[Handle] = Attrib;
        return HandleMetaMap[Handle];
      }
    }
    Handle->getContext().emitError("cannot map resource to handle");
    return HandleMetaMap[Handle];
  }
  // Walks through select/phi to the first create-handle call reachable
  // from `handle`; resSet guards against phi cycles.
  CallInst *FindCreateHandle(Value *handle,
                             std::unordered_set<Value *> &resSet) {
    // Already checked.
    if (resSet.count(handle))
      return nullptr;
    resSet.insert(handle);
    if (CallInst *CI = dyn_cast<CallInst>(handle))
      return CI;
    if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
      if (CallInst *CI = FindCreateHandle(Sel->getTrueValue(), resSet))
        return CI;
      if (CallInst *CI = FindCreateHandle(Sel->getFalseValue(), resSet))
        return CI;
      return nullptr;
    }
    if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
      for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
        if (CallInst *CI = FindCreateHandle(Phi->getOperand(i), resSet))
          return CI;
      }
      return nullptr;
    }
    return nullptr;
  }
  // Records every resource load feeding `handle` (through select/phi)
  // into UpdateCounterSet; resSet guards against phi cycles.
  void MarkHasCounterOnCreateHandle(Value *handle,
                                    std::unordered_set<Value *> &resSet) {
    // Already checked.
    if (resSet.count(handle))
      return;
    resSet.insert(handle);
    if (CallInst *CI = dyn_cast<CallInst>(handle)) {
      Value *Res =
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx);
      LoadInst *LdRes = dyn_cast<LoadInst>(Res);
      if (!LdRes) {
        dxilutil::EmitErrorOnInstruction(CI, "cannot map resource to handle.");
        return;
      }
      UpdateCounterSet.insert(LdRes);
      return;
    }
    if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
      MarkHasCounterOnCreateHandle(Sel->getTrueValue(), resSet);
      MarkHasCounterOnCreateHandle(Sel->getFalseValue(), resSet);
    }
    if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
      for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
        MarkHasCounterOnCreateHandle(Phi->getOperand(i), resSet);
      }
    }
  }
  // Builds the canonical map key for a cbuffer-member GEP: same GEP
  // with every dynamic index replaced by 0, so all elements of an
  // array share one key.
  Value *UniformCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV) {
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    std::vector<Value *> idxList(CbPtr->idx_begin(), CbPtr->idx_end());
    unsigned i = 0;
    IRBuilder<> Builder(HLM.GetCtx());
    Value *zero = Builder.getInt32(0);
    for (; GEPIt != E; ++GEPIt, ++i) {
      ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand());
      if (!ImmIdx) {
        // Remove dynamic indexing to avoid crash.
        idxList[i] = zero;
      }
    }
    // Constant-folded GEP expression; identical paths produce the same
    // constant, making it usable as a map key.
    Value *Key = Builder.CreateInBoundsGEP(CbGV, idxList);
    return Key;
  }
  // Creates a global resource for a resource member of a cbuffer,
  // deriving its name from the GEP path and its binding from the
  // cbuffer layout.
  Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV,
                                DxilResourceProperties &RP) {
    Type *CbTy = CbPtr->getPointerOperandType();
    DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(), "else arg not point to var");
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    unsigned i = 0;
    IRBuilder<> Builder(HLM.GetCtx());
    unsigned arraySize = 1;
    DxilTypeSystem &typeSys = HLM.GetTypeSystem();
    std::string Name;
    // Accumulate a dotted name from constant array indices and struct
    // field names along the GEP path.
    for (; GEPIt != E; ++GEPIt, ++i) {
      if (GEPIt->isArrayTy()) {
        arraySize *= GEPIt->getArrayNumElements();
        if (!Name.empty())
          Name += ".";
        if (ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand())) {
          unsigned idx = ImmIdx->getLimitedValue();
          Name += std::to_string(idx);
        }
      } else if (GEPIt->isStructTy()) {
        DxilStructAnnotation *typeAnnot =
            typeSys.GetStructAnnotation(cast<StructType>(*GEPIt));
        DXASSERT_NOMSG(typeAnnot);
        unsigned idx = cast<ConstantInt>(GEPIt.getOperand())->getLimitedValue();
        DXASSERT_NOMSG(typeAnnot->GetNumFields() > idx);
        DxilFieldAnnotation &fieldAnnot = typeAnnot->GetFieldAnnotation(idx);
        if (!Name.empty())
          Name += ".";
        Name += fieldAnnot.GetFieldName();
      }
    }
    Type *Ty = CbPtr->getResultElementType();
    // Not support resource array in cbuffer.
    unsigned ResBinding =
        HLM.GetBindingForResourceInCB(CbPtr, CbGV, RP.getResourceClass());
    return CreateResourceGV(Ty, Name, RP, ResBinding);
  }
  // Creates (or reuses) the named global and registers it with the
  // module as a resource at the given binding.
  Value *CreateResourceGV(Type *Ty, StringRef Name, DxilResourceProperties &RP,
                          unsigned ResBinding) {
    Module &M = *HLM.GetModule();
    Constant *GV = M.getOrInsertGlobal(Name, Ty);
    // Create resource and set GV as globalSym.
    DxilResourceBase *Res = HLM.AddResourceWithGlobalVariableAndProps(GV, RP);
    DXASSERT(Res, "fail to create resource for global variable in cbuffer");
    Res->SetLowerBound(ResBinding);
    return GV;
  }
};
  305. // Helper for lowering resource extension methods.
  306. struct HLObjectExtensionLowerHelper : public hlsl::HLResourceLookup {
  307. explicit HLObjectExtensionLowerHelper(HLObjectOperationLowerHelper &ObjHelper)
  308. : m_ObjHelper(ObjHelper)
  309. { }
  310. virtual bool GetResourceKindName(Value *HLHandle, const char **ppName)
  311. {
  312. DXIL::ResourceKind K = m_ObjHelper.GetRK(HLHandle);
  313. bool Success = K != DXIL::ResourceKind::Invalid;
  314. if (Success)
  315. {
  316. *ppName = hlsl::GetResourceKindName(K);
  317. }
  318. return Success;
  319. }
  320. private:
  321. HLObjectOperationLowerHelper &m_ObjHelper;
  322. };
// Common signature for all intrinsic lowering functions. `Translated`
// lets a lowering report that it did not (or could not) translate the
// call; the returned Value (possibly null) replaces the HL call.
using IntrinsicLowerFuncTy = Value *(CallInst *CI, IntrinsicOp IOP,
                                     DXIL::OpCode opcode,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated);
// One row of the intrinsic lowering table: maps an HLSL intrinsic to
// its lowering function plus the DXIL opcode when a direct mapping
// exists.
struct IntrinsicLower {
  // Intrinsic opcode.
  IntrinsicOp IntriOpcode;
  // Lower function.
  IntrinsicLowerFuncTy &LowerFunc;
  // DXIL opcode if can direct map.
  DXIL::OpCode DxilOpcode;
};
  334. // IOP intrinsics.
  335. namespace {
  336. Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef<Value *> refArgs,
  337. Type *Ty, Type *RetTy, OP *hlslOP,
  338. IRBuilder<> &Builder) {
  339. unsigned argNum = refArgs.size();
  340. std::vector<Value *> args = refArgs;
  341. if (Ty->isVectorTy()) {
  342. Value *retVal = llvm::UndefValue::get(RetTy);
  343. unsigned vecSize = Ty->getVectorNumElements();
  344. for (unsigned i = 0; i < vecSize; i++) {
  345. // Update vector args, skip known opcode arg.
  346. for (unsigned argIdx = HLOperandIndex::kUnaryOpSrc0Idx; argIdx < argNum;
  347. argIdx++) {
  348. if (refArgs[argIdx]->getType()->isVectorTy()) {
  349. Value *arg = refArgs[argIdx];
  350. args[argIdx] = Builder.CreateExtractElement(arg, i);
  351. }
  352. }
  353. Value *EltOP =
  354. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  355. retVal = Builder.CreateInsertElement(retVal, EltOP, i);
  356. }
  357. return retVal;
  358. } else {
  359. if (!RetTy->isVoidTy()) {
  360. Value *retVal =
  361. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  362. return retVal;
  363. } else {
  364. // Cannot add name to void.
  365. return Builder.CreateCall(dxilFunc, args);
  366. }
  367. }
  368. }
  369. // Generates a DXIL operation over an overloaded type (Ty), returning a
  370. // RetTy value; when Ty is a vector, it will replicate per-element operations
  371. // into RetTy to rebuild it.
  372. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
  373. Type *Ty, Type *RetTy, OP *hlslOP,
  374. IRBuilder<> &Builder) {
  375. Type *EltTy = Ty->getScalarType();
  376. Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy);
  377. return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP, Builder);
  378. }
// Emits the DXIL op right before Inst, using Inst's type as the return
// type. The caller passes refArgs with slot 0 left null; this function
// fills it with the opcode constant.
Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
                            Type *Ty, Instruction *Inst, OP *hlslOP) {
  DXASSERT(refArgs.size() > 0, "else opcode isn't in signature");
  DXASSERT(refArgs[0] == nullptr,
           "else caller has already filled the value in");
  IRBuilder<> B(Inst);
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  // Deliberate write through the ArrayRef: slot 0 aliases the caller's
  // stack array, which this function is expected to populate.
  const_cast<llvm::Value **>(refArgs.data())[0] =
      opArg; // actually stack memory from caller
  return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B);
}
  390. Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy,
  391. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  392. Type *Ty = src->getType();
  393. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  394. Value *args[] = {opArg, src};
  395. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  396. }
  397. Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src,
  398. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  399. return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP,
  400. Builder);
  401. }
  402. Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  403. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  404. Type *Ty = src0->getType();
  405. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  406. Value *args[] = {opArg, src0, src1};
  407. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  408. }
  409. Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  410. Value *src2, hlsl::OP *hlslOP,
  411. IRBuilder<> &Builder) {
  412. Type *Ty = src0->getType();
  413. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  414. Value *args[] = {opArg, src0, src1, src2};
  415. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  416. }
  417. Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  418. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  419. Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  420. IRBuilder<> Builder(CI);
  421. hlsl::OP *hlslOP = &helper.hlslOP;
  422. Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), hlslOP, Builder);
  423. return retVal;
  424. }
  425. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  426. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  427. hlsl::OP *hlslOP = &helper.hlslOP;
  428. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  429. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  430. IRBuilder<> Builder(CI);
  431. Value *binOp =
  432. TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder);
  433. return binOp;
  434. }
  435. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  436. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  437. hlsl::OP *hlslOP = &helper.hlslOP;
  438. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  439. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  440. Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  441. IRBuilder<> Builder(CI);
  442. Value *triOp =
  443. TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder);
  444. return triOp;
  445. }
  446. Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  447. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  448. hlsl::OP *hlslOP = &helper.hlslOP;
  449. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  450. IRBuilder<> Builder(CI);
  451. Type *Ty = src->getType();
  452. Type *RetTy = Type::getInt1Ty(CI->getContext());
  453. if (Ty->isVectorTy())
  454. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  455. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  456. Value *args[] = {opArg, src};
  457. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  458. }
  459. bool IsResourceGEP(GetElementPtrInst *I) {
  460. Type *Ty = I->getType()->getPointerElementType();
  461. Ty = dxilutil::GetArrayEltTy(Ty);
  462. // Only mark on GEP which point to resource.
  463. return dxilutil::IsHLSLResourceType(Ty);
  464. }
  465. Value *TranslateNonUniformResourceIndex(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  466. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  467. Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  468. Type *hdlTy = helper.hlslOP.GetHandleType();
  469. for (User *U : CI->users()) {
  470. if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(U)) {
  471. // Only mark on GEP which point to resource.
  472. if (IsResourceGEP(I))
  473. DxilMDHelper::MarkNonUniform(I);
  474. } else if (CastInst *castI = dyn_cast<CastInst>(U)) {
  475. for (User *castU : castI->users()) {
  476. if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(castU)) {
  477. // Only mark on GEP which point to resource.
  478. if (IsResourceGEP(I))
  479. DxilMDHelper::MarkNonUniform(I);
  480. }
  481. }
  482. } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
  483. if (CI->getType() == hdlTy)
  484. DxilMDHelper::MarkNonUniform(CI);
  485. }
  486. }
  487. CI->replaceAllUsesWith(V);
  488. return nullptr;
  489. }
  490. Value *TrivialBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  491. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  492. hlsl::OP *OP = &helper.hlslOP;
  493. Function *dxilFunc = OP->GetOpFunc(OP::OpCode::Barrier, CI->getType());
  494. Constant *opArg = OP->GetU32Const((unsigned)OP::OpCode::Barrier);
  495. unsigned uglobal = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceGlobal);
  496. unsigned g = static_cast<unsigned>(DXIL::BarrierMode::TGSMFence);
  497. unsigned t = static_cast<unsigned>(DXIL::BarrierMode::SyncThreadGroup);
  498. // unsigned ut = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceThreadGroup);
  499. unsigned barrierMode = 0;
  500. switch (IOP) {
  501. case IntrinsicOp::IOP_AllMemoryBarrier:
  502. barrierMode = uglobal | g;
  503. break;
  504. case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
  505. barrierMode = uglobal | g | t;
  506. break;
  507. case IntrinsicOp::IOP_GroupMemoryBarrier:
  508. barrierMode = g;
  509. break;
  510. case IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync:
  511. barrierMode = g | t;
  512. break;
  513. case IntrinsicOp::IOP_DeviceMemoryBarrier:
  514. barrierMode = uglobal;
  515. break;
  516. case IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync:
  517. barrierMode = uglobal | t;
  518. break;
  519. default:
  520. DXASSERT(0, "invalid opcode for barrier");
  521. break;
  522. }
  523. Value *src0 = OP->GetU32Const(static_cast<unsigned>(barrierMode));
  524. Value *args[] = {opArg, src0};
  525. IRBuilder<> Builder(CI);
  526. Builder.CreateCall(dxilFunc, args);
  527. return nullptr;
  528. }
  529. Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
  530. OP::OpCode opcode,
  531. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  532. IRBuilder<> Builder(CI);
  533. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  534. Type *Ty = val->getType();
  535. // Use the same scaling factor used by FXC (i.e., 255.001953)
  536. // Excerpt from stackoverflow discussion:
  537. // "Built-in rounding, necessary because of truncation. 0.001953 * 256 = 0.5"
  538. Constant *toByteConst = ConstantFP::get(Ty->getScalarType(), 255.001953);
  539. if (Ty->isVectorTy()) {
  540. static constexpr int supportedVecElemCount = 4;
  541. if (Ty->getVectorNumElements() == supportedVecElemCount) {
  542. toByteConst = ConstantVector::getSplat(supportedVecElemCount, toByteConst);
  543. // Swizzle the input val -> val.zyxw
  544. std::vector<int> mask { 2, 1, 0, 3 };
  545. val = Builder.CreateShuffleVector(val, val, mask);
  546. } else {
  547. dxilutil::EmitErrorOnInstruction(CI, "Unsupported input type for intrinsic D3DColorToUByte4.");
  548. return UndefValue::get(CI->getType());
  549. }
  550. }
  551. Value *byte4 = Builder.CreateFMul(toByteConst, val);
  552. return Builder.CreateCast(Instruction::CastOps::FPToSI, byte4, CI->getType());
  553. }
  554. // Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
  555. // Fxc uses the below rules when choosing mul-only code gen pattern to implement pow function.
  556. // Rule 1: Applicable only to power values in the range [INT32_MIN, INT32_MAX]
  557. // Rule 2: The maximum number of mul ops needed shouldn't exceed (2n+1) or (n+1) based on whether the power
  558. // is a positive or a negative value. Here "n" is the number of scalar elements in power.
  559. // Rule 3: Power must be an exact value.
  560. // +----------+---------------------+------------------+
  561. // | BaseType | IsExponentPositive | MaxMulOpsAllowed |
  562. // +----------+---------------------+------------------+
  563. // | float4x4 | True | 33 |
  564. // | float4x4 | False | 17 |
  565. // | float4x2 | True | 17 |
  566. // | float4x2 | False | 9 |
  567. // | float2x4 | True | 17 |
  568. // | float2x4 | False | 9 |
  569. // | float4 | True | 9 |
  570. // | float4 | False | 5 |
  571. // | float2 | True | 5 |
  572. // | float2 | False | 3 |
  573. // | float | True | 3 |
  574. // | float | False | 2 |
  575. // +----------+---------------------+------------------+
  576. bool CanUseFxcMulOnlyPatternForPow(IRBuilder<>& Builder, Value *x, Value *pow, int32_t& powI) {
  577. // Applicable only when power is a literal.
  578. if (!isa<ConstantDataVector>(pow) && !isa<ConstantFP>(pow)) {
  579. return false;
  580. }
  581. // Only apply this code gen on splat values.
  582. if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) {
  583. if (!hlsl::dxilutil::IsSplat(cdv)) {
  584. return false;
  585. }
  586. }
  587. APFloat powAPF = isa<ConstantDataVector>(pow) ?
  588. cast<ConstantDataVector>(pow)->getElementAsAPFloat(0) : // should be a splat value
  589. cast<ConstantFP>(pow)->getValueAPF();
  590. APSInt powAPS(32, false);
  591. bool isExact = false;
  592. // Try converting float value of power to integer and also check if the float value is exact.
  593. APFloat::opStatus status = powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
  594. if (status == APFloat::opStatus::opOK && isExact) {
  595. powI = powAPS.getExtValue();
  596. uint32_t powU = abs(powI);
  597. int setBitCount = 0;
  598. int maxBitSetPos = -1;
  599. for (int i = 0; i < 32; i++) {
  600. if ((powU >> i) & 1) {
  601. setBitCount++;
  602. maxBitSetPos = i;
  603. }
  604. }
  605. DXASSERT(maxBitSetPos <= 30, "msb should always be zero.");
  606. unsigned numElem = isa<ConstantDataVector>(pow) ? x->getType()->getVectorNumElements() : 1;
  607. int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1;
  608. int mulOpNeeded = maxBitSetPos + setBitCount - 1;
  609. return mulOpNeeded <= mulOpThreshold;
  610. }
  611. return false;
  612. }
  613. Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const int32_t y) {
  614. uint32_t absY = abs(y);
  615. // If y is zero then always return 1.
  616. if (absY == 0) {
  617. return ConstantFP::get(x->getType(), 1);
  618. }
  619. int lastSetPos = -1;
  620. Value *result = nullptr;
  621. Value *mul = nullptr;
  622. for (int i = 0; i < 32; i++) {
  623. if ((absY >> i) & 1) {
  624. for (int j = i; j > lastSetPos; j--) {
  625. if (!mul) {
  626. mul = x;
  627. }
  628. else {
  629. mul = Builder.CreateFMul(mul, mul);
  630. }
  631. }
  632. result = (result == nullptr) ? mul : Builder.CreateFMul(result, mul);
  633. lastSetPos = i;
  634. }
  635. }
  636. // Compute reciprocal for negative power values.
  637. if (y < 0) {
  638. Value* constOne = ConstantFP::get(x->getType(), 1);
  639. result = Builder.CreateFDiv(constOne, result);
  640. }
  641. return result;
  642. }
  643. Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
  644. // As applicable implement pow using only mul ops as done by Fxc.
  645. int32_t p = 0;
  646. if (CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
  647. if (isFXCCompatMode) {
  648. return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
  649. } else if (p == 2) {
  650. // Only take care 2 for it will not affect register pressure.
  651. return Builder.CreateFMul(x, x);
  652. }
  653. }
  654. // Default to log-mul-exp pattern if previous scenarios don't apply.
  655. // t = log(x);
  656. Value *logX =
  657. TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
  658. // t = y * t;
  659. Value *mulY = Builder.CreateFMul(logX, y);
  660. // pow = exp(t);
  661. return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
  662. }
  663. Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
  664. OP::OpCode opcode,
  665. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  666. hlsl::OP *hlslOP = &helper.hlslOP;
  667. IRBuilder<> Builder(CI);
  668. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  669. Type *Ty = val->getType();
  670. VectorType *VT = dyn_cast<VectorType>(Ty);
  671. if (!VT) {
  672. dxilutil::EmitErrorOnInstruction(
  673. CI, "AddUint64 can only be applied to uint2 and uint4 operands.");
  674. return UndefValue::get(Ty);
  675. }
  676. unsigned size = VT->getNumElements();
  677. if (size != 2 && size != 4) {
  678. dxilutil::EmitErrorOnInstruction(
  679. CI, "AddUint64 can only be applied to uint2 and uint4 operands.");
  680. return UndefValue::get(Ty);
  681. }
  682. Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  683. Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  684. Value *RetVal = UndefValue::get(Ty);
  685. Function *AddC = hlslOP->GetOpFunc(DXIL::OpCode::UAddc, helper.i32Ty);
  686. Value *opArg = Builder.getInt32(static_cast<unsigned>(DXIL::OpCode::UAddc));
  687. for (unsigned i=0; i<size; i+=2) {
  688. Value *low0 = Builder.CreateExtractElement(op0, i);
  689. Value *low1 = Builder.CreateExtractElement(op1, i);
  690. Value *lowWithC = Builder.CreateCall(AddC, { opArg, low0, low1});
  691. Value *low = Builder.CreateExtractValue(lowWithC, 0);
  692. RetVal = Builder.CreateInsertElement(RetVal, low, i);
  693. Value *carry = Builder.CreateExtractValue(lowWithC, 1);
  694. // Ext i1 to i32
  695. carry = Builder.CreateZExt(carry, helper.i32Ty);
  696. Value *hi0 = Builder.CreateExtractElement(op0, i+1);
  697. Value *hi1 = Builder.CreateExtractElement(op1, i+1);
  698. Value *hi = Builder.CreateAdd(hi0, hi1);
  699. hi = Builder.CreateAdd(hi, carry);
  700. RetVal = Builder.CreateInsertElement(RetVal, hi, i+1);
  701. }
  702. return RetVal;
  703. }
  704. bool IsValidLoadInput(Value *V) {
  705. // Must be load input.
  706. // TODO: report this error on front-end
  707. if (!V || !isa<CallInst>(V)) {
  708. return false;
  709. }
  710. CallInst *CI = cast<CallInst>(V);
  711. // Must be immediate.
  712. ConstantInt *opArg =
  713. cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
  714. DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
  715. if (op != DXIL::OpCode::LoadInput) {
  716. return false;
  717. }
  718. return true;
  719. }
// Tunnel through insert/extract element and shuffle to find original source
// of scalar value, or specified element (vecIdx) of vector value.
// Returns an UndefValue of the scalar type when the chain cannot be
// followed.
Value *FindScalarSource(Value *src, unsigned vecIdx = 0) {
  Type *srcTy = src->getType()->getScalarType();
  while (src && !isa<UndefValue>(src)) {
    if (src->getType()->isVectorTy()) {
      if (InsertElementInst *IE = dyn_cast<InsertElementInst>(src)) {
        // If this insert wrote the element we track, follow the
        // inserted scalar (operand 1); otherwise keep walking the
        // base vector (operand 0).
        unsigned curIdx = (unsigned)cast<ConstantInt>(IE->getOperand(2))
            ->getUniqueInteger().getLimitedValue();
        src = IE->getOperand( (curIdx == vecIdx) ? 1 : 0 );
      } else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(src)) {
        // Map our element index through the shuffle mask; a negative
        // mask entry means undef.
        int newIdx = SV->getMaskValue(vecIdx);
        if (newIdx < 0)
          return UndefValue::get(srcTy);
        vecIdx = (unsigned)newIdx;
        src = SV->getOperand(0);
        // Mask indices past the first operand's width select from the
        // second shuffle operand.
        unsigned numElt = src->getType()->getVectorNumElements();
        if (numElt <= vecIdx) {
          vecIdx -= numElt;
          src = SV->getOperand(1);
        }
      } else {
        return UndefValue::get(srcTy); // Didn't find it.
      }
    } else {
      if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(src)) {
        // Switch to tracking the extracted element of the source vector.
        vecIdx = (unsigned)cast<ConstantInt>(EE->getIndexOperand())
            ->getUniqueInteger().getLimitedValue();
        src = EE->getVectorOperand();
      } else if (hlsl::dxilutil::IsConvergentMarker(src)) {
        // Convergent markers are transparent; look through them.
        src = hlsl::dxilutil::GetConvergentSource(src);
      } else {
        break; // Found it.
      }
    }
  }
  return src;
}
  758. // Finds corresponding inputs, calls translation for each, and returns
  759. // resulting vector or scalar.
  760. // Uses functor that takes (inputElemID, rowIdx, colIdx), and returns
  761. // translation for one input scalar.
  762. Value *TranslateEvalHelper(CallInst *CI, Value *val, IRBuilder<> &Builder,
  763. std::function<Value*(Value*, Value*, Value*)> fnTranslateScalarInput) {
  764. Type *Ty = CI->getType();
  765. Value *result = UndefValue::get(Ty);
  766. if (Ty->isVectorTy()) {
  767. for (unsigned i = 0; i < Ty->getVectorNumElements(); ++i) {
  768. Value *InputEl = FindScalarSource(val, i);
  769. if (!IsValidLoadInput(InputEl)) {
  770. dxilutil::EmitErrorOnInstruction(CI, "attribute evaluation can only be done "
  771. "on values taken directly from inputs.");
  772. return result;
  773. }
  774. CallInst *loadInput = cast<CallInst>(InputEl);
  775. Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
  776. Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
  777. Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
  778. Value *Elt = fnTranslateScalarInput(inputElemID, rowIdx, colIdx);
  779. result = Builder.CreateInsertElement(result, Elt, i);
  780. }
  781. }
  782. else {
  783. Value *InputEl = FindScalarSource(val);
  784. if (!IsValidLoadInput(InputEl)) {
  785. dxilutil::EmitErrorOnInstruction(CI, "attribute evaluation can only be done "
  786. "on values taken directly from inputs.");
  787. return result;
  788. }
  789. CallInst *loadInput = cast<CallInst>(InputEl);
  790. Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
  791. Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
  792. Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
  793. result = fnTranslateScalarInput(inputElemID, rowIdx, colIdx);
  794. }
  795. return result;
  796. }
  797. Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  798. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  799. hlsl::OP *hlslOP = &helper.hlslOP;
  800. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  801. Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  802. IRBuilder<> Builder(CI);
  803. OP::OpCode opcode = OP::OpCode::EvalSampleIndex;
  804. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  805. Function *evalFunc = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  806. return TranslateEvalHelper(CI, val, Builder,
  807. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  808. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, sampleIdx });
  809. }
  810. );
  811. }
  812. Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  813. HLOperationLowerHelper &helper,
  814. HLObjectOperationLowerHelper *pObjHelper,
  815. bool &Translated) {
  816. hlsl::OP *hlslOP = &helper.hlslOP;
  817. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  818. Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  819. IRBuilder<> Builder(CI);
  820. Value *offsetX = Builder.CreateExtractElement(offset, (uint64_t)0);
  821. Value *offsetY = Builder.CreateExtractElement(offset, 1);
  822. OP::OpCode opcode = OP::OpCode::EvalSnapped;
  823. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  824. Function *evalFunc = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  825. return TranslateEvalHelper(CI, val, Builder,
  826. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  827. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, offsetX, offsetY });
  828. }
  829. );
  830. }
  831. Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  832. HLOperationLowerHelper &helper,
  833. HLObjectOperationLowerHelper *pObjHelper,
  834. bool &Translated) {
  835. hlsl::OP *hlslOP = &helper.hlslOP;
  836. Value *val = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
  837. IRBuilder<> Builder(CI);
  838. OP::OpCode opcode = OP::OpCode::EvalCentroid;
  839. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  840. Function *evalFunc = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  841. return TranslateEvalHelper(CI, val, Builder,
  842. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  843. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx });
  844. }
  845. );
  846. }
  847. Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  848. HLOperationLowerHelper &helper,
  849. HLObjectOperationLowerHelper *pObjHelper,
  850. bool &Translated) {
  851. DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate");
  852. hlsl::OP *hlslOP = &helper.hlslOP;
  853. IRBuilder<> Builder(CI);
  854. Value *val = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx);
  855. Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx);
  856. Value *vertexI8Idx = Builder.CreateTrunc(vertexIdx, Type::getInt8Ty(CI->getContext()));
  857. Value *opArg = hlslOP->GetU32Const((unsigned)op);
  858. Function *evalFunc = hlslOP->GetOpFunc(op, val->getType()->getScalarType());
  859. return TranslateEvalHelper(CI, val, Builder,
  860. [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value* {
  861. return Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, vertexI8Idx });
  862. }
  863. );
  864. }
  865. Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  866. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  867. hlsl::OP *hlslOP = &helper.hlslOP;
  868. Type *Ty = Type::getVoidTy(CI->getContext());
  869. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  870. Value *args[] = {opArg};
  871. IRBuilder<> Builder(CI);
  872. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  873. return dxilOp;
  874. }
  875. Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  876. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  877. hlsl::OP *hlslOP = &helper.hlslOP;
  878. Type *Ty = CI->getType();
  879. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  880. Value *args[] = {opArg};
  881. IRBuilder<> Builder(CI);
  882. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  883. return dxilOp;
  884. }
  885. Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  886. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  887. hlsl::OP *hlslOP = &helper.hlslOP;
  888. OP::OpCode opcode = OP::OpCode::RenderTargetGetSamplePosition;
  889. IRBuilder<> Builder(CI);
  890. Type *Ty = Type::getVoidTy(CI->getContext());
  891. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  892. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  893. Value *args[] = {opArg, val};
  894. Value *samplePos =
  895. TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  896. Value *result = UndefValue::get(CI->getType());
  897. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  898. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  899. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  900. result = Builder.CreateInsertElement(result, samplePosY, 1);
  901. return result;
  902. }
  903. // val QuadReadLaneAt(val, uint);
  904. Value *TranslateQuadReadLaneAt(CallInst *CI, IntrinsicOp IOP,
  905. OP::OpCode opcode,
  906. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  907. hlsl::OP *hlslOP = &helper.hlslOP;
  908. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  909. return TrivialDxilOperation(DXIL::OpCode::QuadReadLaneAt, refArgs,
  910. CI->getOperand(1)->getType(), CI, hlslOP);
  911. }
  912. // Wave intrinsics of the form fn(val,QuadOpKind)->val
  913. Value *TranslateQuadReadAcross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  914. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  915. hlsl::OP *hlslOP = &helper.hlslOP;
  916. DXIL::QuadOpKind opKind;
  917. switch (IOP) {
  918. case IntrinsicOp::IOP_QuadReadAcrossX: opKind = DXIL::QuadOpKind::ReadAcrossX; break;
  919. case IntrinsicOp::IOP_QuadReadAcrossY: opKind = DXIL::QuadOpKind::ReadAcrossY; break;
  920. default: DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_QuadReadAcrossDiagonal);
  921. case IntrinsicOp::IOP_QuadReadAcrossDiagonal: opKind = DXIL::QuadOpKind::ReadAcrossDiagonal; break;
  922. }
  923. Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind);
  924. Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg};
  925. return TrivialDxilOperation(DXIL::OpCode::QuadOp, refArgs,
  926. CI->getOperand(1)->getType(), CI, hlslOP);
  927. }
  928. // WaveAllEqual(val<n>)->bool<n>
  929. Value *TranslateWaveAllEqual(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  930. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  931. hlsl::OP *hlslOP = &helper.hlslOP;
  932. Value *src = CI->getArgOperand(HLOperandIndex::kWaveAllEqualValueOpIdx);
  933. IRBuilder<> Builder(CI);
  934. Type *Ty = src->getType();
  935. Type *RetTy = Type::getInt1Ty(CI->getContext());
  936. if (Ty->isVectorTy())
  937. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  938. Constant *opArg = hlslOP->GetU32Const((unsigned)DXIL::OpCode::WaveActiveAllEqual);
  939. Value *args[] = {opArg, src};
  940. return TrivialDxilOperation(DXIL::OpCode::WaveActiveAllEqual, args, Ty, RetTy,
  941. hlslOP, Builder);
  942. }
// WaveMatch(val<n>)->uint4
Value *TranslateWaveMatch(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc,
                          HLOperationLowerHelper &Helper,
                          HLObjectOperationLowerHelper *ObjHelper,
                          bool &Translated) {
  hlsl::OP *Op = &Helper.hlslOP;
  IRBuilder<> Builder(CI);
  // Generate a dx.op.waveMatch call for each scalar in the input, and perform
  // a bitwise AND between each result to derive the final bitmask in the case
  // of vector inputs.
  // (1) Collect the list of all scalar inputs (e.g. decompose vectors)
  SmallVector<Value *, 4> ScalarInputs;
  Value *Val = CI->getArgOperand(1);
  Type *ValTy = Val->getType();
  Type *EltTy = ValTy->getScalarType();
  if (ValTy->isVectorTy()) {
    for (uint64_t i = 0, e = ValTy->getVectorNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractElement(Val, i);
      ScalarInputs.push_back(Elt);
    }
  } else {
    ScalarInputs.push_back(Val);
  }
  Value *Res = nullptr;
  Constant *OpcArg = Op->GetU32Const((unsigned)DXIL::OpCode::WaveMatch);
  Value *Fn = Op->GetOpFunc(OP::OpCode::WaveMatch, EltTy);
  // (2) For each scalar, emit a call to dx.op.waveMatch. If this is not the
  // first scalar, then AND the result with the accumulator.
  for (unsigned i = 0, e = ScalarInputs.size(); i != e; ++i) {
    Value *Args[] = {OpcArg, ScalarInputs[i]};
    Value *Call = Builder.CreateCall(Fn, Args);
    if (Res) {
      // Generate bitwise AND of the components
      // (the dxil op result is a four-field struct).
      for (unsigned j = 0; j != 4; ++j) {
        Value *ResVal = Builder.CreateExtractValue(Res, j);
        Value *CallVal = Builder.CreateExtractValue(Call, j);
        Value *And = Builder.CreateAnd(ResVal, CallVal);
        Res = Builder.CreateInsertValue(Res, And, j);
      }
    } else {
      Res = Call;
    }
  }
  // (3) Convert the final aggregate into a vector to make the types match
  Value *ResVec = UndefValue::get(CI->getType());
  for (unsigned i = 0; i != 4; ++i) {
    Value *Elt = Builder.CreateExtractValue(Res, i);
    ResVec = Builder.CreateInsertElement(ResVec, Elt, i);
  }
  return ResVec;
}
  994. // Wave intrinsics of the form fn(valA)->valB, where no overloading takes place
  995. Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  996. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  997. hlsl::OP *hlslOP = &helper.hlslOP;
  998. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  999. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  1000. }
  1001. // Wave ballot intrinsic.
  1002. Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1003. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1004. // The high-level operation is uint4 ballot(i1).
  1005. // The DXIL operation is struct.u4 ballot(i1).
  1006. // To avoid updating users with more than a simple replace, we translate into
  1007. // a call into struct.u4, then reassemble the vector.
  1008. // Scalarization and constant propagation take care of cleanup.
  1009. IRBuilder<> B(CI);
  1010. // Make the DXIL call itself.
  1011. hlsl::OP *hlslOP = &helper.hlslOP;
  1012. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  1013. Value *refArgs[] = { opArg, CI->getOperand(1) };
  1014. Function *dxilFunc = hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  1015. Value *dxilVal = B.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode));
  1016. // Assign from the call results into a vector.
  1017. Type *ResTy = CI->getType();
  1018. DXASSERT_NOMSG(ResTy->isVectorTy() && ResTy->getVectorNumElements() == 4);
  1019. DXASSERT_NOMSG(dxilVal->getType()->isStructTy() &&
  1020. dxilVal->getType()->getNumContainedTypes() == 4);
  1021. // 'x' component is the first vector element, highest bits.
  1022. Value *ResVal = llvm::UndefValue::get(ResTy);
  1023. for (unsigned Idx = 0; Idx < 4; ++Idx) {
  1024. ResVal = B.CreateInsertElement(
  1025. ResVal, B.CreateExtractValue(dxilVal, ArrayRef<unsigned>(Idx)), Idx);
  1026. }
  1027. return ResVal;
  1028. }
  1029. static bool WaveIntrinsicNeedsSign(OP::OpCode opcode) {
  1030. return opcode == OP::OpCode::WaveActiveOp ||
  1031. opcode == OP::OpCode::WavePrefixOp;
  1032. }
  1033. static unsigned WaveIntrinsicToSignedOpKind(IntrinsicOp IOP) {
  1034. if (IOP == IntrinsicOp::IOP_WaveActiveUMax ||
  1035. IOP == IntrinsicOp::IOP_WaveActiveUMin ||
  1036. IOP == IntrinsicOp::IOP_WaveActiveUSum ||
  1037. IOP == IntrinsicOp::IOP_WaveActiveUProduct ||
  1038. IOP == IntrinsicOp::IOP_WaveMultiPrefixUProduct ||
  1039. IOP == IntrinsicOp::IOP_WaveMultiPrefixUSum ||
  1040. IOP == IntrinsicOp::IOP_WavePrefixUSum ||
  1041. IOP == IntrinsicOp::IOP_WavePrefixUProduct)
  1042. return (unsigned)DXIL::SignedOpKind::Unsigned;
  1043. return (unsigned)DXIL::SignedOpKind::Signed;
  1044. }
  1045. static unsigned WaveIntrinsicToOpKind(IntrinsicOp IOP) {
  1046. switch (IOP) {
  1047. // Bit operations.
  1048. case IntrinsicOp::IOP_WaveActiveBitOr:
  1049. return (unsigned)DXIL::WaveBitOpKind::Or;
  1050. case IntrinsicOp::IOP_WaveActiveBitAnd:
  1051. return (unsigned)DXIL::WaveBitOpKind::And;
  1052. case IntrinsicOp::IOP_WaveActiveBitXor:
  1053. return (unsigned)DXIL::WaveBitOpKind::Xor;
  1054. // Prefix operations.
  1055. case IntrinsicOp::IOP_WavePrefixSum:
  1056. case IntrinsicOp::IOP_WavePrefixUSum:
  1057. return (unsigned)DXIL::WaveOpKind::Sum;
  1058. case IntrinsicOp::IOP_WavePrefixProduct:
  1059. case IntrinsicOp::IOP_WavePrefixUProduct:
  1060. return (unsigned)DXIL::WaveOpKind::Product;
  1061. // Numeric operations.
  1062. case IntrinsicOp::IOP_WaveActiveMax:
  1063. case IntrinsicOp::IOP_WaveActiveUMax:
  1064. return (unsigned)DXIL::WaveOpKind::Max;
  1065. case IntrinsicOp::IOP_WaveActiveMin:
  1066. case IntrinsicOp::IOP_WaveActiveUMin:
  1067. return (unsigned)DXIL::WaveOpKind::Min;
  1068. case IntrinsicOp::IOP_WaveActiveSum:
  1069. case IntrinsicOp::IOP_WaveActiveUSum:
  1070. return (unsigned)DXIL::WaveOpKind::Sum;
  1071. case IntrinsicOp::IOP_WaveActiveProduct:
  1072. case IntrinsicOp::IOP_WaveActiveUProduct:
  1073. // MultiPrefix operations
  1074. case IntrinsicOp::IOP_WaveMultiPrefixBitAnd:
  1075. return (unsigned)DXIL::WaveMultiPrefixOpKind::And;
  1076. case IntrinsicOp::IOP_WaveMultiPrefixBitOr:
  1077. return (unsigned)DXIL::WaveMultiPrefixOpKind::Or;
  1078. case IntrinsicOp::IOP_WaveMultiPrefixBitXor:
  1079. return (unsigned)DXIL::WaveMultiPrefixOpKind::Xor;
  1080. case IntrinsicOp::IOP_WaveMultiPrefixProduct:
  1081. case IntrinsicOp::IOP_WaveMultiPrefixUProduct:
  1082. return (unsigned)DXIL::WaveMultiPrefixOpKind::Product;
  1083. case IntrinsicOp::IOP_WaveMultiPrefixSum:
  1084. case IntrinsicOp::IOP_WaveMultiPrefixUSum:
  1085. return (unsigned)DXIL::WaveMultiPrefixOpKind::Sum;
  1086. default:
  1087. DXASSERT(IOP == IntrinsicOp::IOP_WaveActiveProduct ||
  1088. IOP == IntrinsicOp::IOP_WaveActiveUProduct,
  1089. "else caller passed incorrect value");
  1090. return (unsigned)DXIL::WaveOpKind::Product;
  1091. }
  1092. }
  1093. // Wave intrinsics of the form fn(valA)->valA
  1094. Value *TranslateWaveA2A(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1095. HLOperationLowerHelper &helper,
  1096. HLObjectOperationLowerHelper *pObjHelper,
  1097. bool &Translated) {
  1098. hlsl::OP *hlslOP = &helper.hlslOP;
  1099. Constant *kindValInt = hlslOP->GetI8Const(WaveIntrinsicToOpKind(IOP));
  1100. Constant *signValInt = hlslOP->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
  1101. Value *refArgs[] = {nullptr, CI->getOperand(1), kindValInt, signValInt};
  1102. unsigned refArgCount = _countof(refArgs);
  1103. if (!WaveIntrinsicNeedsSign(opcode))
  1104. refArgCount--;
  1105. return TrivialDxilOperation(opcode,
  1106. llvm::ArrayRef<Value *>(refArgs, refArgCount),
  1107. CI->getOperand(1)->getType(), CI, hlslOP);
  1108. }
  1109. // WaveMultiPrefixOP(val<n>, mask) -> val<n>
  1110. Value *TranslateWaveMultiPrefix(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc,
  1111. HLOperationLowerHelper &Helper,
  1112. HLObjectOperationLowerHelper *ObjHelper,
  1113. bool &Translated) {
  1114. hlsl::OP *Op = &Helper.hlslOP;
  1115. Constant *KindValInt = Op->GetI8Const(WaveIntrinsicToOpKind(IOP));
  1116. Constant *SignValInt = Op->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
  1117. // Decompose mask into scalars
  1118. IRBuilder<> Builder(CI);
  1119. Value *Mask = CI->getArgOperand(2);
  1120. Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0);
  1121. Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1);
  1122. Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2);
  1123. Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3);
  1124. Value *Args[] = { nullptr, CI->getOperand(1),
  1125. Mask0, Mask1, Mask2, Mask3, KindValInt, SignValInt };
  1126. return TrivialDxilOperation(Opc, Args, CI->getOperand(1)->getType(), CI, Op);
  1127. }
  1128. // WaveMultiPrefixBitCount(i1, mask) -> i32
  1129. Value *TranslateWaveMultiPrefixBitCount(CallInst *CI, IntrinsicOp IOP,
  1130. OP::OpCode Opc,
  1131. HLOperationLowerHelper &Helper,
  1132. HLObjectOperationLowerHelper *ObjHelper,
  1133. bool &Translated) {
  1134. hlsl::OP *Op = &Helper.hlslOP;
  1135. // Decompose mask into scalars
  1136. IRBuilder<> Builder(CI);
  1137. Value *Mask = CI->getArgOperand(2);
  1138. Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0);
  1139. Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1);
  1140. Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2);
  1141. Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3);
  1142. Value *Args[] = { nullptr, CI->getOperand(1), Mask0, Mask1, Mask2, Mask3 };
  1143. return TrivialDxilOperation(Opc, Args, Helper.voidTy, CI, Op);
  1144. }
  1145. // Wave intrinsics of the form fn()->val
  1146. Value *TranslateWaveToVal(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1147. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1148. hlsl::OP *hlslOP = &helper.hlslOP;
  1149. Value *refArgs[] = {nullptr};
  1150. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  1151. }
  1152. // Wave intrinsics of the form fn(val,lane)->val
  1153. Value *TranslateWaveReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1154. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1155. hlsl::OP *hlslOP = &helper.hlslOP;
  1156. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  1157. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneAt, refArgs,
  1158. CI->getOperand(1)->getType(), CI, hlslOP);
  1159. }
  1160. // Wave intrinsics of the form fn(val)->val
  1161. Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP,
  1162. OP::OpCode opcode,
  1163. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1164. hlsl::OP *hlslOP = &helper.hlslOP;
  1165. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  1166. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneFirst, refArgs,
  1167. CI->getOperand(1)->getType(), CI, hlslOP);
  1168. }
  1169. Value *TranslateAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1170. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1171. hlsl::OP *hlslOP = &helper.hlslOP;
  1172. Type *pOverloadTy = CI->getType()->getScalarType();
  1173. if (pOverloadTy->isFloatingPointTy()) {
  1174. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  1175. return TrivialDxilOperation(DXIL::OpCode::FAbs, refArgs, CI->getType(), CI,
  1176. hlslOP);
  1177. } else {
  1178. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1179. IRBuilder<> Builder(CI);
  1180. Value *neg = Builder.CreateNeg(src);
  1181. return TrivialDxilBinaryOperation(DXIL::OpCode::IMax, src, neg, hlslOP,
  1182. Builder);
  1183. }
  1184. }
  1185. Value *TranslateUAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1186. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1187. return CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx); // No-op
  1188. }
  1189. Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) {
  1190. Type *Ty = val->getType();
  1191. Type *EltTy = Ty->getScalarType();
  1192. Constant *zero = nullptr;
  1193. if (EltTy->isFloatingPointTy())
  1194. zero = ConstantFP::get(EltTy, 0);
  1195. else
  1196. zero = ConstantInt::get(EltTy, 0);
  1197. if (Ty != EltTy) {
  1198. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1199. }
  1200. if (EltTy->isFloatingPointTy())
  1201. return Builder.CreateFCmpUNE(val, zero);
  1202. else
  1203. return Builder.CreateICmpNE(val, zero);
  1204. }
  1205. Value *TranslateAllForValue(Value *val, IRBuilder<> &Builder) {
  1206. Value *cond = GenerateCmpNEZero(val, Builder);
  1207. Type *Ty = val->getType();
  1208. Type *EltTy = Ty->getScalarType();
  1209. if (Ty != EltTy) {
  1210. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1211. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1212. Value *Elt = Builder.CreateExtractElement(cond, i);
  1213. Result = Builder.CreateAnd(Result, Elt);
  1214. }
  1215. return Result;
  1216. } else
  1217. return cond;
  1218. }
  1219. Value *TranslateAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1220. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1221. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1222. IRBuilder<> Builder(CI);
  1223. return TranslateAllForValue(val, Builder);
  1224. }
  1225. Value *TranslateAny(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1226. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1227. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1228. IRBuilder<> Builder(CI);
  1229. Value *cond = GenerateCmpNEZero(val, Builder);
  1230. Type *Ty = val->getType();
  1231. Type *EltTy = Ty->getScalarType();
  1232. if (Ty != EltTy) {
  1233. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1234. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1235. Value *Elt = Builder.CreateExtractElement(cond, i);
  1236. Result = Builder.CreateOr(Result, Elt);
  1237. }
  1238. return Result;
  1239. } else
  1240. return cond;
  1241. }
  1242. Value *TranslateBitcast(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1243. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1244. Type *Ty = CI->getType();
  1245. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1246. IRBuilder<> Builder(CI);
  1247. return Builder.CreateBitCast(op, Ty);
  1248. }
// Lowers asuint(double, out lo, out hi): calls SplitDouble per element and
// stores the two 32-bit halves through the lo/hi pointers.
// Returns nullptr — the result is delivered via the output stores.
Value *TranslateDoubleAsUint(Value *x, Value *lo, Value *hi,
                             IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  Type *Ty = x->getType();
  // Type stored through the out-params (scalar or vector to match x).
  Type *outTy = lo->getType()->getPointerElementType();
  DXIL::OpCode opcode = DXIL::OpCode::SplitDouble;
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  if (Ty->isVectorTy()) {
    // Split each element, assembling lo/hi vectors, then store each once.
    Value *retValLo = llvm::UndefValue::get(outTy);
    Value *retValHi = llvm::UndefValue::get(outTy);
    unsigned vecSize = Ty->getVectorNumElements();
    for (unsigned i = 0; i < vecSize; i++) {
      Value *Elt = Builder.CreateExtractElement(x, i);
      Value *EltOP = Builder.CreateCall(dxilFunc, {opArg, Elt},
                                        hlslOP->GetOpCodeName(opcode));
      // SplitDouble result: field 0 is the low half, field 1 the high half.
      Value *EltLo = Builder.CreateExtractValue(EltOP, 0);
      retValLo = Builder.CreateInsertElement(retValLo, EltLo, i);
      Value *EltHi = Builder.CreateExtractValue(EltOP, 1);
      retValHi = Builder.CreateInsertElement(retValHi, EltHi, i);
    }
    Builder.CreateStore(retValLo, lo);
    Builder.CreateStore(retValHi, hi);
  } else {
    Value *retVal =
        Builder.CreateCall(dxilFunc, {opArg, x}, hlslOP->GetOpCodeName(opcode));
    Value *retValLo = Builder.CreateExtractValue(retVal, 0);
    Value *retValHi = Builder.CreateExtractValue(retVal, 1);
    Builder.CreateStore(retValLo, lo);
    Builder.CreateStore(retValHi, hi);
  }
  return nullptr;
}
  1281. Value *TranslateAsUint(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1282. HLOperationLowerHelper &helper,
  1283. HLObjectOperationLowerHelper *pObjHelper,
  1284. bool &Translated) {
  1285. if (CI->getNumArgOperands() == 2) {
  1286. return TranslateBitcast(CI, IOP, opcode, helper, pObjHelper, Translated);
  1287. } else {
  1288. DXASSERT_NOMSG(CI->getNumArgOperands() == 4);
  1289. hlsl::OP *hlslOP = &helper.hlslOP;
  1290. Value *x = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  1291. DXASSERT_NOMSG(x->getType()->getScalarType()->isDoubleTy());
  1292. Value *lo = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  1293. Value *hi = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  1294. IRBuilder<> Builder(CI);
  1295. return TranslateDoubleAsUint(x, lo, hi, Builder, hlslOP);
  1296. }
  1297. }
  1298. Value *TranslateAsDouble(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1299. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1300. hlsl::OP *hlslOP = &helper.hlslOP;
  1301. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1302. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1303. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  1304. IRBuilder<> Builder(CI);
  1305. return TrivialDxilOperation(opcode, { opArg, x, y }, CI->getType(), CI->getType(), hlslOP, Builder);
  1306. }
// Lowers atan2(y, x): computes atan(y/x) via the dxil Atan op, then applies
// quadrant/edge-case corrections with a chain of selects.
Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *tan = Builder.CreateFDiv(y, x);
  Value *atan =
      TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder);
  // Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
  Type *Ty = x->getType();
  Constant *pi = ConstantFP::get(Ty->getScalarType(), M_PI);
  Constant *halfPi = ConstantFP::get(Ty->getScalarType(), M_PI / 2);
  Constant *negHalfPi = ConstantFP::get(Ty->getScalarType(), -M_PI / 2);
  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  // Splat the constants to match vector operands.
  if (Ty->isVectorTy()) {
    unsigned vecSize = Ty->getVectorNumElements();
    pi = ConstantVector::getSplat(vecSize, pi);
    halfPi = ConstantVector::getSplat(vecSize, halfPi);
    negHalfPi = ConstantVector::getSplat(vecSize, negHalfPi);
    zero = ConstantVector::getSplat(vecSize, zero);
  }
  Value *atanAddPi = Builder.CreateFAdd(atan, pi);
  Value *atanSubPi = Builder.CreateFSub(atan, pi);
  // x > 0 -> atan.
  Value *result = atan;
  Value *xLt0 = Builder.CreateFCmpOLT(x, zero);
  Value *xEq0 = Builder.CreateFCmpOEQ(x, zero);
  Value *yGe0 = Builder.CreateFCmpOGE(y, zero);
  Value *yLt0 = Builder.CreateFCmpOLT(y, zero);
  // NOTE: select order matters — later (more specific) conditions override
  // earlier ones.
  // x < 0, y >= 0 -> atan + pi.
  Value *xLt0AndyGe0 = Builder.CreateAnd(xLt0, yGe0);
  result = Builder.CreateSelect(xLt0AndyGe0, atanAddPi, result);
  // x < 0, y < 0 -> atan - pi.
  Value *xLt0AndYLt0 = Builder.CreateAnd(xLt0, yLt0);
  result = Builder.CreateSelect(xLt0AndYLt0, atanSubPi, result);
  // x == 0, y < 0 -> -pi/2
  Value *xEq0AndYLt0 = Builder.CreateAnd(xEq0, yLt0);
  result = Builder.CreateSelect(xEq0AndYLt0, negHalfPi, result);
  // x == 0, y > 0 -> pi/2
  Value *xEq0AndYGe0 = Builder.CreateAnd(xEq0, yGe0);
  result = Builder.CreateSelect(xEq0AndYGe0, halfPi, result);
  return result;
}
  1351. Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1352. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1353. hlsl::OP *hlslOP = &helper.hlslOP;
  1354. Type *Ty = CI->getType();
  1355. Type *EltTy = Ty->getScalarType();
  1356. DXIL::OpCode maxOp = DXIL::OpCode::FMax;
  1357. DXIL::OpCode minOp = DXIL::OpCode::FMin;
  1358. if (IOP == IntrinsicOp::IOP_uclamp) {
  1359. maxOp = DXIL::OpCode::UMax;
  1360. minOp = DXIL::OpCode::UMin;
  1361. } else if (EltTy->isIntegerTy()) {
  1362. maxOp = DXIL::OpCode::IMax;
  1363. minOp = DXIL::OpCode::IMin;
  1364. }
  1365. Value *x = CI->getArgOperand(HLOperandIndex::kClampOpXIdx);
  1366. Value *maxVal = CI->getArgOperand(HLOperandIndex::kClampOpMaxIdx);
  1367. Value *minVal = CI->getArgOperand(HLOperandIndex::kClampOpMinIdx);
  1368. IRBuilder<> Builder(CI);
  1369. // min(max(x, minVal), maxVal).
  1370. Value *maxXMinVal =
  1371. TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder);
  1372. return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder);
  1373. }
Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // Lower clip(x): emit a DXIL Discard whose condition is true when any
  // component of x is < 0.
  hlsl::OP *hlslOP = &helper.hlslOP;
  Function *discard =
      hlslOP->GetOpFunc(OP::OpCode::Discard, Type::getVoidTy(CI->getContext()));
  IRBuilder<> Builder(CI);
  Value *cond = nullptr;
  Value *arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  if (VectorType *VT = dyn_cast<VectorType>(arg->getType())) {
    // Vector argument: OR together the per-component (elt < 0) tests.
    Value *elt = Builder.CreateExtractElement(arg, (uint64_t)0);
    cond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
    for (unsigned i = 1; i < VT->getNumElements(); i++) {
      Value *elt = Builder.CreateExtractElement(arg, i);
      Value *eltCond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
      cond = Builder.CreateOr(cond, eltCond);
    }
  } else
    cond = Builder.CreateFCmpOLT(arg, hlslOP->GetFloatConst(0));
  /*If discard condition evaluates to false at compile-time, then
  don't emit the discard instruction.*/
  if (ConstantInt *constCond = dyn_cast<ConstantInt>(cond))
    if (!constCond->getLimitedValue())
      return nullptr;
  Constant *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Discard);
  Builder.CreateCall(discard, {opArg, cond});
  // clip() returns void, so there is no value to replace the HL call with.
  return nullptr;
}
Value *TranslateCross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // Lower cross(a, b) for float3 operands by expanding the standard
  // determinant formula with scalar multiplies and subtracts:
  //   result = (a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x)
  VectorType *VT = cast<VectorType>(CI->getType());
  DXASSERT_NOMSG(VT->getNumElements() == 3);
  Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0);
  Value *op0_y = Builder.CreateExtractElement(op0, 1);
  Value *op0_z = Builder.CreateExtractElement(op0, 2);
  Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0);
  Value *op1_y = Builder.CreateExtractElement(op1, 1);
  Value *op1_z = Builder.CreateExtractElement(op1, 2);
  // MulSub(x0, y0, x1, y1) = x0*y1 - y0*x1 (one 2x2 determinant).
  auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * {
    Value *xy = Builder.CreateFMul(x0, y1);
    Value *yx = Builder.CreateFMul(y0, x1);
    return Builder.CreateFSub(xy, yx);
  };
  Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
  Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
  Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
  // Assemble the three scalar results back into a vector.
  Value *cross = UndefValue::get(VT);
  cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0);
  cross = Builder.CreateInsertElement(cross, zx_xz, 1);
  cross = Builder.CreateInsertElement(cross, xy_yx, 2);
  return cross;
}
  1428. Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1429. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1430. IRBuilder<> Builder(CI);
  1431. Type *Ty = CI->getType();
  1432. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1433. // 180/pi.
  1434. Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI);
  1435. if (Ty != Ty->getScalarType()) {
  1436. toDegreeConst =
  1437. ConstantVector::getSplat(Ty->getVectorNumElements(), toDegreeConst);
  1438. }
  1439. return Builder.CreateFMul(toDegreeConst, val);
  1440. }
Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // Lower the HLSL dst intrinsic:
  //   dst(src0, src1) = (1, src0.y * src1.y, src0.z, src1.w)
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  Type *Ty = src1->getType();
  IRBuilder<> Builder(CI);
  Value *Result = UndefValue::get(Ty);
  Constant *oneConst = ConstantFP::get(Ty->getScalarType(), 1);
  // dest.x = 1;
  Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  // dest.y = src0.y * src1.y;
  Value *src0_y = Builder.CreateExtractElement(src0, 1);
  Value *src1_y = Builder.CreateExtractElement(src1, 1);
  Value *yMuly = Builder.CreateFMul(src0_y, src1_y);
  Result = Builder.CreateInsertElement(Result, yMuly, 1);
  // dest.z = src0.z;
  Value *src0_z = Builder.CreateExtractElement(src0, 2);
  Result = Builder.CreateInsertElement(Result, src0_z, 2);
  // dest.w = src1.w;
  Value *src1_w = Builder.CreateExtractElement(src1, 3);
  Result = Builder.CreateInsertElement(Result, src1_w, 3);
  return Result;
}
Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  // Lower firstbithigh: the DXIL opcode counts from the MSB, while the HLSL
  // intrinsic reports a bit index from the LSB, so convert the result with
  // (bitWidth - 1 - raw), keeping -1 ("no bit set") unchanged.
  Value *firstbitHi =
      TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi);
  IRBuilder<> Builder(CI);
  Constant *neg1 = Builder.getInt32(-1);
  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *Ty = src->getType();
  IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType());
  // Note: despite the name, this constant holds bitWidth - 1.
  Constant *bitWidth = Builder.getInt32(EltTy->getBitWidth()-1);
  if (Ty == Ty->getScalarType()) {
    // Scalar: single select over the converted index.
    Value *sub = Builder.CreateSub(bitWidth, firstbitHi);
    Value *cond = Builder.CreateICmpEQ(neg1, firstbitHi);
    return Builder.CreateSelect(cond, neg1, sub);
  } else {
    // Vector: apply the same conversion per component.
    Value *result = UndefValue::get(CI->getType());
    unsigned vecSize = Ty->getVectorNumElements();
    for (unsigned i = 0; i < vecSize; i++) {
      Value *EltFirstBit = Builder.CreateExtractElement(firstbitHi, i);
      Value *sub = Builder.CreateSub(bitWidth, EltFirstBit);
      Value *cond = Builder.CreateICmpEQ(neg1, EltFirstBit);
      Value *Elt = Builder.CreateSelect(cond, neg1, sub);
      result = Builder.CreateInsertElement(result, Elt, i);
    }
    return result;
  }
}
  1494. Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1495. HLOperationLowerHelper &helper,
  1496. HLObjectOperationLowerHelper *pObjHelper,
  1497. bool &Translated) {
  1498. Value *firstbitLo =
  1499. TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1500. return firstbitLo;
  1501. }
Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // Lower the HLSL lit(n_dot_l, n_dot_h, m) lighting-coefficient intrinsic,
  // producing the 4-vector (ambient, diffuse, specular, 1).
  Value *n_dot_l = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *n_dot_h = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *m = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  IRBuilder<> Builder(CI);
  Type *Ty = m->getType();
  Value *Result = UndefValue::get(VectorType::get(Ty, 4));
  // Result = (ambient, diffuse, specular, 1)
  // ambient = 1.
  Constant *oneConst = ConstantFP::get(Ty, 1);
  Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  // Result.w = 1.
  Result = Builder.CreateInsertElement(Result, oneConst, 3);
  // diffuse = (n_dot_l < 0) ? 0 : n_dot_l.
  Constant *zeroConst = ConstantFP::get(Ty, 0);
  Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst);
  Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l);
  Result = Builder.CreateInsertElement(Result, diffuse, 1);
  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h ^ m).
  Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst);
  Value *specCond = Builder.CreateOr(nlCmp, nhCmp);
  // pow lowering differs under FXC-compatibility mode; honor the module flag.
  bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
  Value *nhPowM = TranslatePowImpl(&helper.hlslOP, Builder, n_dot_h, m, isFXCCompatMode);
  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhPowM);
  Result = Builder.CreateInsertElement(Result, spec, 2);
  return Result;
}
  1530. Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1531. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1532. IRBuilder<> Builder(CI);
  1533. Type *Ty = CI->getType();
  1534. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1535. // pi/180.
  1536. Constant *toRadianConst = ConstantFP::get(Ty->getScalarType(), M_PI / 180);
  1537. if (Ty != Ty->getScalarType()) {
  1538. toRadianConst =
  1539. ConstantVector::getSplat(Ty->getVectorNumElements(), toRadianConst);
  1540. }
  1541. return Builder.CreateFMul(toRadianConst, val);
  1542. }
  1543. Value *TranslateF16ToF32(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1544. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1545. IRBuilder<> Builder(CI);
  1546. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1547. Type *Ty = CI->getType();
  1548. Function *f16tof32 =
  1549. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1550. return TrivialDxilOperation(
  1551. f16tof32, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1552. x->getType(), Ty, &helper.hlslOP, Builder);
  1553. }
  1554. Value *TranslateF32ToF16(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1555. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1556. IRBuilder<> Builder(CI);
  1557. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1558. Type *Ty = CI->getType();
  1559. Function *f32tof16 =
  1560. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1561. return TrivialDxilOperation(
  1562. f32tof16, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1563. x->getType(), Ty, &helper.hlslOP, Builder);
  1564. }
// Shared helper for length()/distance(): computes the Euclidean length of
// val. For a vector with more than one component this is
// sqrt(sum of squares); for a scalar (or 1-vector) it degenerates to
// abs(val).
Value *TranslateLength(CallInst *CI, Value *val, hlsl::OP *hlslOP) {
  IRBuilder<> Builder(CI);
  if (VectorType *VT = dyn_cast<VectorType>(val->getType())) {
    Value *Elt = Builder.CreateExtractElement(val, (uint64_t)0);
    unsigned size = VT->getNumElements();
    if (size > 1) {
      // Accumulate the sum of squared components, then take the sqrt.
      Value *Sum = Builder.CreateFMul(Elt, Elt);
      for (unsigned i = 1; i < size; i++) {
        Elt = Builder.CreateExtractElement(val, i);
        Value *Mul = Builder.CreateFMul(Elt, Elt);
        Sum = Builder.CreateFAdd(Sum, Mul);
      }
      DXIL::OpCode sqrt = DXIL::OpCode::Sqrt;
      Function *dxilSqrt = hlslOP->GetOpFunc(sqrt, VT->getElementType());
      Value *opArg = hlslOP->GetI32Const((unsigned)sqrt);
      return Builder.CreateCall(dxilSqrt, {opArg, Sum},
                                hlslOP->GetOpCodeName(sqrt));
    } else {
      // 1-vector: fall through to the scalar abs() path below.
      val = Elt;
    }
  }
  // Scalar case: length(x) == |x|.
  DXIL::OpCode fabs = DXIL::OpCode::FAbs;
  Function *dxilFAbs = hlslOP->GetOpFunc(fabs, val->getType());
  Value *opArg = hlslOP->GetI32Const((unsigned)fabs);
  return Builder.CreateCall(dxilFAbs, {opArg, val},
                            hlslOP->GetOpCodeName(fabs));
}
  1592. Value *TranslateLength(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1593. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1594. hlsl::OP *hlslOP = &helper.hlslOP;
  1595. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1596. return TranslateLength(CI, val, hlslOP);
  1597. }
  1598. Value *TranslateModF(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1599. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1600. hlsl::OP *hlslOP = &helper.hlslOP;
  1601. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1602. Value *outIntPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1603. IRBuilder<> Builder(CI);
  1604. Value *intP =
  1605. TrivialDxilUnaryOperation(OP::OpCode::Round_z, val, hlslOP, Builder);
  1606. Value *fracP = Builder.CreateFSub(val, intP);
  1607. Builder.CreateStore(intP, outIntPtr);
  1608. return fracP;
  1609. }
  1610. Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1611. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1612. hlsl::OP *hlslOP = &helper.hlslOP;
  1613. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1614. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1615. IRBuilder<> Builder(CI);
  1616. Value *sub = Builder.CreateFSub(src0, src1);
  1617. return TranslateLength(CI, sub, hlslOP);
  1618. }
  1619. Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1620. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1621. hlsl::OP *hlslOP = &helper.hlslOP;
  1622. IRBuilder<> Builder(CI);
  1623. Type *Ty = CI->getType();
  1624. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1625. Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E);
  1626. if (Ty != Ty->getScalarType()) {
  1627. log2eConst =
  1628. ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst);
  1629. }
  1630. val = Builder.CreateFMul(log2eConst, val);
  1631. Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder);
  1632. return exp;
  1633. }
  1634. Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1635. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1636. hlsl::OP *hlslOP = &helper.hlslOP;
  1637. IRBuilder<> Builder(CI);
  1638. Type *Ty = CI->getType();
  1639. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1640. Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2);
  1641. if (Ty != Ty->getScalarType()) {
  1642. ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const);
  1643. }
  1644. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1645. return Builder.CreateFMul(ln2Const, log);
  1646. }
  1647. Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1648. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1649. hlsl::OP *hlslOP = &helper.hlslOP;
  1650. IRBuilder<> Builder(CI);
  1651. Type *Ty = CI->getType();
  1652. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1653. Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10);
  1654. if (Ty != Ty->getScalarType()) {
  1655. log2_10Const =
  1656. ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const);
  1657. }
  1658. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1659. return Builder.CreateFMul(log2_10Const, log);
  1660. }
Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // Lower fmod(x, y) as signedFrac(x/y) * y: take the fractional part of
  // |x/y| via the Frc opcode and restore the quotient's sign so the result
  // has the sign of x, matching fmod semantics.
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *div = Builder.CreateFDiv(src0, src1);
  Value *negDiv = Builder.CreateFNeg(div);
  // ge is true when the quotient is non-negative (div >= -div).
  Value *ge = Builder.CreateFCmpOGE(div, negDiv);
  Value *absDiv =
      TrivialDxilUnaryOperation(OP::OpCode::FAbs, div, hlslOP, Builder);
  // frc = fractional part of |x/y|, in [0, 1).
  Value *frc =
      TrivialDxilUnaryOperation(OP::OpCode::Frc, absDiv, hlslOP, Builder);
  Value *negFrc = Builder.CreateFNeg(frc);
  // Re-apply the quotient's sign to the fractional part.
  Value *realFrc = Builder.CreateSelect(ge, frc, negFrc);
  return Builder.CreateFMul(realFrc, src1);
}
  1678. Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1679. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1680. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1681. if (isFloat) {
  1682. switch (IOP) {
  1683. case IntrinsicOp::IOP_max:
  1684. opcode = OP::OpCode::FMax;
  1685. break;
  1686. case IntrinsicOp::IOP_min:
  1687. default:
  1688. DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_min);
  1689. opcode = OP::OpCode::FMin;
  1690. break;
  1691. }
  1692. }
  1693. return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1694. }
  1695. Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1696. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1697. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1698. if (isFloat) {
  1699. switch (IOP) {
  1700. case IntrinsicOp::IOP_mad:
  1701. default:
  1702. DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_mad);
  1703. opcode = OP::OpCode::FMad;
  1704. break;
  1705. }
  1706. }
  1707. return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1708. }
Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // Lower frexp(x, out exp) by direct IEEE-754 single-precision bit
  // manipulation: the exponent is extracted (rebiased so the mantissa lies
  // in [0.5, 1)) and stored through expPtr as a float; the returned value is
  // the mantissa built by forcing the exponent field to 0.5's. A val != 0
  // mask zeroes both results for zero input.
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *expPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
  Constant *exponentMaskConst = ConstantInt::get(i32Ty, 0x7f800000);
  Constant *mantisaMaskConst = ConstantInt::get(i32Ty, 0x007fffff);
  Constant *exponentShiftConst = ConstantInt::get(i32Ty, 23);
  // 0x3f000000 is the bit pattern of 0.5f: OR-ing it into the mantissa bits
  // yields a value in [0.5, 1); subtracting it rebiases the raw exponent.
  Constant *mantisaOrConst = ConstantInt::get(i32Ty, 0x3f000000);
  Constant *exponentBiasConst = ConstantInt::get(i32Ty, -(int)0x3f000000);
  Constant *zeroVal = hlslOP->GetFloatConst(0);
  // int iVal = asint(val);
  Type *dstTy = i32Ty;
  Type *Ty = val->getType();
  if (Ty->isVectorTy()) {
    // Splat every constant to match the operand's vector width.
    unsigned vecSize = Ty->getVectorNumElements();
    dstTy = VectorType::get(i32Ty, vecSize);
    exponentMaskConst = ConstantVector::getSplat(vecSize, exponentMaskConst);
    mantisaMaskConst = ConstantVector::getSplat(vecSize, mantisaMaskConst);
    exponentShiftConst = ConstantVector::getSplat(vecSize, exponentShiftConst);
    mantisaOrConst = ConstantVector::getSplat(vecSize, mantisaOrConst);
    exponentBiasConst = ConstantVector::getSplat(vecSize, exponentBiasConst);
    zeroVal = ConstantVector::getSplat(vecSize, zeroVal);
  }
  // bool ne = val != 0; sign-extended to an all-ones/all-zeros i32 mask.
  Value *notZero = Builder.CreateFCmpUNE(val, zeroVal);
  notZero = Builder.CreateSExt(notZero, dstTy);
  Value *intVal = Builder.CreateBitCast(val, dstTy);
  // temp = intVal & exponentMask;
  Value *temp = Builder.CreateAnd(intVal, exponentMaskConst);
  // temp = temp + exponentBias;
  temp = Builder.CreateAdd(temp, exponentBiasConst);
  // temp = temp & ne;  (zero the exponent when val == 0)
  temp = Builder.CreateAnd(temp, notZero);
  // temp = temp >> exponentShift;
  temp = Builder.CreateAShr(temp, exponentShiftConst);
  // exp = float(temp);
  Value *exp = Builder.CreateSIToFP(temp, Ty);
  Builder.CreateStore(exp, expPtr);
  // temp = iVal & mantissaMask;
  temp = Builder.CreateAnd(intVal, mantisaMaskConst);
  // temp = temp | mantissaOr;  (force exponent bits to 0.5's)
  temp = Builder.CreateOr(temp, mantisaOrConst);
  // mantissa = temp & ne;  (zero the mantissa when val == 0)
  Value *mantisa = Builder.CreateAnd(temp, notZero);
  return Builder.CreateBitCast(mantisa, Ty);
}
  1758. Value *TranslateLdExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1759. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1760. hlsl::OP *hlslOP = &helper.hlslOP;
  1761. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1762. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1763. IRBuilder<> Builder(CI);
  1764. Value *exp =
  1765. TrivialDxilUnaryOperation(OP::OpCode::Exp, src1, hlslOP, Builder);
  1766. return Builder.CreateFMul(exp, src0);
  1767. }
  1768. Value *TranslateFWidth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1769. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1770. hlsl::OP *hlslOP = &helper.hlslOP;
  1771. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1772. IRBuilder<> Builder(CI);
  1773. Value *ddx =
  1774. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseX, src, hlslOP, Builder);
  1775. Value *absDdx =
  1776. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddx, hlslOP, Builder);
  1777. Value *ddy =
  1778. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseY, src, hlslOP, Builder);
  1779. Value *absDdy =
  1780. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddy, hlslOP, Builder);
  1781. return Builder.CreateFAdd(absDdx, absDdy);
  1782. }
  1783. Value *TranslateLerp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1784. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1785. // x + s(y-x)
  1786. Value *x = CI->getArgOperand(HLOperandIndex::kLerpOpXIdx);
  1787. Value *y = CI->getArgOperand(HLOperandIndex::kLerpOpYIdx);
  1788. IRBuilder<> Builder(CI);
  1789. Value *ySubx = Builder.CreateFSub(y, x);
  1790. Value *s = CI->getArgOperand(HLOperandIndex::kLerpOpSIdx);
  1791. Value *sMulSub = Builder.CreateFMul(s, ySubx);
  1792. return Builder.CreateFAdd(x, sMulSub);
  1793. }
// Emit a DXIL dot opcode (Dot2/Dot3/Dot4) call. The DXIL ops take scalar
// arguments, so both source vectors are fully scalarized: the call is
// (opcode, src0[0..n-1], src1[0..n-1]).
Value *TrivialDotOperation(OP::OpCode opcode, Value *src0,
                           Value *src1, hlsl::OP *hlslOP,
                           IRBuilder<> &Builder) {
  Type *Ty = src0->getType()->getScalarType();
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  // At most 1 opcode + 4 + 4 element operands -> 9.
  SmallVector<Value *, 9> args;
  args.emplace_back(opArg);
  unsigned vecSize = src0->getType()->getVectorNumElements();
  for (unsigned i = 0; i < vecSize; i++)
    args.emplace_back(Builder.CreateExtractElement(src0, i));
  for (unsigned i = 0; i < vecSize; i++)
    args.emplace_back(Builder.CreateExtractElement(src1, i));
  Value *dotOP = Builder.CreateCall(dxilFunc, args);
  return dotOP;
}
// Integer dot product: there is no DXIL integer dot opcode, so build a
// mul followed by a chain of IMad/UMad ops, one per remaining component:
//   acc = a[0]*b[0]; acc = mad(a[i], b[i], acc) for i in [1, vecSize).
Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, IRBuilder<> &Builder, bool Unsigned = false) {
  auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad;
  Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0);
  Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0);
  Value *Result = Builder.CreateMul(Elt0, Elt1);
  for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) {
    Elt0 = Builder.CreateExtractElement(arg0, iVecElt);
    Elt1 = Builder.CreateExtractElement(arg1, iVecElt);
    Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, Builder);
  }
  return Result;
}
  1822. Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize,
  1823. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  1824. switch (vecSize) {
  1825. case 2:
  1826. return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder);
  1827. break;
  1828. case 3:
  1829. return TrivialDotOperation(OP::OpCode::Dot3, arg0, arg1, hlslOP, Builder);
  1830. break;
  1831. case 4:
  1832. return TrivialDotOperation(OP::OpCode::Dot4, arg0, arg1, hlslOP, Builder);
  1833. break;
  1834. default:
  1835. DXASSERT(vecSize == 1, "wrong vector size");
  1836. {
  1837. Value *vecMul = Builder.CreateFMul(arg0, arg1);
  1838. return Builder.CreateExtractElement(vecMul, (uint64_t)0);
  1839. }
  1840. break;
  1841. }
  1842. }
  1843. Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1844. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1845. hlsl::OP *hlslOP = &helper.hlslOP;
  1846. Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1847. Type *Ty = arg0->getType();
  1848. unsigned vecSize = Ty->getVectorNumElements();
  1849. Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1850. IRBuilder<> Builder(CI);
  1851. if (Ty->getScalarType()->isFloatingPointTy()) {
  1852. return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
  1853. } else {
  1854. return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder);
  1855. }
  1856. }
Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                          HLOperationLowerHelper &helper,
                          HLObjectOperationLowerHelper *pObjHelper,
                          bool &Translated) {
  // Lower normalize(v) as v * rsqrt(dot(v, v)).
  hlsl::OP *hlslOP = &helper.hlslOP;
  Type *Ty = CI->getType();
  Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  VectorType *VT = cast<VectorType>(Ty);
  unsigned vecSize = VT->getNumElements();
  IRBuilder<> Builder(CI);
  Value *dot = TranslateFDot(op, op, vecSize, hlslOP, Builder);
  DXIL::OpCode rsqrtOp = DXIL::OpCode::Rsqrt;
  Function *dxilRsqrt = hlslOP->GetOpFunc(rsqrtOp, VT->getElementType());
  Value *rsqrt = Builder.CreateCall(
      dxilRsqrt, {hlslOP->GetI32Const((unsigned)rsqrtOp), dot},
      hlslOP->GetOpCodeName(rsqrtOp));
  // Splat the scalar rsqrt across a vector so the final multiply is
  // elementwise.
  Value *vecRsqrt = UndefValue::get(VT);
  for (unsigned i = 0; i < VT->getNumElements(); i++)
    vecRsqrt = Builder.CreateInsertElement(vecRsqrt, rsqrt, i);
  return Builder.CreateFMul(op, vecRsqrt);
}
Value *TranslateReflect(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                        HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Lower reflect(i, n): v = i - 2 * n * dot(i, n).
  IRBuilder<> Builder(CI);
  Value *i = CI->getArgOperand(HLOperandIndex::kReflectOpIIdx);
  Value *n = CI->getArgOperand(HLOperandIndex::kReflectOpNIdx);
  VectorType *VT = cast<VectorType>(i->getType());
  unsigned vecSize = VT->getNumElements();
  Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  // 2 * dot (i, n).
  dot = Builder.CreateFMul(hlslOP->GetFloatConst(2), dot);
  // 2 * n * dot(i, n): splat the scalar before the elementwise multiply.
  Value *vecDot = Builder.CreateVectorSplat(vecSize, dot);
  Value *nMulDot = Builder.CreateFMul(vecDot, n);
  // i - 2 * n * dot(i, n).
  return Builder.CreateFSub(i, nMulDot);
}
Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                        HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Lower refract(i, n, eta):
  // d = dot(i, n);
  // t = 1 - eta * eta * ( 1 - d*d);
  // cond = t >= 0;
  // r = eta * i - (eta * d + sqrt(t)) * n;
  // return cond ? r : 0;   (0 on total internal reflection)
  IRBuilder<> Builder(CI);
  Value *i = CI->getArgOperand(HLOperandIndex::kRefractOpIIdx);
  Value *n = CI->getArgOperand(HLOperandIndex::kRefractOpNIdx);
  Value *eta = CI->getArgOperand(HLOperandIndex::kRefractOpEtaIdx);
  VectorType *VT = cast<VectorType>(i->getType());
  unsigned vecSize = VT->getNumElements();
  Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  // eta * eta;
  Value *eta2 = Builder.CreateFMul(eta, eta);
  // d*d;
  Value *dot2 = Builder.CreateFMul(dot, dot);
  Constant *one = ConstantFP::get(eta->getType(), 1);
  Constant *zero = ConstantFP::get(eta->getType(), 0);
  // 1- d*d;
  dot2 = Builder.CreateFSub(one, dot2);
  // eta * eta * (1-d*d);
  eta2 = Builder.CreateFMul(dot2, eta2);
  // t = 1 - eta * eta * ( 1 - d*d);
  Value *t = Builder.CreateFSub(one, eta2);
  // cond = t >= 0;
  Value *cond = Builder.CreateFCmpOGE(t, zero);
  // eta * i;  (NOTE: the loop index below intentionally shadows Value *i.)
  Value *vecEta = UndefValue::get(VT);
  for (unsigned i = 0; i < vecSize; i++)
    vecEta = Builder.CreateInsertElement(vecEta, eta, i);
  Value *etaMulI = Builder.CreateFMul(i, vecEta);
  // sqrt(t);
  Value *sqrt = TrivialDxilUnaryOperation(OP::OpCode::Sqrt, t, hlslOP, Builder);
  // eta * d;
  Value *etaMulD = Builder.CreateFMul(eta, dot);
  // eta * d + sqrt(t);
  Value *etaSqrt = Builder.CreateFAdd(etaMulD, sqrt);
  // (eta * d + sqrt(t)) * n;
  Value *vecEtaSqrt = Builder.CreateVectorSplat(vecSize, etaSqrt);
  Value *r = Builder.CreateFMul(vecEtaSqrt, n);
  // r = eta * i - (eta * d + sqrt(t)) * n;
  r = Builder.CreateFSub(etaMulI, r);
  Value *refract =
      Builder.CreateSelect(cond, r, ConstantVector::getSplat(vecSize, zero));
  return refract;
}
Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Lower smoothstep(min, max, x):
  // s = saturate((x-min)/(max-min)).
  IRBuilder<> Builder(CI);
  Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx);
  Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx);
  Value *maxSubMin = Builder.CreateFSub(maxVal, minVal);
  Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx);
  Value *xSubMin = Builder.CreateFSub(x, minVal);
  Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin);
  Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP,
                                       Builder);
  // return s * s *(3-2*s).
  // ConstantFP::get splats the scalar when CI->getType() is a vector.
  Constant *c2 = ConstantFP::get(CI->getType(),2);
  Constant *c3 = ConstantFP::get(CI->getType(),3);
  Value *sMul2 = Builder.CreateFMul(s, c2);
  Value *result = Builder.CreateFSub(c3, sMul2);
  result = Builder.CreateFMul(s, result);
  result = Builder.CreateFMul(s, result);
  return result;
}
Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // Lower msad4(ref, src, accum): build a uint4 whose lanes are the four
  // overlapping byte windows of the 8-byte value (src.y, src.x) -- shifted
  // by 0, 8, 16, 24 bits via LShr + Bfi -- then run the DXIL Msad opcode
  // against a splat of ref with the accumulator.
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *ref = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *src = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *accum = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  Type *Ty = CI->getType();
  IRBuilder<> Builder(CI);
  // Splat the scalar reference into all four lanes.
  Value *vecRef = UndefValue::get(Ty);
  for (unsigned i = 0; i < 4; i++)
    vecRef = Builder.CreateInsertElement(vecRef, ref, i);
  Value *srcX = Builder.CreateExtractElement(src, (uint64_t)0);
  Value *srcY = Builder.CreateExtractElement(src, 1);
  Value *byteSrc = UndefValue::get(Ty);
  byteSrc = Builder.CreateInsertElement(byteSrc, srcX, (uint64_t)0);
  // Equivalent DXBC sequence:
  // ushr r0.yzw, srcX, l(0, 8, 16, 24)
  // bfi r1.yzw, l(0, 8, 16, 24), l(0, 24, 16, 8), srcX, r0.yyzw
  Value *bfiOpArg =
      hlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::Bfi));
  Value *imm8 = hlslOP->GetU32Const(8);
  Value *imm16 = hlslOP->GetU32Const(16);
  Value *imm24 = hlslOP->GetU32Const(24);
  // Switch Ty to the scalar element type for the scalar Bfi calls.
  Ty = ref->getType();
  // Get x[31:8].
  Value *srcXShift = Builder.CreateLShr(srcX, imm8);
  // y[0~7] x[31:8].
  Value *byteSrcElt = TrivialDxilOperation(
      DXIL::OpCode::Bfi, {bfiOpArg, imm8, imm24, srcY, srcXShift}, Ty, Ty,
      hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 1);
  // Get x[31:16].
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
  // y[0~15] x[31:16].
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
                                    {bfiOpArg, imm16, imm16, srcY, srcXShift},
                                    Ty, Ty, hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 2);
  // Get x[31:24].
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
  // y[0~23] x[31:24].
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
                                    {bfiOpArg, imm24, imm8, srcY, srcXShift},
                                    Ty, Ty, hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3);
  // Msad on vecref and byteSrc.
  return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, vecRef, byteSrc, accum,
                                     hlslOP, Builder);
}
  2015. Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2016. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2017. Type *Ty = CI->getType();
  2018. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2019. IRBuilder<> Builder(CI);
  2020. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  2021. if (Ty != Ty->getScalarType()) {
  2022. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  2023. }
  2024. return Builder.CreateFDiv(one, op);
  2025. }
  2026. Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2027. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2028. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2029. Type *Ty = val->getType();
  2030. bool IsInt = Ty->getScalarType()->isIntegerTy();
  2031. IRBuilder<> Builder(CI);
  2032. Constant *zero = Constant::getNullValue(Ty);
  2033. Value *zeroLtVal = IsInt ? Builder.CreateICmpSLT(zero, val) : Builder.CreateFCmpOLT(zero, val);
  2034. Value *valLtZero = IsInt ? Builder.CreateICmpSLT(val, zero) : Builder.CreateFCmpOLT(val, zero);
  2035. zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
  2036. valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
  2037. return Builder.CreateSub(zeroLtVal, valLtZero);
  2038. }
  2039. Value *TranslateUSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2040. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2041. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2042. Type *Ty = val->getType();
  2043. IRBuilder<> Builder(CI);
  2044. Constant *zero = Constant::getNullValue(Ty);
  2045. Value *nonZero = Builder.CreateICmpNE(val, zero);
  2046. return Builder.CreateZExt(nonZero, CI->getType());
  2047. }
  2048. Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2049. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2050. Value *edge = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2051. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2052. Type *Ty = CI->getType();
  2053. IRBuilder<> Builder(CI);
  2054. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  2055. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  2056. Value *cond = Builder.CreateFCmpOLT(x, edge);
  2057. if (Ty != Ty->getScalarType()) {
  2058. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  2059. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  2060. }
  2061. return Builder.CreateSelect(cond, zero, one);
  2062. }
  2063. Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2064. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2065. hlsl::OP *hlslOP = &helper.hlslOP;
  2066. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2067. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2068. bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
  2069. IRBuilder<> Builder(CI);
  2070. return TranslatePowImpl(hlslOP,Builder,x,y,isFXCCompatMode);
  2071. }
// printf has no DXIL lowering; mark the call untranslated and emit a
// diagnostic matching the front-end's undeclared-identifier wording.
Value *TranslatePrintf(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  Translated = false;
  CI->getContext().emitError(CI, "use of undeclared identifier 'printf'");
  return nullptr;
}
  2080. Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2081. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2082. hlsl::OP *hlslOP = &helper.hlslOP;
  2083. Type *Ty = CI->getType();
  2084. Value *n = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  2085. Value *i = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  2086. Value *ng = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  2087. IRBuilder<> Builder(CI);
  2088. unsigned vecSize = Ty->getVectorNumElements();
  2089. // -n x sign(dot(i, ng)).
  2090. Value *dotOp = TranslateFDot(i, ng, vecSize, hlslOP, Builder);
  2091. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  2092. Value *dotLtZero = Builder.CreateFCmpOLT(dotOp, zero);
  2093. Value *negN = Builder.CreateFNeg(n);
  2094. Value *faceforward = Builder.CreateSelect(dotLtZero, n, negN);
  2095. return faceforward;
  2096. }
  2097. Value *TrivialSetMeshOutputCounts(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2098. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2099. hlsl::OP *hlslOP = &helper.hlslOP;
  2100. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2101. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2102. IRBuilder<> Builder(CI);
  2103. Constant *opArg = hlslOP->GetU32Const((unsigned)op);
  2104. Value *args[] = { opArg, src0, src1 };
  2105. Function *dxilFunc = hlslOP->GetOpFunc(op, Type::getVoidTy(CI->getContext()));
  2106. Builder.CreateCall(dxilFunc, args);
  2107. return nullptr;
  2108. }
  2109. Value *TrivialDispatchMesh(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2110. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2111. hlsl::OP *hlslOP = &helper.hlslOP;
  2112. Value *src0 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadX);
  2113. Value *src1 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadY);
  2114. Value *src2 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadZ);
  2115. Value *src3 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpPayload);
  2116. IRBuilder<> Builder(CI);
  2117. Constant *opArg = hlslOP->GetU32Const((unsigned)op);
  2118. Value *args[] = { opArg, src0, src1, src2, src3 };
  2119. Function *dxilFunc = hlslOP->GetOpFunc(op, src3->getType());
  2120. Builder.CreateCall(dxilFunc, args);
  2121. return nullptr;
  2122. }
  2123. }
  2124. // MOP intrinsics
  2125. namespace {
  2126. Value *TranslateGetSamplePosition(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2127. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2128. hlsl::OP *hlslOP = &helper.hlslOP;
  2129. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2130. IRBuilder<> Builder(CI);
  2131. Value *sampleIdx =
  2132. CI->getArgOperand(HLOperandIndex::kGetSamplePositionSampleIdxOpIndex);
  2133. OP::OpCode opcode = OP::OpCode::Texture2DMSGetSamplePosition;
  2134. llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2135. Function *dxilFunc =
  2136. hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  2137. Value *args[] = {opArg, handle, sampleIdx};
  2138. Value *samplePos = Builder.CreateCall(dxilFunc, args);
  2139. Value *result = UndefValue::get(CI->getType());
  2140. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  2141. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  2142. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  2143. result = Builder.CreateInsertElement(result, samplePosY, 1);
  2144. return result;
  2145. }
  2146. Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2147. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2148. hlsl::OP *hlslOP = &helper.hlslOP;
  2149. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2150. DxilResource::Kind RK = pObjHelper->GetRK(handle);
  2151. IRBuilder<> Builder(CI);
  2152. OP::OpCode opcode = OP::OpCode::GetDimensions;
  2153. llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2154. Function *dxilFunc =
  2155. hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  2156. Type *i32Ty = Type::getInt32Ty(CI->getContext());
  2157. Value *mipLevel = UndefValue::get(i32Ty);
  2158. unsigned widthOpIdx = HLOperandIndex::kGetDimensionsMipWidthOpIndex;
  2159. switch (RK) {
  2160. case DxilResource::Kind::Texture1D:
  2161. case DxilResource::Kind::Texture1DArray:
  2162. case DxilResource::Kind::Texture2D:
  2163. case DxilResource::Kind::Texture2DArray:
  2164. case DxilResource::Kind::TextureCube:
  2165. case DxilResource::Kind::TextureCubeArray:
  2166. case DxilResource::Kind::Texture3D: {
  2167. Value *opMipLevel =
  2168. CI->getArgOperand(HLOperandIndex::kGetDimensionsMipLevelOpIndex);
  2169. // mipLevel is in parameter, should not be pointer.
  2170. if (!opMipLevel->getType()->isPointerTy())
  2171. mipLevel = opMipLevel;
  2172. else {
  2173. // No mip level.
  2174. widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
  2175. mipLevel = ConstantInt::get(i32Ty, 0);
  2176. }
  2177. } break;
  2178. default:
  2179. widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
  2180. break;
  2181. }
  2182. Value *args[] = {opArg, handle, mipLevel};
  2183. Value *dims = Builder.CreateCall(dxilFunc, args);
  2184. unsigned dimensionIdx = 0;
  2185. Value *width = Builder.CreateExtractValue(dims, dimensionIdx++);
  2186. Value *widthPtr = CI->getArgOperand(widthOpIdx);
  2187. if (widthPtr->getType()->getPointerElementType()->isFloatingPointTy())
  2188. width = Builder.CreateSIToFP(width,
  2189. widthPtr->getType()->getPointerElementType());
  2190. Builder.CreateStore(width, widthPtr);
  2191. if (DXIL::IsStructuredBuffer(RK)) {
  2192. // Set stride.
  2193. Value *stridePtr = CI->getArgOperand(widthOpIdx + 1);
  2194. const DataLayout &DL = helper.dataLayout;
  2195. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2196. Type *bufTy = pObjHelper->GetResourceType(handle);
  2197. Type *bufRetTy = bufTy->getStructElementType(0);
  2198. unsigned stride = DL.getTypeAllocSize(bufRetTy);
  2199. Builder.CreateStore(hlslOP->GetU32Const(stride), stridePtr);
  2200. } else {
  2201. if (widthOpIdx == HLOperandIndex::kGetDimensionsMipWidthOpIndex ||
  2202. // Samples is in w channel too.
  2203. RK == DXIL::ResourceKind::Texture2DMS) {
  2204. // Has mip.
  2205. for (unsigned argIdx = widthOpIdx + 1;
  2206. argIdx < CI->getNumArgOperands() - 1; argIdx++) {
  2207. Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
  2208. Value *ptr = CI->getArgOperand(argIdx);
  2209. if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
  2210. dim = Builder.CreateSIToFP(dim,
  2211. ptr->getType()->getPointerElementType());
  2212. Builder.CreateStore(dim, ptr);
  2213. }
  2214. // NumOfLevel is in w channel.
  2215. dimensionIdx = 3;
  2216. Value *dim = Builder.CreateExtractValue(dims, dimensionIdx);
  2217. Value *ptr = CI->getArgOperand(CI->getNumArgOperands() - 1);
  2218. if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
  2219. dim =
  2220. Builder.CreateSIToFP(dim, ptr->getType()->getPointerElementType());
  2221. Builder.CreateStore(dim, ptr);
  2222. } else {
  2223. for (unsigned argIdx = widthOpIdx + 1; argIdx < CI->getNumArgOperands();
  2224. argIdx++) {
  2225. Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
  2226. Value *ptr = CI->getArgOperand(argIdx);
  2227. if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
  2228. dim = Builder.CreateSIToFP(dim,
  2229. ptr->getType()->getPointerElementType());
  2230. Builder.CreateStore(dim, ptr);
  2231. }
  2232. }
  2233. }
  2234. return nullptr;
  2235. }
  2236. Value *GenerateUpdateCounter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2237. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2238. hlsl::OP *hlslOP = &helper.hlslOP;
  2239. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2240. pObjHelper->MarkHasCounter(handle, helper.i8Ty);
  2241. bool bInc = IOP == IntrinsicOp::MOP_IncrementCounter;
  2242. IRBuilder<> Builder(CI);
  2243. OP::OpCode OpCode = OP::OpCode::BufferUpdateCounter;
  2244. Value *OpCodeArg = hlslOP->GetU32Const((unsigned)OpCode);
  2245. Value *IncVal = hlslOP->GetI8Const(bInc ? 1 : -1);
  2246. // Create BufferUpdateCounter call.
  2247. Value *Args[] = {OpCodeArg, handle, IncVal};
  2248. Function *F =
  2249. hlslOP->GetOpFunc(OpCode, Type::getVoidTy(handle->getContext()));
  2250. return Builder.CreateCall(F, Args);
  2251. }
  2252. static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, IRBuilder<> &Builder) {
  2253. // Extract value part.
  2254. Value *retVal = llvm::UndefValue::get(RetTy);
  2255. if (RetTy->isVectorTy()) {
  2256. for (unsigned i = 0; i < RetTy->getVectorNumElements(); i++) {
  2257. Value *retComp = Builder.CreateExtractValue(ResRet, i);
  2258. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2259. }
  2260. } else {
  2261. retVal = Builder.CreateExtractValue(ResRet, 0);
  2262. }
  2263. return retVal;
  2264. }
  2265. static Value *ScalarizeElements(Type *RetTy, ArrayRef<Value*> Elts, IRBuilder<> &Builder) {
  2266. // Extract value part.
  2267. Value *retVal = llvm::UndefValue::get(RetTy);
  2268. if (RetTy->isVectorTy()) {
  2269. unsigned vecSize = RetTy->getVectorNumElements();
  2270. DXASSERT(vecSize <= Elts.size(), "vector size mismatch");
  2271. for (unsigned i = 0; i < vecSize; i++) {
  2272. Value *retComp = Elts[i];
  2273. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2274. }
  2275. } else {
  2276. retVal = Elts[0];
  2277. }
  2278. return retVal;
  2279. }
  2280. void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder,
  2281. hlsl::OP *hlslOp) {
  2282. if (status && !isa<UndefValue>(status)) {
  2283. Value *statusVal = Builder.CreateExtractValue(ResRet, DXIL::kResRetStatusIndex);
  2284. Value *checkAccessOp = hlslOp->GetI32Const(
  2285. static_cast<unsigned>(DXIL::OpCode::CheckAccessFullyMapped));
  2286. Function *checkAccessFn = hlslOp->GetOpFunc(
  2287. DXIL::OpCode::CheckAccessFullyMapped, statusVal->getType());
  2288. // CheckAccess on status.
  2289. Value *bStatus =
  2290. Builder.CreateCall(checkAccessFn, {checkAccessOp, statusVal});
  2291. Value *extStatus =
  2292. Builder.CreateZExt(bStatus, Type::getInt32Ty(status->getContext()));
  2293. Builder.CreateStore(extStatus, status);
  2294. }
  2295. }
  2296. Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) {
  2297. Value *Result = UndefValue::get(DstTy);
  2298. for (unsigned i = 0; i < DstTy->getVectorNumElements(); i++)
  2299. Result = Builder.CreateInsertElement(Result, Elt, i);
  2300. return Result;
  2301. }
  2302. Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2303. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2304. hlsl::OP *hlslOP = &helper.hlslOP;
  2305. Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2306. Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2307. Type *arg0Ty = arg0->getType();
  2308. Type *arg1Ty = arg1->getType();
  2309. IRBuilder<> Builder(CI);
  2310. if (arg0Ty->isVectorTy()) {
  2311. if (arg1Ty->isVectorTy()) {
  2312. // mul(vector, vector) == dot(vector, vector)
  2313. unsigned vecSize = arg0Ty->getVectorNumElements();
  2314. if (arg0Ty->getScalarType()->isFloatingPointTy()) {
  2315. return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
  2316. }
  2317. else {
  2318. return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, IOP == IntrinsicOp::IOP_umul);
  2319. }
  2320. }
  2321. else {
  2322. // mul(vector, scalar) == vector * scalar-splat
  2323. arg1 = SplatToVector(arg1, arg0Ty, Builder);
  2324. }
  2325. }
  2326. else {
  2327. if (arg1Ty->isVectorTy()) {
  2328. // mul(scalar, vector) == scalar-splat * vector
  2329. arg0 = SplatToVector(arg0, arg1Ty, Builder);
  2330. }
  2331. // else mul(scalar, scalar) == scalar * scalar;
  2332. }
  2333. // create fmul/mul for the pair of vectors or scalars
  2334. if (arg0Ty->getScalarType()->isFloatingPointTy()) {
  2335. return Builder.CreateFMul(arg0, arg1);
  2336. }
  2337. else {
  2338. return Builder.CreateMul(arg0, arg1);
  2339. }
  2340. }
  2341. // Sample intrinsics.
// Gathers and normalizes the HL-level operands of a Sample-family call
// (coordinates, offsets, bias/LOD/compare/clamp/status, gradients) so the
// TranslateSample* routines can emit DXIL ops directly from scalar fields.
struct SampleHelper {
  SampleHelper(CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper);
  // Left as NumOpCodes when the resource kind cannot be resolved; callers
  // check this and defer translation.
  OP::OpCode opcode = OP::OpCode::NumOpCodes;
  DXIL::ResourceKind resourceKind = DXIL::ResourceKind::Invalid;
  // Extra sampled-texture handle, set only for WriteSamplerFeedback* ops.
  Value *sampledTexHandle = nullptr;
  Value *texHandle = nullptr;
  Value *samplerHandle = nullptr;
  static const unsigned kMaxCoordDimensions = 4;
  unsigned coordDimensions = 0;
  // Scalarized coordinate; lanes past coordDimensions are undef.
  Value *coord[kMaxCoordDimensions];
  Value *compareValue = nullptr;
  Value *bias = nullptr;
  Value *lod = nullptr;
  // SampleGrad only.
  static const unsigned kMaxDDXYDimensions = 3;
  Value *ddx[kMaxDDXYDimensions];
  Value *ddy[kMaxDDXYDimensions];
  // Optional.
  static const unsigned kMaxOffsetDimensions = 3;
  unsigned offsetDimensions = 0;
  Value *offset[kMaxOffsetDimensions];
  Value *clamp = nullptr;
  // Out-param pointer for the residency status, if the caller requested it.
  Value *status = nullptr;
  // Highest HL operand index actually consumed; the constructor asserts this
  // reaches the last argument so no HL operand is silently dropped.
  unsigned maxHLOperandRead = 0;
  // Returns operand |opIdx| when present, else nullptr; tracks the max index.
  Value *ReadHLOperand(CallInst *CI, unsigned opIdx) {
    if (CI->getNumArgOperands() > opIdx) {
      maxHLOperandRead = std::max(maxHLOperandRead, opIdx);
      return CI->getArgOperand(opIdx);
    }
    return nullptr;
  }
  // Scalarizes the coordinate vector into coord[], padding to 4 with undef.
  void TranslateCoord(CallInst *CI, unsigned coordIdx) {
    Value *coordArg = ReadHLOperand(CI, coordIdx);
    DXASSERT_NOMSG(coordArg);
    DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions,
             "otherwise, HL coordinate dimensions mismatch");
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < coordDimensions; i++)
      coord[i] = Builder.CreateExtractElement(coordArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
      coord[i] = undefF;
  }
  // Scalarizes the optional offset vector into offset[]; missing offsets
  // become zeros (not undef), and unused lanes become undef.
  void TranslateOffset(CallInst *CI, unsigned offsetIdx) {
    IntegerType *i32Ty = Type::getInt32Ty(CI->getContext());
    if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) {
      DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions,
               "otherwise, HL coordinate dimensions mismatch");
      IRBuilder<> Builder(CI);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
    } else {
      // Use zeros for offsets when not specified, not undef.
      Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = zero;
    }
    // Use undef for components that should not be used for this resource dim.
    Value *undefI = UndefValue::get(i32Ty);
    for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
      offset[i] = undefI;
  }
  void SetBias(CallInst *CI, unsigned biasIdx) {
    // Clamp bias for immediate.
    bias = ReadHLOperand(CI, biasIdx);
    DXASSERT_NOMSG(bias);
    if (ConstantFP *FP = dyn_cast<ConstantFP>(bias)) {
      float v = FP->getValueAPF().convertToFloat();
      if (v > DXIL::kMaxMipLodBias)
        bias = ConstantFP::get(FP->getType(), DXIL::kMaxMipLodBias);
      if (v < DXIL::kMinMipLodBias)
        bias = ConstantFP::get(FP->getType(), DXIL::kMinMipLodBias);
    }
  }
  void SetLOD(CallInst *CI, unsigned lodIdx) {
    lod = ReadHLOperand(CI, lodIdx);
    DXASSERT_NOMSG(lod);
  }
  void SetCompareValue(CallInst *CI, unsigned cmpIdx) {
    compareValue = ReadHLOperand(CI, cmpIdx);
    DXASSERT_NOMSG(compareValue);
  }
  // Clamp is optional; when absent an undef float is used. A vector clamp
  // argument is reduced to its first element.
  void SetClamp(CallInst *CI, unsigned clampIdx) {
    if ((clamp = ReadHLOperand(CI, clampIdx))) {
      if (clamp->getType()->isVectorTy()) {
        IRBuilder<> Builder(CI);
        clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
      }
    } else
      clamp = UndefValue::get(Type::getFloatTy(CI->getContext()));
  }
  // Status is optional; stays nullptr when the overload has no out-param.
  void SetStatus(CallInst *CI, unsigned statusIdx) {
    status = ReadHLOperand(CI, statusIdx);
  }
  void SetDDX(CallInst *CI, unsigned ddxIdx) {
    SetDDXY(CI, ddx, ReadHLOperand(CI, ddxIdx));
  }
  void SetDDY(CallInst *CI, unsigned ddyIdx) {
    SetDDXY(CI, ddy, ReadHLOperand(CI, ddyIdx));
  }
  // Scalarizes a gradient vector, padding to 3 lanes with undef.
  void SetDDXY(CallInst *CI, MutableArrayRef<Value *> ddxy, Value *ddxyArg) {
    DXASSERT_NOMSG(ddxyArg);
    IRBuilder<> Builder(CI);
    unsigned ddxySize = ddxyArg->getType()->getVectorNumElements();
    for (unsigned i = 0; i < ddxySize; i++)
      ddxy[i] = Builder.CreateExtractElement(ddxyArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = ddxySize; i < kMaxDDXYDimensions; i++)
      ddxy[i] = undefF;
  }
};
// Decodes the HL call's operands according to |op|. When the resource kind
// cannot be resolved yet, opcode is left as NumOpCodes so the caller defers
// translation to a later pass.
SampleHelper::SampleHelper(
    CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper)
    : opcode(op) {
  texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  resourceKind = pObjHelper->GetRK(texHandle);
  if (resourceKind == DXIL::ResourceKind::Invalid) {
    opcode = DXIL::OpCode::NumOpCodes;
    return;
  }
  // CalculateLOD uses fewer coordinate components than actual sampling.
  coordDimensions = opcode == DXIL::OpCode::CalculateLOD ? DxilResource::GetNumDimensionsForCalcLOD(resourceKind)
                                                         : DxilResource::GetNumCoords(resourceKind);
  offsetDimensions = DxilResource::GetNumOffsets(resourceKind);
  // Feedback ops carry an extra sampled-texture handle and place the sampler
  // and coordinate at different operand indices.
  const bool bFeedbackOp = hlsl::OP::IsDxilOpFeedback(op);
  sampledTexHandle = bFeedbackOp ? CI->getArgOperand(HLOperandIndex::kWriteSamplerFeedbackSampledArgIndex)
                                 : nullptr;
  const unsigned kSamplerArgIndex = bFeedbackOp ? HLOperandIndex::kWriteSamplerFeedbackSamplerArgIndex
                                                : HLOperandIndex::kSampleSamplerArgIndex;
  samplerHandle = CI->getArgOperand(kSamplerArgIndex);
  const unsigned kCoordArgIdx = bFeedbackOp ? HLOperandIndex::kWriteSamplerFeedbackCoordArgIndex
                                            : HLOperandIndex::kSampleCoordArgIndex;
  TranslateCoord(CI, kCoordArgIdx);
  // TextureCube does not support offsets, shifting each subsequent arg index down by 1
  unsigned cube = (resourceKind == DXIL::ResourceKind::TextureCube ||
                   resourceKind == DXIL::ResourceKind::TextureCubeArray)
                      ? 1 : 0;
  // Per-opcode operand layout: read each optional operand at its (possibly
  // cube-shifted) HL index. kInvalidIdx makes TranslateOffset emit zeros.
  switch (op) {
  case OP::OpCode::Sample:
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleLevel:
    SetLOD(CI, HLOperandIndex::kSampleLLevelArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleLOffsetArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleLStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleBias:
    SetBias(CI, HLOperandIndex::kSampleBBiasArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleBOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleBClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleBStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleCmp:
    SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleCmpOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleCmpClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleCmpLevelZero:
    SetCompareValue(CI, HLOperandIndex::kSampleCmpLZCmpValArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleCmpLZOffsetArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleCmpLZStatusArgIndex - cube);
    break;
  case OP::OpCode::SampleGrad:
    SetDDX(CI, HLOperandIndex::kSampleGDDXArgIndex);
    SetDDY(CI, HLOperandIndex::kSampleGDDYArgIndex);
    TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx : HLOperandIndex::kSampleGOffsetArgIndex);
    SetClamp(CI, HLOperandIndex::kSampleGClampArgIndex - cube);
    SetStatus(CI, HLOperandIndex::kSampleGStatusArgIndex - cube);
    break;
  case OP::OpCode::CalculateLOD:
    // Only need coord for LOD calculation.
    break;
  case OP::OpCode::WriteSamplerFeedback:
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedback_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackBias:
    SetBias(CI, HLOperandIndex::kWriteSamplerFeedbackBias_BiasArgIndex);
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackBias_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackGrad:
    SetDDX(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdxArgIndex);
    SetDDY(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdyArgIndex);
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackLevel:
    SetLOD(CI, HLOperandIndex::kWriteSamplerFeedbackLevel_LodArgIndex);
    break;
  default:
    DXASSERT(0, "invalid opcode for Sample");
    break;
  }
  // Every HL operand must have been consumed by one of the readers above.
  DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1,
           "otherwise, unused HL arguments for Sample op");
}
  2538. Value *TranslateCalculateLOD(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2539. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2540. hlsl::OP *hlslOP = &helper.hlslOP;
  2541. SampleHelper sampleHelper(CI, OP::OpCode::CalculateLOD, pObjHelper);
  2542. if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2543. Translated = false;
  2544. return nullptr;
  2545. }
  2546. bool bClamped = IOP == IntrinsicOp::MOP_CalculateLevelOfDetail;
  2547. IRBuilder<> Builder(CI);
  2548. Value *opArg =
  2549. hlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::CalculateLOD));
  2550. Value *clamped = hlslOP->GetI1Const(bClamped);
  2551. Value *args[] = {opArg,
  2552. sampleHelper.texHandle,
  2553. sampleHelper.samplerHandle,
  2554. sampleHelper.coord[0],
  2555. sampleHelper.coord[1],
  2556. sampleHelper.coord[2],
  2557. clamped};
  2558. Function *dxilFunc = hlslOP->GetOpFunc(OP::OpCode::CalculateLOD,
  2559. Type::getFloatTy(opArg->getContext()));
  2560. Value *LOD = Builder.CreateCall(dxilFunc, args);
  2561. return LOD;
  2562. }
  2563. Value *TranslateCheckAccess(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2564. HLOperationLowerHelper &helper,
  2565. HLObjectOperationLowerHelper *pObjHelper,
  2566. bool &Translated) {
  2567. // Translate CheckAccess into uint->bool, later optimization should remove it.
  2568. // Real checkaccess is generated in UpdateStatus.
  2569. IRBuilder<> Builder(CI);
  2570. Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2571. return Builder.CreateTrunc(V, helper.i1Ty);
  2572. }
  2573. void GenerateDxilSample(CallInst *CI, Function *F, ArrayRef<Value *> sampleArgs,
  2574. Value *status, hlsl::OP *hlslOp) {
  2575. IRBuilder<> Builder(CI);
  2576. CallInst *call = Builder.CreateCall(F, sampleArgs);
  2577. dxilutil::MigrateDebugValue(CI, call);
  2578. // extract value part
  2579. Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);
  2580. // Replace ret val.
  2581. CI->replaceAllUsesWith(retVal);
  2582. // get status
  2583. if (status) {
  2584. UpdateStatus(call, status, Builder, hlslOp);
  2585. }
  2586. }
// Lowers the Sample-family intrinsics (Sample, SampleLevel, SampleGrad,
// SampleBias, SampleCmp, SampleCmpLevelZero) by assembling each op's exact
// DXIL argument list from the SampleHelper fields. The HL call is replaced
// inside GenerateDxilSample, so this always returns nullptr on success.
Value *TranslateSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  SampleHelper sampleHelper(CI, opcode, pObjHelper);
  // Resource not resolved yet; defer translation.
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
    Translated = false;
    return nullptr;
  }
  Type *Ty = CI->getType();
  // Sample ops are overloaded on the component (scalar) type.
  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  switch (opcode) {
  case OP::OpCode::Sample: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleLevel: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // LOD.
        sampleHelper.lod};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleGrad: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Ddx.
        sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
        // Ddy.
        sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleBias: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Bias.
        sampleHelper.bias,
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleCmp: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // CmpVal.
        sampleHelper.compareValue,
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleCmpLevelZero:
  default: {
    DXASSERT(opcode == OP::OpCode::SampleCmpLevelZero, "invalid sample opcode");
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // CmpVal.
        sampleHelper.compareValue};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  }
  // CI is replaced in GenerateDxilSample.
  return nullptr;
}
  2685. // Gather intrinsics.
// Helper that collects and scalarizes the operands of an HL Gather /
// GatherCmp call so they can be fed to the DXIL TextureGather* ops.
// The constructor reads HL call arguments strictly through ReadHLOperand so
// that maxHLOperandRead can verify every argument was consumed.
struct GatherHelper {
  // Which texture channel(s) the gather reads.
  enum class GatherChannel {
    GatherAll,
    GatherRed,
    GatherGreen,
    GatherBlue,
    GatherAlpha,
  };

  GatherHelper(CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper,
               GatherHelper::GatherChannel ch);

  // Resolved DXIL opcode; set to NumOpCodes when the resource kind is invalid.
  OP::OpCode opcode;
  Value *texHandle;
  Value *samplerHandle;
  static const unsigned kMaxCoordDimensions = 4;
  // Scalarized coordinate; unused trailing components are undef floats.
  Value *coord[kMaxCoordDimensions];
  // DXIL channel index (0=R, 1=G, 2=B, 3=A).
  unsigned channel;
  Value *special; // For CompareValue, Bias, LOD.
  // Optional.
  static const unsigned kMaxOffsetDimensions = 2;
  // Scalarized texel offset; zeros when not specified, undef past the
  // resource's offset dimensionality.
  Value *offset[kMaxOffsetDimensions];
  // For the overload send different offset for each sample.
  // Only save 3 sampleOffsets because use offset for normal overload as first
  // sample offset.
  static const unsigned kSampleOffsetDimensions = 3;
  Value *sampleOffsets[kSampleOffsetDimensions][kMaxOffsetDimensions];
  // Optional tiled-resource status out-argument; nullptr when absent.
  Value *status;
  // True when the 4-offset gather overload was used.
  bool hasSampleOffsets;
  // Highest HL argument index read so far; checked against the call's
  // argument count in the constructor to catch unconsumed operands.
  unsigned maxHLOperandRead = 0;
  // Returns the HL argument at opIdx, or nullptr if the call has no such
  // argument; records the read for the maxHLOperandRead check.
  Value *ReadHLOperand(CallInst *CI, unsigned opIdx) {
    if (CI->getNumArgOperands() > opIdx) {
      maxHLOperandRead = std::max(maxHLOperandRead, opIdx);
      return CI->getArgOperand(opIdx);
    }
    return nullptr;
  }
  // Scalarizes the coordinate vector argument at coordIdx into coord[],
  // padding the remainder with undef floats.
  void TranslateCoord(CallInst *CI, unsigned coordIdx,
                      unsigned coordDimensions) {
    Value *coordArg = ReadHLOperand(CI, coordIdx);
    DXASSERT_NOMSG(coordArg);
    DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions,
             "otherwise, HL coordinate dimensions mismatch");
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < coordDimensions; i++)
      coord[i] = Builder.CreateExtractElement(coordArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
      coord[i] = undefF;
  }
  // Captures the optional status out-argument (nullptr when not supplied).
  void SetStatus(CallInst *CI, unsigned statusIdx) {
    status = ReadHLOperand(CI, statusIdx);
  }
  // Scalarizes the texel-offset argument at offsetIdx into offset[].
  // Missing offsets become zeros (not undef); components beyond the
  // resource's offset dimensionality become undef.
  void TranslateOffset(CallInst *CI, unsigned offsetIdx,
                       unsigned offsetDimensions) {
    IntegerType *i32Ty = Type::getInt32Ty(CI->getContext());
    if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) {
      DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions,
               "otherwise, HL coordinate dimensions mismatch");
      IRBuilder<> Builder(CI);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
    } else {
      // Use zeros for offsets when not specified, not undef.
      Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = zero;
    }
    // Use undef for components that should not be used for this resource dim.
    Value *undefI = UndefValue::get(i32Ty);
    for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
      offset[i] = undefI;
  }
  // Scalarizes the per-sample offsets (samples 1..3) when the 4-offset
  // overload is used; sample 0's offset lives in offset[]. Sets
  // hasSampleOffsets when those extra arguments are present.
  void TranslateSampleOffset(CallInst *CI, unsigned offsetIdx,
                             unsigned offsetDimensions) {
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
    if (CI->getNumArgOperands() >= (offsetIdx + kSampleOffsetDimensions)) {
      hasSampleOffsets = true;
      IRBuilder<> Builder(CI);
      for (unsigned ch = 0; ch < kSampleOffsetDimensions; ch++) {
        Value *offsetArg = ReadHLOperand(CI, offsetIdx + ch);
        for (unsigned i = 0; i < offsetDimensions; i++)
          sampleOffsets[ch][i] = Builder.CreateExtractElement(offsetArg, i);
        for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
          sampleOffsets[ch][i] = undefI;
      }
    }
  }
  // Update the offset args for gather with sample offset at sampleIdx.
  void UpdateOffsetInGatherArgs(MutableArrayRef<Value *> gatherArgs,
                                unsigned sampleIdx) {
    unsigned offsetBase = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
    for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
      // -1 because offset for sample 0 is in GatherHelper::offset.
      gatherArgs[offsetBase + i] = sampleOffsets[sampleIdx - 1][i];
  }
};
  2781. GatherHelper::GatherHelper(
  2782. CallInst *CI, OP::OpCode op, HLObjectOperationLowerHelper *pObjHelper,
  2783. GatherHelper::GatherChannel ch)
  2784. : opcode(op), special(nullptr), hasSampleOffsets(false) {
  2785. switch (ch) {
  2786. case GatherChannel::GatherAll:
  2787. channel = 0;
  2788. break;
  2789. case GatherChannel::GatherRed:
  2790. channel = 0;
  2791. break;
  2792. case GatherChannel::GatherGreen:
  2793. channel = 1;
  2794. break;
  2795. case GatherChannel::GatherBlue:
  2796. channel = 2;
  2797. break;
  2798. case GatherChannel::GatherAlpha:
  2799. channel = 3;
  2800. break;
  2801. }
  2802. IRBuilder<> Builder(CI);
  2803. texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2804. samplerHandle = CI->getArgOperand(HLOperandIndex::kSampleSamplerArgIndex);
  2805. DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
  2806. if (RK == DXIL::ResourceKind::Invalid) {
  2807. opcode = DXIL::OpCode::NumOpCodes;
  2808. return;
  2809. }
  2810. unsigned coordSize = DxilResource::GetNumCoords(RK);
  2811. unsigned offsetSize = DxilResource::GetNumOffsets(RK);
  2812. bool cube = RK == DXIL::ResourceKind::TextureCube ||
  2813. RK == DXIL::ResourceKind::TextureCubeArray;
  2814. const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  2815. TranslateCoord(CI, kCoordArgIdx, coordSize);
  2816. switch (op) {
  2817. case OP::OpCode::TextureGather: {
  2818. unsigned statusIdx;
  2819. if (cube) {
  2820. TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize);
  2821. statusIdx = HLOperandIndex::kGatherCubeStatusArgIndex;
  2822. } else {
  2823. TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize);
  2824. // Gather all don't have sample offset version overload.
  2825. if (ch != GatherChannel::GatherAll)
  2826. TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
  2827. offsetSize);
  2828. if (hasSampleOffsets) {
  2829. statusIdx = HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex;
  2830. } else {
  2831. opcode = OP::OpCode::TextureGatherImm;
  2832. statusIdx = HLOperandIndex::kGatherStatusArgIndex;
  2833. }
  2834. }
  2835. SetStatus(CI, statusIdx);
  2836. } break;
  2837. case OP::OpCode::TextureGatherCmp: {
  2838. special = ReadHLOperand(CI, HLOperandIndex::kGatherCmpCmpValArgIndex);
  2839. unsigned statusIdx;
  2840. if (cube) {
  2841. TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize);
  2842. statusIdx = HLOperandIndex::kGatherCmpCubeStatusArgIndex;
  2843. } else {
  2844. TranslateOffset(CI, HLOperandIndex::kGatherCmpOffsetArgIndex, offsetSize);
  2845. // Gather all don't have sample offset version overload.
  2846. if (ch != GatherChannel::GatherAll)
  2847. TranslateSampleOffset(CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex,
  2848. offsetSize);
  2849. if (hasSampleOffsets) {
  2850. statusIdx = HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex;
  2851. } else {
  2852. opcode = OP::OpCode::TextureGatherCmpImm;
  2853. statusIdx = HLOperandIndex::kGatherCmpStatusArgIndex;
  2854. }
  2855. }
  2856. SetStatus(CI, statusIdx);
  2857. } break;
  2858. default:
  2859. DXASSERT(0, "invalid opcode for Gather");
  2860. break;
  2861. }
  2862. DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1,
  2863. "otherwise, unused HL arguments for Sample op");
  2864. }
  2865. void GenerateDxilGather(CallInst *CI, Function *F,
  2866. MutableArrayRef<Value *> gatherArgs,
  2867. GatherHelper &helper, hlsl::OP *hlslOp) {
  2868. IRBuilder<> Builder(CI);
  2869. CallInst *call = Builder.CreateCall(F, gatherArgs);
  2870. dxilutil::MigrateDebugValue(CI, call);
  2871. Value *retVal;
  2872. if (!helper.hasSampleOffsets) {
  2873. // extract value part
  2874. retVal = ScalarizeResRet(CI->getType(), call, Builder);
  2875. } else {
  2876. retVal = UndefValue::get(CI->getType());
  2877. Value *elt = Builder.CreateExtractValue(call, (uint64_t)0);
  2878. retVal = Builder.CreateInsertElement(retVal, elt, (uint64_t)0);
  2879. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 1);
  2880. CallInst *callY = Builder.CreateCall(F, gatherArgs);
  2881. elt = Builder.CreateExtractValue(callY, (uint64_t)1);
  2882. retVal = Builder.CreateInsertElement(retVal, elt, 1);
  2883. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 2);
  2884. CallInst *callZ = Builder.CreateCall(F, gatherArgs);
  2885. elt = Builder.CreateExtractValue(callZ, (uint64_t)2);
  2886. retVal = Builder.CreateInsertElement(retVal, elt, 2);
  2887. helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 3);
  2888. CallInst *callW = Builder.CreateCall(F, gatherArgs);
  2889. elt = Builder.CreateExtractValue(callW, (uint64_t)3);
  2890. retVal = Builder.CreateInsertElement(retVal, elt, 3);
  2891. // TODO: UpdateStatus for each gather call.
  2892. }
  2893. // Replace ret val.
  2894. CI->replaceAllUsesWith(retVal);
  2895. // Get status
  2896. if (helper.status) {
  2897. UpdateStatus(call, helper.status, Builder, hlslOp);
  2898. }
  2899. }
  2900. Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2901. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2902. hlsl::OP *hlslOP = &helper.hlslOP;
  2903. GatherHelper::GatherChannel ch = GatherHelper::GatherChannel::GatherAll;
  2904. switch (IOP) {
  2905. case IntrinsicOp::MOP_Gather:
  2906. case IntrinsicOp::MOP_GatherCmp:
  2907. ch = GatherHelper::GatherChannel::GatherAll;
  2908. break;
  2909. case IntrinsicOp::MOP_GatherRed:
  2910. case IntrinsicOp::MOP_GatherCmpRed:
  2911. ch = GatherHelper::GatherChannel::GatherRed;
  2912. break;
  2913. case IntrinsicOp::MOP_GatherGreen:
  2914. case IntrinsicOp::MOP_GatherCmpGreen:
  2915. ch = GatherHelper::GatherChannel::GatherGreen;
  2916. break;
  2917. case IntrinsicOp::MOP_GatherBlue:
  2918. case IntrinsicOp::MOP_GatherCmpBlue:
  2919. ch = GatherHelper::GatherChannel::GatherBlue;
  2920. break;
  2921. case IntrinsicOp::MOP_GatherAlpha:
  2922. case IntrinsicOp::MOP_GatherCmpAlpha:
  2923. ch = GatherHelper::GatherChannel::GatherAlpha;
  2924. break;
  2925. default:
  2926. DXASSERT(0, "invalid gather intrinsic");
  2927. break;
  2928. }
  2929. GatherHelper gatherHelper(CI, opcode, pObjHelper, ch);
  2930. if (gatherHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2931. Translated = false;
  2932. return nullptr;
  2933. }
  2934. Type *Ty = CI->getType();
  2935. Function *F = hlslOP->GetOpFunc(gatherHelper.opcode, Ty->getScalarType());
  2936. Constant *opArg = hlslOP->GetU32Const((unsigned)gatherHelper.opcode);
  2937. Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel);
  2938. switch (opcode) {
  2939. case OP::OpCode::TextureGather: {
  2940. Value *gatherArgs[] = {
  2941. opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
  2942. // Coord.
  2943. gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
  2944. gatherHelper.coord[3],
  2945. // Offset.
  2946. gatherHelper.offset[0], gatherHelper.offset[1],
  2947. // Channel.
  2948. channelArg};
  2949. GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  2950. } break;
  2951. case OP::OpCode::TextureGatherCmp: {
  2952. Value *gatherArgs[] = {
  2953. opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
  2954. // Coord.
  2955. gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
  2956. gatherHelper.coord[3],
  2957. // Offset.
  2958. gatherHelper.offset[0], gatherHelper.offset[1],
  2959. // Channel.
  2960. channelArg,
  2961. // CmpVal.
  2962. gatherHelper.special};
  2963. GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  2964. } break;
  2965. default:
  2966. DXASSERT(0, "invalid opcode for Gather");
  2967. break;
  2968. }
  2969. // CI is replaced in GenerateDxilGather.
  2970. return nullptr;
  2971. }
  2972. static Value* TranslateWriteSamplerFeedback(CallInst* CI, IntrinsicOp IOP, OP::OpCode opcode,
  2973. HLOperationLowerHelper& helper,
  2974. HLObjectOperationLowerHelper* pObjHelper,
  2975. bool& Translated) {
  2976. hlsl::OP *hlslOP = &helper.hlslOP;
  2977. SampleHelper sampleHelper(CI, opcode, pObjHelper);
  2978. if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2979. Translated = false;
  2980. return nullptr;
  2981. }
  2982. Type *Ty = CI->getType();
  2983. Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  2984. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2985. IRBuilder<> Builder(CI);
  2986. switch (opcode) {
  2987. case OP::OpCode::WriteSamplerFeedback: {
  2988. Value *samplerFeedbackArgs[] = {
  2989. opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, sampleHelper.samplerHandle,
  2990. // Coord.
  2991. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  2992. sampleHelper.coord[3],
  2993. // Clamp.
  2994. sampleHelper.clamp};
  2995. return Builder.CreateCall(F, samplerFeedbackArgs);
  2996. } break;
  2997. case OP::OpCode::WriteSamplerFeedbackBias: {
  2998. Value *samplerFeedbackArgs[] = {
  2999. opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, sampleHelper.samplerHandle,
  3000. // Coord.
  3001. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  3002. sampleHelper.coord[3],
  3003. // Bias.
  3004. sampleHelper.bias,
  3005. // Clamp.
  3006. sampleHelper.clamp};
  3007. return Builder.CreateCall(F, samplerFeedbackArgs);
  3008. } break;
  3009. case OP::OpCode::WriteSamplerFeedbackGrad: {
  3010. Value *samplerFeedbackArgs[] = {
  3011. opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, sampleHelper.samplerHandle,
  3012. // Coord.
  3013. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  3014. sampleHelper.coord[3],
  3015. // Ddx.
  3016. sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
  3017. // Ddy.
  3018. sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
  3019. // Clamp.
  3020. sampleHelper.clamp};
  3021. return Builder.CreateCall(F, samplerFeedbackArgs);
  3022. } break;
  3023. case OP::OpCode::WriteSamplerFeedbackLevel: {
  3024. Value *samplerFeedbackArgs[] = {
  3025. opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, sampleHelper.samplerHandle,
  3026. // Coord.
  3027. sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
  3028. sampleHelper.coord[3],
  3029. // LOD.
  3030. sampleHelper.lod};
  3031. return Builder.CreateCall(F, samplerFeedbackArgs);
  3032. } break;
  3033. default:
  3034. DXASSERT(false, "otherwise, unknown SamplerFeedback Op");
  3035. break;
  3036. }
  3037. return nullptr;
  3038. }
  3039. // Load/Store intrinsics.
// Collects the operands of an HL resource load (Load(), subscript, or
// mips access) so TranslateLoad can emit the matching DXIL load op.
struct ResLoadHelper {
  // For Load()/subscript lowering; operand indices chosen from RK/RC/IOP.
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, IntrinsicOp IOP, bool bForSubscript=false);
  // For mips-style access where the caller supplies the mip level.
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, Value *mip);
  // For double subscript.
  ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip)
      : opcode(OP::OpCode::TextureLoad),
        intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst),
        addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {}
  OP::OpCode opcode;           // DXIL load opcode chosen from the resource kind.
  IntrinsicOp intrinsicOpCode; // Originating HL intrinsic (Num_Intrinsics if n/a).
  // NOTE(review): dxilMajor/dxilMinor are not set by any constructor shown
  // here — confirm where (or whether) they are used before relying on them.
  unsigned dxilMajor;
  unsigned dxilMinor;
  Value *handle;   // Resource handle.
  Value *retVal;   // Original HL call/load instruction to be replaced.
  Value *addr;     // Coordinate vector or byte offset operand.
  Value *offset;   // Optional texel offset; nullptr when absent.
  Value *status;   // Optional tiled-resource status out-arg; nullptr when absent.
  Value *mipLevel; // Mip level (SRV), sample index (2DMS), or undef (UAV).
};
// Selects the DXIL load opcode from the resource kind and pulls the address,
// mip/sample level, optional texel offset, and optional status operand out of
// the HL call, using operand indices that depend on resource kind and class.
ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
                             DxilResourceBase::Class RC, Value *hdl, IntrinsicOp IOP, bool bForSubscript)
    : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) {
  switch (RK) {
  case DxilResource::Kind::RawBuffer:
  case DxilResource::Kind::StructuredBuffer:
    opcode = OP::OpCode::RawBufferLoad;
    break;
  case DxilResource::Kind::TypedBuffer:
    opcode = OP::OpCode::BufferLoad;
    break;
  case DxilResource::Kind::Invalid:
    DXASSERT(0, "invalid resource kind");
    break;
  default:
    // Every remaining kind is a texture.
    opcode = OP::OpCode::TextureLoad;
    break;
  }
  retVal = CI;
  const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx;
  addr = CI->getArgOperand(kAddrIdx);
  unsigned argc = CI->getNumArgOperands();

  if (opcode == OP::OpCode::TextureLoad) {
    // mip at last channel
    unsigned coordSize = DxilResource::GetNumCoords(RK);

    if (RC == DxilResourceBase::Class::SRV) {
      if (bForSubscript) {
        // Use 0 when access by [].
        mipLevel = IRBuilder<>(CI).getInt32(0);
      } else {
        if (coordSize == 1 && !addr->getType()->isVectorTy()) {
          // Use addr when access by Load.
          mipLevel = addr;
        } else {
          // Mip level rides as the component after the coordinates.
          mipLevel = IRBuilder<>(CI).CreateExtractElement(addr, coordSize);
        }
      }
    } else {
      // Set mip level to undef for UAV.
      mipLevel = UndefValue::get(Type::getInt32Ty(addr->getContext()));
    }

    if (RC == DxilResourceBase::Class::SRV) {
      unsigned offsetIdx = HLOperandIndex::kTexLoadOffsetOpIdx;
      unsigned statusIdx = HLOperandIndex::kTexLoadStatusOpIdx;
      if (RK == DxilResource::Kind::Texture2DMS ||
          RK == DxilResource::Kind::Texture2DMSArray) {
        // Multisampled loads use distinct operand positions, and the "mip"
        // slot carries the sample index instead.
        offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx;
        statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx;
        mipLevel =
            CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx);
      }

      // Offset and status are trailing optional arguments.
      if (argc > offsetIdx)
        offset = CI->getArgOperand(offsetIdx);

      if (argc > statusIdx)
        status = CI->getArgOperand(statusIdx);
    } else {
      // RW textures: only an optional status argument.
      const unsigned kStatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx;

      if (argc > kStatusIdx)
        status = CI->getArgOperand(kStatusIdx);
    }
  } else {
    // Buffer loads: only an optional status argument; mipLevel stays unset.
    const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx;

    if (argc > kStatusIdx)
      status = CI->getArgOperand(kStatusIdx);
  }
}
  3127. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  3128. DxilResourceBase::Class RC, Value *hdl, Value *mip)
  3129. : handle(hdl), offset(nullptr), status(nullptr) {
  3130. DXASSERT(RK != DxilResource::Kind::RawBuffer &&
  3131. RK != DxilResource::Kind::TypedBuffer &&
  3132. RK != DxilResource::Kind::Invalid,
  3133. "invalid resource kind");
  3134. opcode = OP::OpCode::TextureLoad;
  3135. retVal = CI;
  3136. mipLevel = mip;
  3137. const unsigned kAddrIdx = HLOperandIndex::kMipLoadAddrOpIdx;
  3138. addr = CI->getArgOperand(kAddrIdx);
  3139. unsigned argc = CI->getNumArgOperands();
  3140. const unsigned kOffsetIdx = HLOperandIndex::kMipLoadOffsetOpIdx;
  3141. const unsigned kStatusIdx = HLOperandIndex::kMipLoadStatusOpIdx;
  3142. if (argc > kOffsetIdx)
  3143. offset = CI->getArgOperand(kOffsetIdx);
  3144. if (argc > kStatusIdx)
  3145. status = CI->getArgOperand(kStatusIdx);
  3146. }
  3147. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  3148. hlsl::OP *OP, HLResource::Kind RK, const DataLayout &DL);
  3149. // Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi }
  3150. void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
  3151. unsigned size, MutableArrayRef<Value *> resultElts,
  3152. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3153. Type *i64Ty = Builder.getInt64Ty();
  3154. Type *doubleTy = Builder.getDoubleTy();
  3155. if (EltTy == doubleTy) {
  3156. Function *makeDouble =
  3157. hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy);
  3158. Value *makeDoubleOpArg =
  3159. Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
  3160. for (unsigned i = 0; i < size; i++) {
  3161. Value *lo = resultElts32[2 * i];
  3162. Value *hi = resultElts32[2 * i + 1];
  3163. Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
  3164. resultElts[i] = V;
  3165. }
  3166. } else {
  3167. for (unsigned i = 0; i < size; i++) {
  3168. Value *lo = resultElts32[2 * i];
  3169. Value *hi = resultElts32[2 * i + 1];
  3170. lo = Builder.CreateZExt(lo, i64Ty);
  3171. hi = Builder.CreateZExt(hi, i64Ty);
  3172. hi = Builder.CreateShl(hi, 32);
  3173. resultElts[i] = Builder.CreateOr(lo, hi);
  3174. }
  3175. }
  3176. }
  3177. static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) {
  3178. unsigned mask = 0;
  3179. switch (NumComponents) {
  3180. case 0:
  3181. break;
  3182. case 1:
  3183. mask = DXIL::kCompMask_X;
  3184. break;
  3185. case 2:
  3186. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y;
  3187. break;
  3188. case 3:
  3189. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
  3190. break;
  3191. case 4:
  3192. mask = DXIL::kCompMask_All;
  3193. break;
  3194. default:
  3195. DXASSERT(false, "Cannot load more than 2 components for 64bit types.");
  3196. }
  3197. return OP->GetI8Const(mask);
  3198. }
  3199. Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
  3200. Value *status, Type *EltTy,
  3201. MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
  3202. IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment);
  3203. static Value* TranslateRawBufVecLd(Type* VecEltTy, unsigned VecElemCount,
  3204. IRBuilder<>& Builder, Value* handle, hlsl::OP* OP, Value* status,
  3205. Value* bufIdx, Value* baseOffset, const DataLayout& DL,
  3206. std::vector<Value*>& bufLds, unsigned baseAlign, bool isScalarTy = false);
// Lowers an HL resource load described by helper to the matching DXIL op
// (RawBufferLoad / BufferLoad / TextureLoad) and replaces helper.retVal's
// uses with the new value. Bools are loaded in their i32 memory form and
// compared back to i1; 64-bit elements of typed resources are loaded as
// 32-bit halves and reassembled.
void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                   IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {

  Type *Ty = helper.retVal->getType();
  if (Ty->isPointerTy()) {
    // Pointer result: this is a structured-buffer subscript; delegate.
    DXASSERT(!DxilResource::IsAnyTexture(RK), "Textures should not be treated as structured buffers.");
    TranslateStructBufSubscript(cast<CallInst>(helper.retVal), helper.handle,
                                helper.status, OP, RK, DL);
    return;
  }

  OP::OpCode opcode = helper.opcode;

  Type *i32Ty = Builder.getInt32Ty();
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  Type *EltTy = Ty->getScalarType();
  unsigned numComponents = 1;
  if (Ty->isVectorTy()) {
    numComponents = Ty->getVectorNumElements();
  }

  if (DXIL::IsStructuredBuffer(RK) || DXIL::IsRawBuffer(RK)) {
    // Raw/structured path: delegate to the raw-buffer vector load helper.
    std::vector<Value*> bufLds;

    const bool isBool = EltTy->isIntegerTy(1);

    // Bool are represented as i32 in memory
    Type* MemReprTy = isBool ? Builder.getInt32Ty() : EltTy;
    bool isScalarTy = !Ty->isVectorTy();

    Value* retValNew = nullptr;
    if (DXIL::IsStructuredBuffer(RK)) {
      // Structured: addr is the element index, byte offset starts at 0.
      retValNew = TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle, OP, helper.status,
        helper.addr, OP->GetU32Const(0), DL, bufLds, /*baseAlign (in bytes)*/ 8, isScalarTy);
    } else {
      // Raw: no element index; addr is the byte offset.
      retValNew = TranslateRawBufVecLd(MemReprTy, numComponents, Builder, helper.handle, OP, helper.status,
        nullptr, helper.addr, DL, bufLds, /*baseAlign (in bytes)*/ 4, isScalarTy);
    }

    DXASSERT_NOMSG(!bufLds.empty());
    dxilutil::MigrateDebugValue(helper.retVal, bufLds.front());

    if (isBool) {
      // Convert result back to register representation.
      retValNew = Builder.CreateICmpNE(retValNew, Constant::getNullValue(retValNew->getType()));
    }

    helper.retVal->replaceAllUsesWith(retValNew);
    helper.retVal = retValNew;
    return;
  }

  bool isTyped = opcode == OP::OpCode::TextureLoad ||
                 RK == DxilResource::Kind::TypedBuffer;
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  if (is64 && isTyped) {
    // Typed resources load 64-bit elements as pairs of i32 halves.
    EltTy = i32Ty;
  }
  bool isBool = EltTy->isIntegerTy(1);
  if (isBool) {
    // Value will be loaded in its memory representation.
    EltTy = i32Ty;
    if (Ty->isVectorTy()) Ty = VectorType::get(EltTy, numComponents);
  }

  Function *F = OP->GetOpFunc(opcode, EltTy);
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);

  llvm::Value *undefI = llvm::UndefValue::get(i32Ty);

  SmallVector<Value *, 12> loadArgs;
  loadArgs.emplace_back(opArg);         // opcode
  loadArgs.emplace_back(helper.handle); // resource handle

  if (opcode == OP::OpCode::TextureLoad) {
    // set mip level
    loadArgs.emplace_back(helper.mipLevel);
  }

  if (opcode == OP::OpCode::TextureLoad) {
    // texture coord: always 3 slots, padded with undef.
    unsigned coordSize = DxilResource::GetNumCoords(RK);
    bool isVectorAddr = helper.addr->getType()->isVectorTy();
    for (unsigned i = 0; i < 3; i++) {
      if (i < coordSize) {
        loadArgs.emplace_back(
          isVectorAddr ? Builder.CreateExtractElement(helper.addr, i) : helper.addr);
      }
      else
        loadArgs.emplace_back(undefI);
    }
  } else {
    if (helper.addr->getType()->isVectorTy()) {
      Value *scalarOffset =
          Builder.CreateExtractElement(helper.addr, (uint64_t)0);

      // TODO: calculate the real address based on opcode

      loadArgs.emplace_back(scalarOffset); // offset
    } else {
      // TODO: calculate the real address based on opcode

      loadArgs.emplace_back(helper.addr); // offset
    }
  }

  // offset 0: texel offsets, always 3 slots, padded with undef.
  if (opcode == OP::OpCode::TextureLoad) {
    if (helper.offset && !isa<llvm::UndefValue>(helper.offset)) {
      unsigned offsetSize = DxilResource::GetNumOffsets(RK);
      for (unsigned i = 0; i < 3; i++) {
        if (i < offsetSize)
          loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i));
        else
          loadArgs.emplace_back(undefI);
      }
    } else {
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
    }
  }

  // Offset 1 (unused second coordinate for typed buffer loads).
  if (RK == DxilResource::Kind::TypedBuffer) {
    loadArgs.emplace_back(undefI);
  }

  Value *ResRet =
      Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode));
  dxilutil::MigrateDebugValue(helper.retVal, ResRet);

  Value *retValNew = nullptr;
  if (!is64 || !isTyped) {
    retValNew = ScalarizeResRet(Ty, ResRet, Builder);
  } else {
    // Reassemble 64-bit elements from the four loaded i32 halves.
    unsigned size = numComponents;
    DXASSERT(size <= 2, "typed buffer only allow 4 dwords");
    EltTy = Ty->getScalarType();
    Value *Elts[2];

    Make64bitResultForLoad(Ty->getScalarType(),
                           {
                               Builder.CreateExtractValue(ResRet, 0),
                               Builder.CreateExtractValue(ResRet, 1),
                               Builder.CreateExtractValue(ResRet, 2),
                               Builder.CreateExtractValue(ResRet, 3),
                           },
                           size, Elts, OP, Builder);

    retValNew = ScalarizeElements(Ty, Elts, Builder);
  }

  if (isBool) {
    // Convert result back to register representation.
    retValNew = Builder.CreateICmpNE(retValNew, Constant::getNullValue(retValNew->getType()));
  }

  // replace
  helper.retVal->replaceAllUsesWith(retValNew);
  // Save new ret val.
  helper.retVal = retValNew;
  // get status
  UpdateStatus(ResRet, helper.status, Builder, OP);
}
  3346. Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3347. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3348. hlsl::OP *hlslOP = &helper.hlslOP;
  3349. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3350. IRBuilder<> Builder(CI);
  3351. DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  3352. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  3353. ResLoadHelper loadHelper(CI, RK, RC, handle, IOP);
  3354. TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout);
  3355. // CI is replaced in TranslateLoad.
  3356. return nullptr;
  3357. }
  3358. // Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
  3359. void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
  3360. MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
  3361. IRBuilder<> &Builder) {
  3362. Type *i32Ty = Builder.getInt32Ty();
  3363. Type *doubleTy = Builder.getDoubleTy();
  3364. Value *undefI32 = UndefValue::get(i32Ty);
  3365. if (EltTy == doubleTy) {
  3366. Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
  3367. Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
  3368. for (unsigned i = 0; i < size; i++) {
  3369. if (isa<UndefValue>(vals[i])) {
  3370. vals32[2 * i] = undefI32;
  3371. vals32[2 * i + 1] = undefI32;
  3372. } else {
  3373. Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
  3374. Value *lo = Builder.CreateExtractValue(retVal, 0);
  3375. Value *hi = Builder.CreateExtractValue(retVal, 1);
  3376. vals32[2 * i] = lo;
  3377. vals32[2 * i + 1] = hi;
  3378. }
  3379. }
  3380. } else {
  3381. for (unsigned i = 0; i < size; i++) {
  3382. if (isa<UndefValue>(vals[i])) {
  3383. vals32[2 * i] = undefI32;
  3384. vals32[2 * i + 1] = undefI32;
  3385. } else {
  3386. Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
  3387. Value *hi = Builder.CreateLShr(vals[i], 32);
  3388. hi = Builder.CreateTrunc(hi, i32Ty);
  3389. vals32[2 * i] = lo;
  3390. vals32[2 * i + 1] = hi;
  3391. }
  3392. }
  3393. }
  3394. }
// Lower a store to a typed/raw buffer or texture resource into the matching
// DXIL store op (BufferStore, RawBufferStore, or TextureStore).
// `RK` selects the opcode, `handle` is the resource handle, `val` is the
// scalar or vector value to write, and `offset` is the element index /
// byte offset / texture coordinate (scalar or vector).
// Vectors wider than 4 components are split into multiple store calls
// (RawBuffer only); 64-bit elements written to typed resources are split
// into lo/hi i32 pairs via Split64bitValForStore.
void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
                    Value *offset, IRBuilder<> &Builder, hlsl::OP *OP) {
  Type *Ty = val->getType();

  // This function is no longer used for lowering stores to a
  // structured buffer.
  DXASSERT_NOMSG(RK != DxilResource::Kind::StructuredBuffer);

  // Pick the DXIL store opcode for this resource kind.
  // (StructuredBuffer shares the RawBufferStore label only so the switch is
  // exhaustive; the assert above rules it out at runtime.)
  OP::OpCode opcode = OP::OpCode::NumOpCodes;
  switch (RK) {
  case DxilResource::Kind::RawBuffer:
  case DxilResource::Kind::StructuredBuffer:
    opcode = OP::OpCode::RawBufferStore;
    break;
  case DxilResource::Kind::TypedBuffer:
    opcode = OP::OpCode::BufferStore;
    break;
  case DxilResource::Kind::Invalid:
    DXASSERT(0, "invalid resource kind");
    break;
  default:
    // All remaining kinds are textures.
    opcode = OP::OpCode::TextureStore;
    break;
  }

  // "Typed" stores (textures and typed buffers) always write 4 components
  // and need 64-bit values split into i32 pairs.
  bool isTyped = opcode == OP::OpCode::TextureStore ||
                 RK == DxilResource::Kind::TypedBuffer;

  Type *i32Ty = Builder.getInt32Ty();
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  Type *EltTy = Ty->getScalarType();
  if (EltTy->isIntegerTy(1)) {
    // Since we're going to memory, convert bools to their memory representation.
    EltTy = i32Ty;
    if (Ty->isVectorTy()) Ty = VectorType::get(EltTy, Ty->getVectorNumElements());
    else Ty = EltTy;
    val = Builder.CreateZExt(val, Ty);
  }

  // If RawBuffer store of 64-bit value, don't set alignment to 8,
  // since buffer alignment isn't known to be anything over 4.
  unsigned alignValue = OP->GetAllocSizeForType(EltTy);
  if (RK == HLResource::Kind::RawBuffer && alignValue > 4)
    alignValue = 4;
  Constant *Alignment = OP->GetI32Const(alignValue);

  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  if (is64 && isTyped) {
    // Typed stores of 64-bit data are emitted as i32 pairs below.
    EltTy = i32Ty;
  }

  Function *F = OP->GetOpFunc(opcode, EltTy);
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);

  llvm::Value *undefI =
      llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext()));
  llvm::Value *undefVal = llvm::UndefValue::get(Ty->getScalarType());

  SmallVector<Value *, 13> storeArgs;
  storeArgs.emplace_back(opArg);  // opcode
  storeArgs.emplace_back(handle); // resource handle

  // Index of the first offset/coordinate operand; needed later to advance the
  // offset when the store is split into multiple calls.
  unsigned offset0Idx = 0;
  if (RK == DxilResource::Kind::RawBuffer ||
      RK == DxilResource::Kind::TypedBuffer) {
    // Offset 0
    if (offset->getType()->isVectorTy()) {
      Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0);
      storeArgs.emplace_back(scalarOffset); // offset
    } else {
      storeArgs.emplace_back(offset); // offset
    }

    // Store offset0 for later use
    offset0Idx = storeArgs.size() - 1;

    // Offset 1 (unused for these buffer kinds)
    storeArgs.emplace_back(undefI);
  } else {
    // texture store
    unsigned coordSize = DxilResource::GetNumCoords(RK);
    // Set x first.
    if (offset->getType()->isVectorTy())
      storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0));
    else
      storeArgs.emplace_back(offset);

    // Store offset0 for later use
    offset0Idx = storeArgs.size() - 1;

    // y/z coordinates, padded with undef up to 3 slots.
    for (unsigned i = 1; i < 3; i++) {
      if (i < coordSize)
        storeArgs.emplace_back(Builder.CreateExtractElement(offset, i));
      else
        storeArgs.emplace_back(undefI);
    }
    // TODO: support mip for texture ST
  }

  constexpr unsigned MaxStoreElemCount = 4;
  const unsigned CompCount = Ty->isVectorTy() ? Ty->getVectorNumElements() : 1;
  // Ceil(CompCount / MaxStoreElemCount): number of DXIL store calls needed.
  const unsigned StoreInstCount = (CompCount / MaxStoreElemCount) + (CompCount % MaxStoreElemCount != 0);
  SmallVector<decltype(storeArgs), 4> storeArgsList;

  // Max number of element to store should be 16 (for a 4x4 matrix)
  DXASSERT_NOMSG(StoreInstCount >= 1 && StoreInstCount <= 4);

  // If number of elements to store exceeds the maximum number of elements
  // that can be stored in a single store call, make sure to generate enough
  // store calls to store all elements
  for (unsigned j = 0; j < StoreInstCount; j++) {
    decltype(storeArgs) newStoreArgs;
    for (Value* storeArg : storeArgs)
      newStoreArgs.emplace_back(storeArg);
    storeArgsList.emplace_back(newStoreArgs);
  }

  for (unsigned j = 0; j < storeArgsList.size(); j++) {
    // For second and subsequent store calls, increment the offset0 (i.e. store index)
    if (j > 0) {
      // Greater than four-components store is not allowed for
      // TypedBuffer and Textures. So greater than four elements
      // scenario should only get hit here for RawBuffer.
      DXASSERT_NOMSG(RK == DxilResource::Kind::RawBuffer);
      unsigned EltSize = OP->GetAllocSizeForType(EltTy);
      unsigned newOffset = EltSize * MaxStoreElemCount * j;
      Value* newOffsetVal = ConstantInt::get(Builder.getInt32Ty(), newOffset);
      newOffsetVal = Builder.CreateAdd(storeArgsList[0][offset0Idx], newOffsetVal);
      storeArgsList[j][offset0Idx] = newOffsetVal;
    }

    // values
    uint8_t mask = 0;
    if (Ty->isVectorTy()) {
      // Number of live components covered by this store call.
      unsigned vecSize = std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - (j * MaxStoreElemCount);
      Value* emptyVal = undefVal;
      if (isTyped) {
        // Typed stores must write all 4 components; pad with component 0.
        mask = DXIL::kCompMask_All;
        emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
      }

      for (unsigned i = 0; i < MaxStoreElemCount; i++) {
        if (i < vecSize) {
          storeArgsList[j].emplace_back(Builder.CreateExtractElement(val, (j * MaxStoreElemCount) + i));
          mask |= (1 << i);
        }
        else {
          storeArgsList[j].emplace_back(emptyVal);
        }
      }
    }
    else {
      // Scalar store: replicate for typed resources, undef-pad for raw.
      if (isTyped) {
        mask = DXIL::kCompMask_All;
        storeArgsList[j].emplace_back(val);
        storeArgsList[j].emplace_back(val);
        storeArgsList[j].emplace_back(val);
        storeArgsList[j].emplace_back(val);
      }
      else {
        storeArgsList[j].emplace_back(val);
        storeArgsList[j].emplace_back(undefVal);
        storeArgsList[j].emplace_back(undefVal);
        storeArgsList[j].emplace_back(undefVal);
        mask = DXIL::kCompMask_X;
      }
    }

    if (is64 && isTyped) {
      // Split 64-bit components into lo/hi i32 pairs in place.
      unsigned size = 1;
      if (Ty->isVectorTy()) {
        size = std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - (j * MaxStoreElemCount);
      }
      DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords");
      unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore
                               ? DXIL::OperandIndex::kTextureStoreVal0OpIdx
                               : DXIL::OperandIndex::kBufferStoreVal0OpIdx;
      Value* V0 = storeArgsList[j][val0OpIdx];
      Value* V1 = storeArgsList[j][val0OpIdx + 1];

      Value* vals32[4];
      EltTy = Ty->getScalarType();
      Split64bitValForStore(EltTy, { V0, V1 }, size, vals32, OP, Builder);
      // Fill the uninit vals.
      if (size == 1) {
        vals32[2] = vals32[0];
        vals32[3] = vals32[1];
      }
      // Change valOp to 32 version.
      for (unsigned i = 0; i < 4; i++) {
        storeArgsList[j][val0OpIdx + i] = vals32[i];
      }
      // change mask for double
      if (opcode == DXIL::OpCode::RawBufferStore) {
        mask = size == 1 ?
          DXIL::kCompMask_X | DXIL::kCompMask_Y : DXIL::kCompMask_All;
      }
    }

    storeArgsList[j].emplace_back(OP->GetU8Const(mask)); // mask
    if (opcode == DXIL::OpCode::RawBufferStore)
      storeArgsList[j].emplace_back(Alignment); // alignment only for raw buffer
    Builder.CreateCall(F, storeArgsList[j]);
  }
}
  3578. Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3579. HLOperationLowerHelper &helper,
  3580. HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3581. hlsl::OP *hlslOP = &helper.hlslOP;
  3582. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3583. IRBuilder<> Builder(CI);
  3584. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  3585. Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx);
  3586. Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx);
  3587. TranslateStore(RK, handle, val, offset, Builder, hlslOP);
  3588. return nullptr;
  3589. }
  3590. }
  3591. // Atomic intrinsics.
  3592. namespace {
  3593. // Atomic intrinsics.
// Bundles the operands of an HL Interlocked* call in a uniform shape so the
// same translation routines can serve both the method (MOP) and free-function
// (IOP) forms of the atomics.
struct AtomicHelper {
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Type *opType=nullptr);
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
               Value *baseOffset, Type *opType=nullptr);
  OP::OpCode opcode;   // DXIL opcode (AtomicBinOp or AtomicCompareExchange).
  Value *handle;       // Resource handle the atomic targets.
  Value *addr;         // Destination address / coordinate (scalar or vector).
  Value *offset; // Offset for structured buffer.
  Value *value;        // Value operand of the atomic.
  Value *originalValue; // Out-pointer for the prior value, or null if absent.
  Value *compareValue;  // Compare operand; only set for compare-exchange.
  Type *operationType;  // Type the atomic operates on (defaults to value's type).
};
  3607. // For MOP version of Interlocked*.
  3608. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Type *opType)
  3609. : opcode(op), handle(h), offset(nullptr), originalValue(nullptr),
  3610. operationType(opType) {
  3611. addr = CI->getArgOperand(HLOperandIndex::kObjectInterlockedDestOpIndex);
  3612. if (op == OP::OpCode::AtomicCompareExchange) {
  3613. compareValue = CI->getArgOperand(
  3614. HLOperandIndex::kObjectInterlockedCmpCompareValueOpIndex);
  3615. value =
  3616. CI->getArgOperand(HLOperandIndex::kObjectInterlockedCmpValueOpIndex);
  3617. if (CI->getNumArgOperands() ==
  3618. (HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex + 1))
  3619. originalValue = CI->getArgOperand(
  3620. HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex);
  3621. } else {
  3622. value = CI->getArgOperand(HLOperandIndex::kObjectInterlockedValueOpIndex);
  3623. if (CI->getNumArgOperands() ==
  3624. (HLOperandIndex::kObjectInterlockedOriginalValueOpIndex + 1))
  3625. originalValue = CI->getArgOperand(
  3626. HLOperandIndex::kObjectInterlockedOriginalValueOpIndex);
  3627. }
  3628. if (nullptr == operationType)
  3629. operationType = value->getType();
  3630. }
  3631. // For IOP version of Interlocked*.
  3632. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
  3633. Value *baseOffset, Type *opType)
  3634. : opcode(op), handle(h), addr(bufIdx),
  3635. offset(baseOffset), originalValue(nullptr),
  3636. operationType(opType) {
  3637. if (op == OP::OpCode::AtomicCompareExchange) {
  3638. compareValue =
  3639. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3640. value = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3641. if (CI->getNumArgOperands() ==
  3642. (HLOperandIndex::kInterlockedCmpOriginalValueOpIndex + 1))
  3643. originalValue = CI->getArgOperand(
  3644. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex);
  3645. } else {
  3646. value = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3647. if (CI->getNumArgOperands() ==
  3648. (HLOperandIndex::kInterlockedOriginalValueOpIndex + 1))
  3649. originalValue =
  3650. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex);
  3651. }
  3652. if (nullptr == operationType)
  3653. operationType = value->getType();
  3654. }
// Emit a DXIL AtomicBinOp call for the operation described by `helper`.
// The address may be a scalar (single coordinate / buffer index) or a vector
// of up to 3 coordinates; unused coordinate slots stay undef. If the HL call
// captured an original-value out-pointer, the atomic's result is stored
// through it (bitcast back when the operation ran on a different type,
// e.g. float exchange performed on i32 bits).
void TranslateAtomicBinaryOperation(AtomicHelper &helper,
                                    DXIL::AtomicBinOpCode atomicOp,
                                    IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  Value *handle = helper.handle;
  Value *addr = helper.addr;
  Value *val = helper.value;
  Type *Ty = helper.operationType;
  Type *valTy = val->getType();

  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));

  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  Value *atomicOpArg = hlslOP->GetU32Const(static_cast<unsigned>(atomicOp));

  // Reinterpret the value when the atomic runs on a different type
  // (e.g. float bits as i32).
  if (Ty != valTy)
    val = Builder.CreateBitCast(val, Ty);

  Value *args[] = {opArg,  handle, atomicOpArg,
                   undefI, undefI, undefI, // coordinates
                   val};

  // Setup coordinates.
  if (addr->getType()->isVectorTy()) {
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
    DXASSERT(vectorNumElements <= 3, "up to 3 elements for atomic binary op");
    _Analysis_assume_(vectorNumElements <= 3);
    for (unsigned i = 0; i < vectorNumElements; i++) {
      Value *Elt = Builder.CreateExtractElement(addr, i);
      args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx + i] = Elt;
    }
  } else
    args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx] = addr;

  // Set offset for structured buffer.
  if (helper.offset)
    args[DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx] = helper.offset;

  Value *origVal =
      Builder.CreateCall(dxilAtomic, args, hlslOP->GetAtomicOpName(atomicOp));
  if (helper.originalValue) {
    if (Ty != valTy)
      origVal = Builder.CreateBitCast(origVal, valTy);
    Builder.CreateStore(origVal, helper.originalValue);
  }
}
  3694. Value *TranslateMopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3695. OP::OpCode opcode,
  3696. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3697. hlsl::OP *hlslOP = &helper.hlslOP;
  3698. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3699. IRBuilder<> Builder(CI);
  3700. switch (IOP) {
  3701. case IntrinsicOp::MOP_InterlockedAdd:
  3702. case IntrinsicOp::MOP_InterlockedAdd64: {
  3703. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3704. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add, Builder,
  3705. hlslOP);
  3706. } break;
  3707. case IntrinsicOp::MOP_InterlockedAnd:
  3708. case IntrinsicOp::MOP_InterlockedAnd64: {
  3709. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3710. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And, Builder,
  3711. hlslOP);
  3712. } break;
  3713. case IntrinsicOp::MOP_InterlockedExchange:
  3714. case IntrinsicOp::MOP_InterlockedExchange64: {
  3715. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3716. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
  3717. Builder, hlslOP);
  3718. } break;
  3719. case IntrinsicOp::MOP_InterlockedExchangeFloat: {
  3720. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle, Type::getInt32Ty(CI->getContext()));
  3721. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
  3722. Builder, hlslOP);
  3723. } break;
  3724. case IntrinsicOp::MOP_InterlockedMax:
  3725. case IntrinsicOp::MOP_InterlockedMax64: {
  3726. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3727. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax, Builder,
  3728. hlslOP);
  3729. } break;
  3730. case IntrinsicOp::MOP_InterlockedMin:
  3731. case IntrinsicOp::MOP_InterlockedMin64: {
  3732. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3733. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin, Builder,
  3734. hlslOP);
  3735. } break;
  3736. case IntrinsicOp::MOP_InterlockedUMax: {
  3737. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3738. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax, Builder,
  3739. hlslOP);
  3740. } break;
  3741. case IntrinsicOp::MOP_InterlockedUMin: {
  3742. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3743. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin, Builder,
  3744. hlslOP);
  3745. } break;
  3746. case IntrinsicOp::MOP_InterlockedOr:
  3747. case IntrinsicOp::MOP_InterlockedOr64: {
  3748. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3749. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or, Builder,
  3750. hlslOP);
  3751. } break;
  3752. case IntrinsicOp::MOP_InterlockedXor:
  3753. case IntrinsicOp::MOP_InterlockedXor64:
  3754. default: {
  3755. DXASSERT(IOP == IntrinsicOp::MOP_InterlockedXor || IOP == IntrinsicOp::MOP_InterlockedXor64,
  3756. "invalid MOP atomic intrinsic");
  3757. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3758. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor, Builder,
  3759. hlslOP);
  3760. } break;
  3761. }
  3762. return nullptr;
  3763. }
// Emit a DXIL AtomicCompareExchange call for the operands in `helper`.
// Mirrors TranslateAtomicBinaryOperation: scalar or up-to-3-element vector
// address, optional structured-buffer offset, and an optional store of the
// original value (bitcast back when the op ran on a different type).
void TranslateAtomicCmpXChg(AtomicHelper &helper, IRBuilder<> &Builder,
                            hlsl::OP *hlslOP) {
  Value *handle = helper.handle;
  Value *addr = helper.addr;
  Value *val = helper.value;
  Value *cmpVal = helper.compareValue;

  Type *Ty = helper.operationType;
  Type *valTy = val->getType();

  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));

  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));

  // Reinterpret operands when the atomic runs on a different type
  // (e.g. float bits compared/exchanged as i32).
  if (Ty != valTy) {
    val = Builder.CreateBitCast(val, Ty);
    if (cmpVal)
      cmpVal = Builder.CreateBitCast(cmpVal, Ty);
  }

  Value *args[] = {opArg,  handle, undefI, undefI, undefI, // coordinates
                   cmpVal, val};

  // Setup coordinates.
  if (addr->getType()->isVectorTy()) {
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
    DXASSERT(vectorNumElements <= 3, "up to 3 elements in atomic op");
    _Analysis_assume_(vectorNumElements <= 3);
    for (unsigned i = 0; i < vectorNumElements; i++) {
      Value *Elt = Builder.CreateExtractElement(addr, i);
      args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx + i] = Elt;
    }
  } else
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx] = addr;

  // Set offset for structured buffer.
  if (helper.offset)
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx] = helper.offset;

  Value *origVal = Builder.CreateCall(dxilAtomic, args);
  if (helper.originalValue) {
    if (Ty != valTy)
      origVal = Builder.CreateBitCast(origVal, valTy);
    Builder.CreateStore(origVal, helper.originalValue);
  }
}
  3803. Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3804. OP::OpCode opcode,
  3805. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3806. hlsl::OP *hlslOP = &helper.hlslOP;
  3807. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3808. IRBuilder<> Builder(CI);
  3809. Type *opType = nullptr;
  3810. if (IOP == IntrinsicOp::MOP_InterlockedCompareStoreFloatBitwise ||
  3811. IOP == IntrinsicOp::MOP_InterlockedCompareExchangeFloatBitwise)
  3812. opType = Type::getInt32Ty(CI->getContext());
  3813. AtomicHelper atomicHelper(CI, OP::OpCode::AtomicCompareExchange, handle, opType);
  3814. TranslateAtomicCmpXChg(atomicHelper, Builder, hlslOP);
  3815. return nullptr;
  3816. }
// Lower a free-function Interlocked* on groupshared (TGSM) memory to an LLVM
// atomicrmw instruction. If the HL call supplies an original-value
// out-pointer, the prior value is stored through it.
void TranslateSharedMemAtomicBinOp(CallInst *CI, IntrinsicOp IOP, Value *addr) {
  AtomicRMWInst::BinOp Op;
  IRBuilder<> Builder(CI);
  Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  PointerType *ptrType = dyn_cast<PointerType>(
      CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType());
  // atomicrmw only takes integers, so a float destination is punned to i32.
  // NOTE(review): the i32 bitcasts are emitted only on the Exchange path
  // below; needCast still gates the float cast-back of the result for every
  // op — confirm float destinations can only reach this function via
  // InterlockedExchange.
  bool needCast = ptrType && ptrType->getElementType()->isFloatTy();
  switch (IOP) {
  case IntrinsicOp::IOP_InterlockedAdd:
    Op = AtomicRMWInst::BinOp::Add;
    break;
  case IntrinsicOp::IOP_InterlockedAnd:
    Op = AtomicRMWInst::BinOp::And;
    break;
  case IntrinsicOp::IOP_InterlockedExchange:
    if (needCast) {
      // Exchange float bits through an i32 atomic.
      val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext()));
      addr = Builder.CreateBitCast(addr, Type::getInt32PtrTy(CI->getContext(), DXIL::kTGSMAddrSpace));
    }
    Op = AtomicRMWInst::BinOp::Xchg;
    break;
  case IntrinsicOp::IOP_InterlockedMax:
    Op = AtomicRMWInst::BinOp::Max;
    break;
  case IntrinsicOp::IOP_InterlockedUMax:
    Op = AtomicRMWInst::BinOp::UMax;
    break;
  case IntrinsicOp::IOP_InterlockedMin:
    Op = AtomicRMWInst::BinOp::Min;
    break;
  case IntrinsicOp::IOP_InterlockedUMin:
    Op = AtomicRMWInst::BinOp::UMin;
    break;
  case IntrinsicOp::IOP_InterlockedOr:
    Op = AtomicRMWInst::BinOp::Or;
    break;
  case IntrinsicOp::IOP_InterlockedXor:
  default:
    DXASSERT(IOP == IntrinsicOp::IOP_InterlockedXor, "Invalid Intrinsic");
    Op = AtomicRMWInst::BinOp::Xor;
    break;
  }

  Value *Result = Builder.CreateAtomicRMW(
      Op, addr, val, AtomicOrdering::SequentiallyConsistent);
  // The original-value out-param is optional; detect it by arity.
  if (CI->getNumArgOperands() >
      HLOperandIndex::kInterlockedOriginalValueOpIndex) {
    if (needCast)
      Result = Builder.CreateBitCast(Result, Type::getFloatTy(CI->getContext()));
    Builder.CreateStore(
        Result,
        CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex));
  }
}
  3870. static Value* SkipAddrSpaceCast(Value* Ptr) {
  3871. if (AddrSpaceCastInst *CastInst = dyn_cast<AddrSpaceCastInst>(Ptr))
  3872. return CastInst->getOperand(0);
  3873. else if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Ptr)) {
  3874. if (ConstExpr->getOpcode() == Instruction::AddrSpaceCast) {
  3875. return ConstExpr->getOperand(0);
  3876. }
  3877. }
  3878. return Ptr;
  3879. }
  3880. Value *TranslateIopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3881. DXIL::OpCode opcode,
  3882. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3883. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3884. addr = SkipAddrSpaceCast(addr);
  3885. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3886. if (addressSpace == DXIL::kTGSMAddrSpace)
  3887. TranslateSharedMemAtomicBinOp(CI, IOP, addr);
  3888. else {
  3889. // buffer atomic translated in TranslateSubscript.
  3890. // Do nothing here.
  3891. // Mark not translated.
  3892. Translated = false;
  3893. }
  3894. return nullptr;
  3895. }
  3896. void TranslateSharedMemAtomicCmpXChg(CallInst *CI, Value *addr) {
  3897. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3898. Value *cmpVal =
  3899. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3900. IRBuilder<> Builder(CI);
  3901. PointerType *ptrType = dyn_cast<PointerType>(
  3902. CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType());
  3903. bool needCast = false;
  3904. if (ptrType && ptrType->getElementType()->isFloatTy()) {
  3905. needCast = true;
  3906. val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext()));
  3907. cmpVal = Builder.CreateBitCast(cmpVal, Type::getInt32Ty(CI->getContext()));
  3908. addr = Builder.CreateBitCast(addr, Type::getInt32PtrTy(CI->getContext(), DXIL::kTGSMAddrSpace));
  3909. }
  3910. Value *Result = Builder.CreateAtomicCmpXchg(
  3911. addr, cmpVal, val, AtomicOrdering::SequentiallyConsistent,
  3912. AtomicOrdering::SequentiallyConsistent);
  3913. if (CI->getNumArgOperands() >
  3914. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex) {
  3915. Value *originVal = Builder.CreateExtractValue(Result, 0);
  3916. if (needCast)
  3917. originVal = Builder.CreateBitCast(originVal, Type::getFloatTy(CI->getContext()));
  3918. Builder.CreateStore(
  3919. originVal,
  3920. CI->getArgOperand(HLOperandIndex::kInterlockedCmpOriginalValueOpIndex));
  3921. }
  3922. }
  3923. Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3924. DXIL::OpCode opcode,
  3925. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3926. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3927. addr = SkipAddrSpaceCast(addr);
  3928. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3929. if (addressSpace == DXIL::kTGSMAddrSpace)
  3930. TranslateSharedMemAtomicCmpXChg(CI, addr);
  3931. else {
  3932. // buffer atomic translated in TranslateSubscript.
  3933. // Do nothing here.
  3934. // Mark not translated.
  3935. Translated = false;
  3936. }
  3937. return nullptr;
  3938. }
  3939. }
  3940. // Process Tess Factor.
  3941. namespace {
  3942. // Clamp to [0.0f..1.0f], NaN->0.0f.
  3943. Value *CleanupTessFactorScale(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3944. float fMin = 0;
  3945. float fMax = 1;
  3946. Type *f32Ty = input->getType()->getScalarType();
  3947. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3948. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3949. Type *Ty = input->getType();
  3950. if (Ty->isVectorTy())
  3951. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3952. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3953. if (Ty->isVectorTy())
  3954. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3955. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3956. }
  3957. // Clamp to [1.0f..Inf], NaN->1.0f.
  3958. Value *CleanupTessFactor(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder)
  3959. {
  3960. float fMin = 1.0;
  3961. Type *f32Ty = input->getType()->getScalarType();
  3962. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3963. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3964. return TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3965. }
  3966. // Do partitioning-specific clamping.
  3967. Value *ClampTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3968. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3969. const unsigned kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64;
  3970. const unsigned kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63;
  3971. const unsigned kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2;
  3972. const unsigned kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1;
  3973. const unsigned kTESSELLATOR_MAX_TESSELLATION_FACTOR = 64;
  3974. float fMin;
  3975. float fMax;
  3976. switch (partitionMode) {
  3977. case DXIL::TessellatorPartitioning::Integer:
  3978. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3979. fMax = kTESSELLATOR_MAX_TESSELLATION_FACTOR;
  3980. break;
  3981. case DXIL::TessellatorPartitioning::Pow2:
  3982. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3983. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3984. break;
  3985. case DXIL::TessellatorPartitioning::FractionalOdd:
  3986. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3987. fMax = kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
  3988. break;
  3989. case DXIL::TessellatorPartitioning::FractionalEven:
  3990. default:
  3991. DXASSERT(partitionMode == DXIL::TessellatorPartitioning::FractionalEven,
  3992. "invalid partition mode");
  3993. fMin = kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
  3994. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3995. break;
  3996. }
  3997. Type *f32Ty = input->getType()->getScalarType();
  3998. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3999. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  4000. Type *Ty = input->getType();
  4001. if (Ty->isVectorTy())
  4002. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  4003. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  4004. if (Ty->isVectorTy())
  4005. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  4006. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  4007. }
  4008. // round up for integer/pow2 partitioning
  4009. // note that this code assumes the inputs should be in the range [1, inf),
  4010. // which should be enforced by the clamp above.
  4011. Value *RoundUpTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  4012. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  4013. switch (partitionMode) {
  4014. case DXIL::TessellatorPartitioning::Integer:
  4015. return TrivialDxilUnaryOperation(DXIL::OpCode::Round_pi, input, hlslOP, Builder);
  4016. case DXIL::TessellatorPartitioning::Pow2: {
  4017. const unsigned kExponentMask = 0x7f800000;
  4018. const unsigned kExponentLSB = 0x00800000;
  4019. const unsigned kMantissaMask = 0x007fffff;
  4020. Type *Ty = input->getType();
  4021. // (val = (asuint(val) & mantissamask) ?
  4022. // (asuint(val) & exponentmask) + exponentbump :
  4023. // asuint(val) & exponentmask;
  4024. Type *uintTy = Type::getInt32Ty(Ty->getContext());
  4025. if (Ty->isVectorTy())
  4026. uintTy = VectorType::get(uintTy, Ty->getVectorNumElements());
  4027. Value *uintVal = Builder.CreateCast(Instruction::CastOps::FPToUI, input, uintTy);
  4028. Value *mantMask = ConstantInt::get(uintTy->getScalarType(), kMantissaMask);
  4029. mantMask = SplatToVector(mantMask, uintTy, Builder);
  4030. Value *manVal = Builder.CreateAnd(uintVal, mantMask);
  4031. Value *expMask = ConstantInt::get(uintTy->getScalarType(), kExponentMask);
  4032. expMask = SplatToVector(expMask, uintTy, Builder);
  4033. Value *expVal = Builder.CreateAnd(uintVal, expMask);
  4034. Value *expLSB = ConstantInt::get(uintTy->getScalarType(), kExponentLSB);
  4035. expLSB = SplatToVector(expLSB, uintTy, Builder);
  4036. Value *newExpVal = Builder.CreateAdd(expVal, expLSB);
  4037. Value *manValNotZero = Builder.CreateICmpEQ(manVal, ConstantAggregateZero::get(uintTy));
  4038. Value *factors = Builder.CreateSelect(manValNotZero, newExpVal, expVal);
  4039. return Builder.CreateUIToFP(factors, Ty);
  4040. } break;
  4041. case DXIL::TessellatorPartitioning::FractionalEven:
  4042. case DXIL::TessellatorPartitioning::FractionalOdd:
  4043. return input;
  4044. default:
  4045. DXASSERT(0, "invalid partition mode");
  4046. return nullptr;
  4047. }
  4048. }
  4049. Value *TranslateProcessIsolineTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4050. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4051. hlsl::OP *hlslOP = &helper.hlslOP;
  4052. // Get partition mode
  4053. DXASSERT_NOMSG(helper.functionProps);
  4054. DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
  4055. DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;
  4056. IRBuilder<> Builder(CI);
  4057. Value *rawDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDetailFactor);
  4058. rawDetailFactor = Builder.CreateExtractElement(rawDetailFactor, (uint64_t)0);
  4059. Value *rawDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDensityFactor);
  4060. rawDensityFactor = Builder.CreateExtractElement(rawDensityFactor, (uint64_t)0);
  4061. Value *init = UndefValue::get(VectorType::get(helper.f32Ty, 2));
  4062. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)0);
  4063. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)1);
  4064. Value *clamped = ClampTessFactor(init, partition, hlslOP, Builder);
  4065. Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
  4066. Value *roundedDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDetailFactor);
  4067. Value *temp = UndefValue::get(VectorType::get(helper.f32Ty, 1));
  4068. Value *roundedX = Builder.CreateExtractElement(rounded, (uint64_t)0);
  4069. temp = Builder.CreateInsertElement(temp, roundedX, (uint64_t)0);
  4070. Builder.CreateStore(temp, roundedDetailFactor);
  4071. Value *roundedDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDensityFactor);
  4072. Value *roundedY = Builder.CreateExtractElement(rounded, 1);
  4073. temp = Builder.CreateInsertElement(temp, roundedY, (uint64_t)0);
  4074. Builder.CreateStore(temp, roundedDensityFactor);
  4075. return nullptr;
  4076. }
  4077. // 3 inputs, 1 result
  4078. Value *ApplyTriTessFactorOp(Value *input, DXIL::OpCode opcode, hlsl::OP *hlslOP,
  4079. IRBuilder<> &Builder) {
  4080. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  4081. Value *input1 = Builder.CreateExtractElement(input, 1);
  4082. Value *input2 = Builder.CreateExtractElement(input, 2);
  4083. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  4084. Value *temp =
  4085. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  4086. Value *combined =
  4087. TrivialDxilBinaryOperation(opcode, temp, input2, hlslOP, Builder);
  4088. return combined;
  4089. } else {
  4090. // Avg.
  4091. Value *temp = Builder.CreateFAdd(input0, input1);
  4092. Value *combined = Builder.CreateFAdd(temp, input2);
  4093. Value *rcp = ConstantFP::get(input0->getType(), 1.0 / 3.0);
  4094. combined = Builder.CreateFMul(combined, rcp);
  4095. return combined;
  4096. }
  4097. }
  4098. // 4 inputs, 1 result
  4099. Value *ApplyQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  4100. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  4101. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  4102. Value *input1 = Builder.CreateExtractElement(input, 1);
  4103. Value *input2 = Builder.CreateExtractElement(input, 2);
  4104. Value *input3 = Builder.CreateExtractElement(input, 3);
  4105. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  4106. Value *temp0 =
  4107. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  4108. Value *temp1 =
  4109. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  4110. Value *combined =
  4111. TrivialDxilBinaryOperation(opcode, temp0, temp1, hlslOP, Builder);
  4112. return combined;
  4113. } else {
  4114. // Avg.
  4115. Value *temp0 = Builder.CreateFAdd(input0, input1);
  4116. Value *temp1 = Builder.CreateFAdd(input2, input3);
  4117. Value *combined = Builder.CreateFAdd(temp0, temp1);
  4118. Value *rcp = ConstantFP::get(input0->getType(), 0.25);
  4119. combined = Builder.CreateFMul(combined, rcp);
  4120. return combined;
  4121. }
  4122. }
  4123. // 4 inputs, 2 result
  4124. Value *Apply2DQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  4125. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  4126. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  4127. Value *input1 = Builder.CreateExtractElement(input, 1);
  4128. Value *input2 = Builder.CreateExtractElement(input, 2);
  4129. Value *input3 = Builder.CreateExtractElement(input, 3);
  4130. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  4131. Value *temp0 =
  4132. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  4133. Value *temp1 =
  4134. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  4135. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  4136. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  4137. combined = Builder.CreateInsertElement(combined, temp1, 1);
  4138. return combined;
  4139. } else {
  4140. // Avg.
  4141. Value *temp0 = Builder.CreateFAdd(input0, input1);
  4142. Value *temp1 = Builder.CreateFAdd(input2, input3);
  4143. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  4144. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  4145. combined = Builder.CreateInsertElement(combined, temp1, 1);
  4146. Constant *rcp = ConstantFP::get(input0->getType(), 0.5);
  4147. rcp = ConstantVector::getSplat(2, rcp);
  4148. combined = Builder.CreateFMul(combined, rcp);
  4149. return combined;
  4150. }
  4151. }
// When the rounded tess factor falls below |cutoffVal|, substitute the
// clamped/rounded *unscaled average* for both the clamped and rounded
// results.  Updates *pClampedResult in place and returns the resolved
// rounded factor.
Value *ResolveSmallValue(Value **pClampedResult, Value *rounded, Value *averageUnscaled,
    float cutoffVal, DXIL::TessellatorPartitioning partitionMode, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *clampedResult = *pClampedResult;
  Value *clampedVal = clampedResult;
  Value *roundedVal = rounded;
  // Do partitioning-specific clamping.
  Value *clampedAvg = ClampTessFactor(averageUnscaled, partitionMode, hlslOP, Builder);
  Constant *cutoffVals = ConstantFP::get(Type::getFloatTy(rounded->getContext()), cutoffVal);
  // Widen the cutoff to match the average's width when it is a vector.
  if (clampedAvg->getType()->isVectorTy())
    cutoffVals = ConstantVector::getSplat(clampedAvg->getType()->getVectorNumElements(), cutoffVals);
  // Limit the value.
  clampedAvg = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, clampedAvg, cutoffVals, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *roundedAvg = RoundUpTessFactor(clampedAvg, partitionMode, hlslOP, Builder);
  // Re-splat the cutoff to the rounded value's width for the compare below.
  if (rounded->getType() != cutoffVals->getType())
    cutoffVals = ConstantVector::getSplat(rounded->getType()->getVectorNumElements(), cutoffVals);
  // If the scaled value is less than three, then take the unscaled average.
  Value *lt = Builder.CreateFCmpOLT(rounded, cutoffVals);
  // Splat a scalar average up to vector width before selecting against it.
  if (clampedAvg->getType() != clampedVal->getType())
    clampedAvg = SplatToVector(clampedAvg, clampedVal->getType(), Builder);
  *pClampedResult = Builder.CreateSelect(lt, clampedAvg, clampedVal);
  if (roundedAvg->getType() != roundedVal->getType())
    roundedAvg = SplatToVector(roundedAvg, roundedVal->getType(), Builder);
  Value *result = Builder.CreateSelect(lt, roundedAvg, roundedVal);
  return result;
}
// For 2D quad factors: where a lane of the clamped result is below
// |cutoffVal|, replace both lanes with a per-vector floor built from the
// max of the two lanes, capped at the cutoff ("final" uses the rounded
// cutoff).  Both results are updated in place through the pointers.
void ResolveQuadAxes( Value **pFinalResult, Value **pClampedResult,
    float cutoffVal, DXIL::TessellatorPartitioning partitionMode, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *finalResult = *pFinalResult;
  Value *clampedResult = *pClampedResult;
  Value *clampR = clampedResult;
  Value *finalR = finalResult;
  Type *f32Ty = Type::getFloatTy(finalR->getContext());
  Constant *cutoffVals = ConstantFP::get(f32Ty, cutoffVal);
  // Floor for the clamped path is the raw cutoff; the final (rounded) path
  // uses the cutoff rounded per the partitioning mode.
  Value *minValsX = cutoffVals;
  Value *minValsY = RoundUpTessFactor(cutoffVals, partitionMode, hlslOP, Builder);
  // Max of the two clamped lanes.
  Value *clampRX = Builder.CreateExtractElement(clampR, (uint64_t)0);
  Value *clampRY = Builder.CreateExtractElement(clampR, 1);
  Value *maxValsX = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, clampRX, clampRY, hlslOP, Builder);
  // Max of the two final lanes.
  Value *finalRX = Builder.CreateExtractElement(finalR, (uint64_t)0);
  Value *finalRY = Builder.CreateExtractElement(finalR, 1);
  Value *maxValsY = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, finalRX, finalRY, hlslOP, Builder);
  // Don't go over our threshold ("final" one is rounded).
  Value * optionX = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsX, minValsX, hlslOP, Builder);
  Value * optionY = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsY, minValsY, hlslOP, Builder);
  Value *clampL = SplatToVector(optionX, clampR->getType(), Builder);
  Value *finalL = SplatToVector(optionY, finalR->getType(), Builder);
  // Per-lane select: lanes below the cutoff take the replacement value.
  cutoffVals = ConstantVector::getSplat(2, cutoffVals);
  Value *lt = Builder.CreateFCmpOLT(clampedResult, cutoffVals);
  *pClampedResult = Builder.CreateSelect(lt, clampL, clampR);
  *pFinalResult = Builder.CreateSelect(lt, finalL, finalR);
}
// Lowers the ProcessTessFactors* family of HL intrinsics for hull shaders.
// Writes three outputs through pointer arguments: the rounded edge factors,
// the unrounded inside factor, and the rounded inside factor.  Behavior is
// specialized on the shader's tessellator partitioning mode and on whether
// the intrinsic is the Min/Max/Avg, Tri/Quad/2DQuad variant.
Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Get partition mode
  DXASSERT_NOMSG(helper.functionProps);
  DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
  DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;

  IRBuilder<> Builder(CI);

  // Map the intrinsic variant onto the reduction used for the inside factor.
  // NumOpCodes is the sentinel meaning "average".
  DXIL::OpCode tessFactorOp = DXIL::OpCode::NumOpCodes;
  switch (IOP) {
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
    tessFactorOp = DXIL::OpCode::FMax;
    break;
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
    tessFactorOp = DXIL::OpCode::FMin;
    break;
  default:
    // Default is Avg.
    break;
  }

  Value *rawEdgeFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawEdgeFactor);
  Value *insideScale = CI->getArgOperand(HLOperandIndex::kProcessTessFactorInsideScale);
  // Clamp to [0.0f..1.0f], NaN->0.0f.
  Value *scales = CleanupTessFactorScale(insideScale, hlslOP, Builder);
  // Do partitioning-specific clamping.
  Value *clamped = ClampTessFactor(rawEdgeFactor, partition, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
  // Store the output.
  Value *roundedEdgeFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedEdgeFactor);
  Builder.CreateStore(rounded, roundedEdgeFactor);

  // Clamp to [1.0f..Inf], NaN->1.0f.
  bool isQuad = false;
  Value *clean = CleanupTessFactor(rawEdgeFactor, hlslOP, Builder);
  // Reduce the cleaned edge factors into the inside-factor seed, per variant.
  Value *factors = nullptr;
  switch (IOP) {
  case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
    factors = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
    break;
  case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
    factors = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
    isQuad = true;
    break;
  case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
    factors = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
    break;
  default:
    DXASSERT(0, "invalid opcode for ProcessTessFactor");
    break;
  }

  // Apply the inside scale; splat the reduced factor if widths differ.
  Value *scaledI = nullptr;
  if (scales->getType() == factors->getType())
    scaledI = Builder.CreateFMul(factors, scales);
  else {
    Value *vecFactors = SplatToVector(factors, scales->getType(), Builder);
    scaledI = Builder.CreateFMul(vecFactors, scales);
  }

  // Do partitioning-specific clamping.
  Value *clampedI = ClampTessFactor(scaledI, partition, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *roundedI = RoundUpTessFactor(clampedI, partition, hlslOP, Builder);

  Value *finalI = roundedI;

  // Fractional-odd partitioning gets a small-value fixup: values under the
  // cutoff fall back to the unscaled average (Max variants keep Max).
  if (partition == DXIL::TessellatorPartitioning::FractionalOdd) {
    // If not max, set to AVG.
    if (tessFactorOp != DXIL::OpCode::FMax)
      tessFactorOp = DXIL::OpCode::NumOpCodes;

    bool b2D = false;
    Value *avgFactorsI = nullptr;
    switch (IOP) {
    case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
    case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
    case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
      avgFactorsI = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
      b2D = true;
      break;
    case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
    case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
    case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
      avgFactorsI = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
      break;
    case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
    case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
    case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
      avgFactorsI = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
      break;
    default:
      DXASSERT(0, "invalid opcode for ProcessTessFactor");
      break;
    }

    finalI =
        ResolveSmallValue(/*inout*/&clampedI, roundedI, avgFactorsI, /*cutoff*/ 3.0,
                          partition, hlslOP, Builder);

    if (b2D)
      ResolveQuadAxes(/*inout*/&finalI, /*inout*/&clampedI, /*cutoff*/3.0, partition, hlslOP, Builder);
  }

  // Write the unrounded inside factor; for quads only one channel is
  // computed, so extract lane 0 and splat to the output width.
  Value *unroundedInsideFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorUnRoundedInsideFactor);
  Type *outFactorTy = unroundedInsideFactor->getType()->getPointerElementType();
  if (outFactorTy != clampedI->getType()) {
    DXASSERT(isQuad, "quad only write one channel of out factor");
    (void)isQuad;
    clampedI = Builder.CreateExtractElement(clampedI, (uint64_t)0);
    // Splat clampedI to float2.
    clampedI = SplatToVector(clampedI, outFactorTy, Builder);
  }
  Builder.CreateStore(clampedI, unroundedInsideFactor);

  // Write the rounded inside factor, with the same quad splat handling.
  Value *roundedInsideFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedInsideFactor);
  if (outFactorTy != finalI->getType()) {
    DXASSERT(isQuad, "quad only write one channel of out factor");
    finalI = Builder.CreateExtractElement(finalI, (uint64_t)0);
    // Splat finalI to float2.
    finalI = SplatToVector(finalI, outFactorTy, Builder);
  }
  Builder.CreateStore(finalI, roundedInsideFactor);

  return nullptr;
}
  4329. }
  4330. // Ray Tracing.
  4331. namespace {
  4332. Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP,
  4333. OP::OpCode opcode,
  4334. HLOperationLowerHelper &helper,
  4335. HLObjectOperationLowerHelper *pObjHelper,
  4336. bool &Translated) {
  4337. hlsl::OP *hlslOP = &helper.hlslOP;
  4338. Value *THit = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4339. Value *HitKind = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4340. Value *Attr = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4341. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4342. Type *Ty = Attr->getType();
  4343. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  4344. IRBuilder<> Builder(CI);
  4345. return Builder.CreateCall(F, {opArg, THit, HitKind, Attr});
  4346. }
  4347. Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP,
  4348. OP::OpCode opcode,
  4349. HLOperationLowerHelper &helper,
  4350. HLObjectOperationLowerHelper *pObjHelper,
  4351. bool &Translated) {
  4352. hlsl::OP *hlslOP = &helper.hlslOP;
  4353. Value *ShaderIndex = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  4354. Value *Parameter = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  4355. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4356. Type *Ty = Parameter->getType();
  4357. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  4358. IRBuilder<> Builder(CI);
  4359. return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter});
  4360. }
// Lowers the TraceRay intrinsic.  The leading scalar arguments are forwarded
// unchanged; the RayDesc struct argument is loaded field-by-field and its
// float3 members scalarized into the dxil operand list; the payload pointer
// becomes the final operand and selects the dxil overload.
Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                         HLOperationLowerHelper &helper,
                         HLObjectOperationLowerHelper *pObjHelper,
                         bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx);
  Value *payLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx);

  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));

  Value *Args[DXIL::OperandIndex::kTraceRayNumOp];
  Args[0] = opArg;
  // HL operands before the RayDesc map 1:1 onto the dxil operand slots.
  for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) {
    Args[i] = CI->getArgOperand(i);
  }
  IRBuilder<> Builder(CI);
  // struct RayDesc
  //{
  //  float3 Origin;
  //  float  TMin;
  //  float3 Direction;
  //  float  TMax;
  //};
  Value *zeroIdx = hlslOP->GetU32Const(0);
  // Origin (field 0): load the float3 and scalarize into three operands.
  Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx});
  origin = Builder.CreateLoad(origin);
  unsigned index = DXIL::OperandIndex::kTraceRayRayDescOpIdx;
  Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
  Args[index++] = Builder.CreateExtractElement(origin, 1);
  Args[index++] = Builder.CreateExtractElement(origin, 2);
  // TMin (field 1).
  Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)});
  tmin = Builder.CreateLoad(tmin);
  Args[index++] = tmin;
  // Direction (field 2), scalarized like Origin.
  Value *direction = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)});
  direction = Builder.CreateLoad(direction);
  Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
  Args[index++] = Builder.CreateExtractElement(direction, 1);
  Args[index++] = Builder.CreateExtractElement(direction, 2);
  // TMax (field 3).
  Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)});
  tmax = Builder.CreateLoad(tmax);
  Args[index++] = tmax;
  // Payload goes in the last slot and picks the overload type.
  Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = payLoad;

  Type *Ty = payLoad->getType();
  Function *F = hlslOP->GetOpFunc(opcode, Ty);

  return Builder.CreateCall(F, Args);
}
  4405. // RayQuery methods
  4406. Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4407. HLOperationLowerHelper &helper,
  4408. HLObjectOperationLowerHelper *pObjHelper,
  4409. bool &Translated) {
  4410. hlsl::OP *hlslOP = &helper.hlslOP;
  4411. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  4412. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  4413. }
// Lowers RayQuery::TraceRayInline.  Leading arguments are forwarded
// unchanged; the ray description arrives as separate HL arguments (origin
// float3, TMin, direction float3, TMax) which are scalarized into the dxil
// operand list.
Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                               HLOperationLowerHelper &helper,
                               HLObjectOperationLowerHelper *pObjHelper,
                               bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));

  Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp];
  Args[0] = opArg;
  // HL operands before the ray description map 1:1 onto the dxil slots.
  for (unsigned i = 1; i < HLOperandIndex::kTraceRayInlineRayDescOpIdx; i++) {
    Args[i] = CI->getArgOperand(i);
  }
  IRBuilder<> Builder(CI);
  // Walk the HL argument list and the dxil operand list in parallel.
  unsigned hlIndex = HLOperandIndex::kTraceRayInlineRayDescOpIdx;
  unsigned index = DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx;
  // struct RayDesc
  //{
  //  float3 Origin;
  Value *origin = CI->getArgOperand(hlIndex++);
  Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
  Args[index++] = Builder.CreateExtractElement(origin, 1);
  Args[index++] = Builder.CreateExtractElement(origin, 2);
  //  float TMin;
  Args[index++] = CI->getArgOperand(hlIndex++);
  //  float3 Direction;
  Value *direction = CI->getArgOperand(hlIndex++);
  Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
  Args[index++] = Builder.CreateExtractElement(direction, 1);
  Args[index++] = Builder.CreateExtractElement(direction, 2);
  //  float TMax;
  Args[index++] = CI->getArgOperand(hlIndex++);
  //};
  // Every dxil operand slot must have been filled exactly once.
  DXASSERT_NOMSG(index == DXIL::OperandIndex::kTraceRayInlineNumOp);

  Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());

  return Builder.CreateCall(F, Args);
}
  4449. Value *TranslateCommitProceduralPrimitiveHit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4450. HLOperationLowerHelper &helper,
  4451. HLObjectOperationLowerHelper *pObjHelper,
  4452. bool &Translated) {
  4453. hlsl::OP *hlslOP = &helper.hlslOP;
  4454. Value *THit = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  4455. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4456. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4457. Value *Args[] = {opArg, handle, THit};
  4458. IRBuilder<> Builder(CI);
  4459. Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());
  4460. return Builder.CreateCall(F, Args);
  4461. }
  4462. Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4463. HLOperationLowerHelper &helper,
  4464. HLObjectOperationLowerHelper *pObjHelper,
  4465. bool &Translated) {
  4466. hlsl::OP *hlslOP = &helper.hlslOP;
  4467. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  4468. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4469. IRBuilder<> Builder(CI);
  4470. Function *F = hlslOP->GetOpFunc(opcode, CI->getType());
  4471. return Builder.CreateCall(F, {opArg, handle});
  4472. }
  4473. Value *TranslateRayQueryMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4474. HLOperationLowerHelper &helper,
  4475. HLObjectOperationLowerHelper *pObjHelper,
  4476. bool &Translated) {
  4477. hlsl::OP *hlslOP = &helper.hlslOP;
  4478. VectorType *Ty = cast<VectorType>(CI->getType());
  4479. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4480. uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
  4481. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4482. uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
  4483. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4484. Value *retVal =
  4485. TrivialDxilOperation(opcode, {nullptr, handle, rows, cols}, Ty, CI, hlslOP);
  4486. return retVal;
  4487. }
  4488. Value *TranslateRayQueryTransposedMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4489. HLOperationLowerHelper &helper,
  4490. HLObjectOperationLowerHelper *pObjHelper,
  4491. bool &Translated) {
  4492. hlsl::OP *hlslOP = &helper.hlslOP;
  4493. VectorType *Ty = cast<VectorType>(CI->getType());
  4494. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4495. uint32_t rVals[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 };
  4496. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4497. uint8_t cVals[] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
  4498. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4499. Value *retVal =
  4500. TrivialDxilOperation(opcode, {nullptr, handle, rows, cols}, Ty, CI, hlslOP);
  4501. return retVal;
  4502. }
  4503. Value *TranslateRayQueryFloat2Getter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4504. HLOperationLowerHelper &helper,
  4505. HLObjectOperationLowerHelper *pObjHelper,
  4506. bool &Translated) {
  4507. hlsl::OP *hlslOP = &helper.hlslOP;
  4508. VectorType *Ty = cast<VectorType>(CI->getType());
  4509. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4510. uint8_t elementVals[] = {0, 1};
  4511. Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
  4512. Value *retVal =
  4513. TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP);
  4514. return retVal;
  4515. }
  4516. Value *TranslateRayQueryFloat3Getter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4517. HLOperationLowerHelper &helper,
  4518. HLObjectOperationLowerHelper *pObjHelper,
  4519. bool &Translated) {
  4520. hlsl::OP *hlslOP = &helper.hlslOP;
  4521. VectorType *Ty = cast<VectorType>(CI->getType());
  4522. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  4523. uint8_t elementVals[] = {0, 1, 2};
  4524. Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
  4525. Value *retVal =
  4526. TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP);
  4527. return retVal;
  4528. }
  4529. Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4530. HLOperationLowerHelper &helper,
  4531. HLObjectOperationLowerHelper *pObjHelper,
  4532. bool &Translated) {
  4533. hlsl::OP *hlslOP = &helper.hlslOP;
  4534. VectorType *Ty = cast<VectorType>(CI->getType());
  4535. uint8_t vals[] = {0,1,2,3};
  4536. Constant *src = ConstantDataVector::get(CI->getContext(), vals);
  4537. Value *retVal = TrivialDxilOperation(opcode, {nullptr, src}, Ty, CI, hlslOP);
  4538. return retVal;
  4539. }
  4540. Value *TranslateNoArgMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4541. HLOperationLowerHelper &helper,
  4542. HLObjectOperationLowerHelper *pObjHelper,
  4543. bool &Translated) {
  4544. hlsl::OP *hlslOP = &helper.hlslOP;
  4545. VectorType *Ty = cast<VectorType>(CI->getType());
  4546. uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
  4547. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4548. uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
  4549. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4550. Value *retVal =
  4551. TrivialDxilOperation(opcode, {nullptr, rows, cols}, Ty, CI, hlslOP);
  4552. return retVal;
  4553. }
  4554. Value *TranslateNoArgTransposedMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4555. HLOperationLowerHelper &helper,
  4556. HLObjectOperationLowerHelper *pObjHelper,
  4557. bool &Translated) {
  4558. hlsl::OP *hlslOP = &helper.hlslOP;
  4559. VectorType *Ty = cast<VectorType>(CI->getType());
  4560. uint32_t rVals[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 };
  4561. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4562. uint8_t cVals[] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
  4563. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4564. Value *retVal =
  4565. TrivialDxilOperation(opcode, { nullptr, rows, cols }, Ty, CI, hlslOP);
  4566. return retVal;
  4567. }
// Lowers a no-arg, no-return intrinsic that must execute after the output
// copies: emits the trivial dxil call, then relocates it to sit immediately
// before the block's return instruction.
Value *TranslateNoArgNoReturnPreserveOutput(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  Instruction *pResult = cast<Instruction>(
      TrivialNoArgOperation(CI, IOP, opcode, helper, pObjHelper, Translated));
  // HL intrinsic must have had a return injected just after the call.
  // SROA_Parameter_HLSL will copy from alloca to output just before each return.
  // Now move call after the copy and just before the return.
  if (isa<ReturnInst>(pResult->getNextNode()))
    return pResult;  // Already immediately before the return; nothing to move.
  ReturnInst *RetI = cast<ReturnInst>(pResult->getParent()->getTerminator());
  pResult->removeFromParent();
  pResult->insertBefore(RetI);
  return pResult;
}
  4582. // Special half dot2 with accumulate to float
  4583. Value *TranslateDot2Add(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4584. HLOperationLowerHelper &helper,
  4585. HLObjectOperationLowerHelper *pObjHelper,
  4586. bool &Translated) {
  4587. hlsl::OP *hlslOP = &helper.hlslOP;
  4588. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4589. const unsigned vecSize = 2;
  4590. DXASSERT(src0->getType()->isVectorTy() &&
  4591. vecSize == src0->getType()->getVectorNumElements() &&
  4592. src0->getType()->getScalarType()->isHalfTy(),
  4593. "otherwise, unexpected input dimension or component type");
  4594. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4595. DXASSERT(src0->getType() == src1->getType(),
  4596. "otherwise, mismatched argument types");
  4597. Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4598. Type *accTy = accArg->getType();
  4599. DXASSERT(!accTy->isVectorTy() && accTy->isFloatTy(),
  4600. "otherwise, unexpected accumulator type");
  4601. IRBuilder<> Builder(CI);
  4602. Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
  4603. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  4604. SmallVector<Value *, 6> args;
  4605. args.emplace_back(opArg);
  4606. args.emplace_back(accArg);
  4607. for (unsigned i = 0; i < vecSize; i++)
  4608. args.emplace_back(Builder.CreateExtractElement(src0, i));
  4609. for (unsigned i = 0; i < vecSize; i++)
  4610. args.emplace_back(Builder.CreateExtractElement(src1, i));
  4611. return Builder.CreateCall(dxilFunc, args);
  4612. }
  4613. Value *TranslateDot4AddPacked(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4614. HLOperationLowerHelper &helper,
  4615. HLObjectOperationLowerHelper *pObjHelper,
  4616. bool &Translated) {
  4617. hlsl::OP *hlslOP = &helper.hlslOP;
  4618. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4619. DXASSERT(
  4620. !src0->getType()->isVectorTy() && src0->getType()->isIntegerTy(32),
  4621. "otherwise, unexpected vector support in high level intrinsic template");
  4622. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4623. DXASSERT(src0->getType() == src1->getType(), "otherwise, mismatched argument types");
  4624. Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4625. Type *accTy = accArg->getType();
  4626. DXASSERT(!accTy->isVectorTy() && accTy->isIntegerTy(32),
  4627. "otherwise, unexpected vector support in high level intrinsic template");
  4628. IRBuilder<> Builder(CI);
  4629. Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
  4630. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  4631. return Builder.CreateCall(dxilFunc, { opArg, accArg, src0, src1 });
  4632. }
  4633. Value *TranslatePack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4634. HLOperationLowerHelper &helper,
  4635. HLObjectOperationLowerHelper *pObjHelper,
  4636. bool &Translated) {
  4637. hlsl::OP *hlslOP = &helper.hlslOP;
  4638. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  4639. Type *valTy = val->getType();
  4640. Type *eltTy = valTy->getScalarType();
  4641. DXASSERT(valTy->isVectorTy() && valTy->getVectorNumElements() == 4 && eltTy->isIntegerTy() &&
  4642. (eltTy->getIntegerBitWidth() == 32 || eltTy->getIntegerBitWidth() == 16),
  4643. "otherwise, unexpected input dimension or component type");
  4644. DXIL::PackMode packMode = DXIL::PackMode::Trunc;
  4645. switch (IOP) {
  4646. case hlsl::IntrinsicOp::IOP_pack_clamp_s8:
  4647. packMode = DXIL::PackMode::SClamp;
  4648. break;
  4649. case hlsl::IntrinsicOp::IOP_pack_clamp_u8:
  4650. packMode = DXIL::PackMode::UClamp;
  4651. break;
  4652. case hlsl::IntrinsicOp::IOP_pack_s8:
  4653. case hlsl::IntrinsicOp::IOP_pack_u8:
  4654. packMode = DXIL::PackMode::Trunc;
  4655. break;
  4656. default:
  4657. DXASSERT(false, "unexpected opcode");
  4658. break;
  4659. }
  4660. IRBuilder<> Builder(CI);
  4661. Function *dxilFunc = hlslOP->GetOpFunc(opcode, eltTy);
  4662. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  4663. Constant *packModeArg = hlslOP->GetU8Const((unsigned)packMode);
  4664. Value *elt0 = Builder.CreateExtractElement(val, (uint64_t)0);
  4665. Value *elt1 = Builder.CreateExtractElement(val, (uint64_t)1);
  4666. Value *elt2 = Builder.CreateExtractElement(val, (uint64_t)2);
  4667. Value *elt3 = Builder.CreateExtractElement(val, (uint64_t)3);
  4668. return Builder.CreateCall(dxilFunc, { opArg, packModeArg, elt0, elt1, elt2, elt3 });
  4669. }
  4670. Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4671. HLOperationLowerHelper &helper,
  4672. HLObjectOperationLowerHelper *pObjHelper,
  4673. bool &Translated) {
  4674. hlsl::OP *hlslOP = &helper.hlslOP;
  4675. Value *packedVal = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  4676. DXASSERT(!packedVal->getType()->isVectorTy() && packedVal->getType()->isIntegerTy(32),
  4677. "otherwise, unexpected vector support in high level intrinsic template");
  4678. Type *overloadType = nullptr;
  4679. DXIL::UnpackMode unpackMode = DXIL::UnpackMode::Unsigned;
  4680. switch (IOP) {
  4681. case hlsl::IntrinsicOp::IOP_unpack_s8s32:
  4682. unpackMode = DXIL::UnpackMode::Signed;
  4683. overloadType = helper.i32Ty;
  4684. break;
  4685. case hlsl::IntrinsicOp::IOP_unpack_u8u32:
  4686. unpackMode = DXIL::UnpackMode::Unsigned;
  4687. overloadType = helper.i32Ty;
  4688. break;
  4689. case hlsl::IntrinsicOp::IOP_unpack_s8s16:
  4690. unpackMode = DXIL::UnpackMode::Signed;
  4691. overloadType = helper.i16Ty;
  4692. break;
  4693. case hlsl::IntrinsicOp::IOP_unpack_u8u16:
  4694. unpackMode = DXIL::UnpackMode::Unsigned;
  4695. overloadType = helper.i16Ty;
  4696. break;
  4697. default:
  4698. DXASSERT(false, "unexpected opcode");
  4699. break;
  4700. }
  4701. IRBuilder<> Builder(CI);
  4702. Function *dxilFunc = hlslOP->GetOpFunc(opcode, overloadType);
  4703. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  4704. Constant *unpackModeArg = hlslOP->GetU8Const((unsigned)unpackMode);
  4705. Value *Res = Builder.CreateCall(dxilFunc, { opArg, unpackModeArg , packedVal });
  4706. // Convert the final aggregate into a vector to make the types match
  4707. const unsigned vecSize = 4;
  4708. Value *ResVec = UndefValue::get(CI->getType());
  4709. for (unsigned i = 0; i < vecSize; ++i) {
  4710. Value *Elt = Builder.CreateExtractValue(Res, i);
  4711. ResVec = Builder.CreateInsertElement(ResVec, Elt, i);
  4712. }
  4713. return ResVec;
  4714. }
  4715. } // namespace
  4716. // Resource Handle.
  4717. namespace {
  4718. Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP,
  4719. DXIL::OpCode opcode,
  4720. HLOperationLowerHelper &helper,
  4721. HLObjectOperationLowerHelper *pObjHelper,
  4722. bool &Translated) {
  4723. hlsl::OP &hlslOP = helper.hlslOP;
  4724. Function *dxilFunc = hlslOP.GetOpFunc(opcode, helper.voidTy);
  4725. IRBuilder<> Builder(CI);
  4726. Value *opArg = ConstantInt::get(helper.i32Ty, (unsigned)opcode);
  4727. return Builder.CreateCall(
  4728. dxilFunc, {opArg, CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx),
  4729. CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx),
  4730. // TODO: update nonUniformIndex later.
  4731. Builder.getInt1(false)});
  4732. }
  4733. }
  4734. // Lower table.
  4735. namespace {
  4736. Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
  4737. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4738. Translated = false;
  4739. dxilutil::EmitErrorOnInstruction(CI, "Unsupported intrinsic.");
  4740. return nullptr;
  4741. }
  4742. // SPIRV change starts
  4743. #ifdef ENABLE_SPIRV_CODEGEN
  4744. Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP,
  4745. DXIL::OpCode opcode,
  4746. HLOperationLowerHelper &helper,
  4747. HLObjectOperationLowerHelper *pObjHelper,
  4748. bool &Translated) {
  4749. Translated = false;
  4750. dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic.");
  4751. return nullptr;
  4752. }
  4753. #endif // ENABLE_SPIRV_CODEGEN
  4754. // SPIRV change ends
  4755. Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
  4756. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4757. // Translated in DxilGenerationPass::GenerateStreamOutputOperation.
  4758. // Do nothing here.
  4759. // Mark not translated.
  4760. Translated = false;
  4761. return nullptr;
  4762. }
  4763. // This table has to match IntrinsicOp orders
  4764. IntrinsicLower gLowerTable[] = {
  4765. {IntrinsicOp::IOP_AcceptHitAndEndSearch, TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::AcceptHitAndEndSearch},
  4766. {IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc},
  4767. {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  4768. {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  4769. {IntrinsicOp::IOP_AllocateRayQuery, TranslateAllocateRayQuery, DXIL::OpCode::AllocateRayQuery},
  4770. {IntrinsicOp::IOP_CallShader, TranslateCallShader, DXIL::OpCode::CallShader},
  4771. {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped},
  4772. {IntrinsicOp::IOP_CreateResourceFromHeap, TranslateGetHandleFromHeap, DXIL::OpCode::CreateHandleFromHeap},
  4773. {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, DXIL::OpCode::NumOpCodes},
  4774. {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  4775. {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  4776. {IntrinsicOp::IOP_DispatchMesh, TrivialDispatchMesh, DXIL::OpCode::DispatchMesh },
  4777. {IntrinsicOp::IOP_DispatchRaysDimensions, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysDimensions},
  4778. {IntrinsicOp::IOP_DispatchRaysIndex, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysIndex},
  4779. {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, DXIL::OpCode::NumOpCodes},
  4780. {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, DXIL::OpCode::EvalCentroid},
  4781. {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, DXIL::OpCode::NumOpCodes},
  4782. {IntrinsicOp::IOP_GeometryIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::GeometryIndex},
  4783. {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex, DXIL::OpCode::AttributeAtVertex},
  4784. {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, DXIL::OpCode::RenderTargetGetSampleCount},
  4785. {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, DXIL::OpCode::NumOpCodes},
  4786. {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
  4787. {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
  4788. {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation, DXIL::OpCode::HitKind},
  4789. {IntrinsicOp::IOP_IgnoreHit, TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::IgnoreHit},
  4790. {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceID},
  4791. {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceIndex},
  4792. {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4793. {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4794. {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4795. {IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4796. {IntrinsicOp::IOP_InterlockedCompareStore, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4797. {IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  4798. {IntrinsicOp::IOP_InterlockedExchange, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4799. {IntrinsicOp::IOP_InterlockedMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4800. {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4801. {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4802. {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  4803. {IntrinsicOp::IOP_IsHelperLane, TrivialNoArgWithRetOperation, DXIL::OpCode::IsHelperLane},
  4804. {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, DXIL::OpCode::NumOpCodes},
  4805. {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayDirection},
  4806. {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayOrigin},
  4807. {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
  4808. {IntrinsicOp::IOP_ObjectToWorld3x4, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
  4809. {IntrinsicOp::IOP_ObjectToWorld4x3, TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
  4810. {IntrinsicOp::IOP_PrimitiveIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::PrimitiveIndex},
  4811. {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4812. {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4813. {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4814. {IntrinsicOp::IOP_ProcessIsolineTessFactors, TranslateProcessIsolineTessFactors, DXIL::OpCode::NumOpCodes},
  4815. {IntrinsicOp::IOP_ProcessQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4816. {IntrinsicOp::IOP_ProcessQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4817. {IntrinsicOp::IOP_ProcessQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4818. {IntrinsicOp::IOP_ProcessTriTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4819. {IntrinsicOp::IOP_ProcessTriTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4820. {IntrinsicOp::IOP_ProcessTriTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
  4821. {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4822. {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4823. {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
  4824. {IntrinsicOp::IOP_QuadReadLaneAt, TranslateQuadReadLaneAt, DXIL::OpCode::NumOpCodes},
  4825. {IntrinsicOp::IOP_RayFlags, TrivialNoArgWithRetOperation, DXIL::OpCode::RayFlags},
  4826. {IntrinsicOp::IOP_RayTCurrent, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTCurrent},
  4827. {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTMin},
  4828. {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection, DXIL::OpCode::ReportHit},
  4829. {IntrinsicOp::IOP_SetMeshOutputCounts, TrivialSetMeshOutputCounts, DXIL::OpCode::SetMeshOutputCounts},
  4830. {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay},
  4831. {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual, DXIL::OpCode::WaveActiveAllEqual},
  4832. {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B, DXIL::OpCode::WaveAllTrue},
  4833. {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B, DXIL::OpCode::WaveAnyTrue},
  4834. {IntrinsicOp::IOP_WaveActiveBallot, TranslateWaveBallot, DXIL::OpCode::WaveActiveBallot},
  4835. {IntrinsicOp::IOP_WaveActiveBitAnd, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4836. {IntrinsicOp::IOP_WaveActiveBitOr, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4837. {IntrinsicOp::IOP_WaveActiveBitXor, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
  4838. {IntrinsicOp::IOP_WaveActiveCountBits, TranslateWaveA2B, DXIL::OpCode::WaveAllBitCount},
  4839. {IntrinsicOp::IOP_WaveActiveMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4840. {IntrinsicOp::IOP_WaveActiveMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4841. {IntrinsicOp::IOP_WaveActiveProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4842. {IntrinsicOp::IOP_WaveActiveSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
  4843. {IntrinsicOp::IOP_WaveGetLaneCount, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneCount},
  4844. {IntrinsicOp::IOP_WaveGetLaneIndex, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneIndex},
  4845. {IntrinsicOp::IOP_WaveIsFirstLane, TranslateWaveToVal, DXIL::OpCode::WaveIsFirstLane},
  4846. {IntrinsicOp::IOP_WaveMatch, TranslateWaveMatch, DXIL::OpCode::WaveMatch},
  4847. {IntrinsicOp::IOP_WaveMultiPrefixBitAnd, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4848. {IntrinsicOp::IOP_WaveMultiPrefixBitOr, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4849. {IntrinsicOp::IOP_WaveMultiPrefixBitXor, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4850. {IntrinsicOp::IOP_WaveMultiPrefixCountBits, TranslateWaveMultiPrefixBitCount, DXIL::OpCode::WaveMultiPrefixBitCount},
  4851. {IntrinsicOp::IOP_WaveMultiPrefixProduct, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4852. {IntrinsicOp::IOP_WaveMultiPrefixSum, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp},
  4853. {IntrinsicOp::IOP_WavePrefixCountBits, TranslateWaveA2B, DXIL::OpCode::WavePrefixBitCount},
  4854. {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
  4855. {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
  4856. {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt, DXIL::OpCode::WaveReadLaneAt},
  4857. {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, DXIL::OpCode::WaveReadLaneFirst},
  4858. {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayDirection},
  4859. {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayOrigin},
  4860. {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::WorldToObject},
  4861. {IntrinsicOp::IOP_WorldToObject3x4, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::WorldToObject},
  4862. {IntrinsicOp::IOP_WorldToObject4x3, TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::WorldToObject},
  4863. {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
  4864. {IntrinsicOp::IOP_abs, TranslateAbs, DXIL::OpCode::NumOpCodes},
  4865. {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
  4866. {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes},
  4867. {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes},
  4868. {IntrinsicOp::IOP_asdouble, TranslateAsDouble, DXIL::OpCode::MakeDouble},
  4869. {IntrinsicOp::IOP_asfloat, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4870. {IntrinsicOp::IOP_asfloat16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4871. {IntrinsicOp::IOP_asin, TrivialUnaryOperation, DXIL::OpCode::Asin},
  4872. {IntrinsicOp::IOP_asint, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4873. {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
  4874. {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble},
  4875. {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes},
  4876. {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan},
  4877. {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes},
  4878. {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi},
  4879. {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
  4880. {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes},
  4881. {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos},
  4882. {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos},
  4883. {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, DXIL::OpCode::Countbits},
  4884. {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes},
  4885. {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
  4886. {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
  4887. {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineX},
  4888. {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
  4889. {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
  4890. {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineY},
  4891. {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes},
  4892. {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes},
  4893. {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes},
  4894. {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes},
  4895. {IntrinsicOp::IOP_dot2add, TranslateDot2Add, DXIL::OpCode::Dot2AddHalf},
  4896. {IntrinsicOp::IOP_dot4add_i8packed, TranslateDot4AddPacked, DXIL::OpCode::Dot4AddI8Packed},
  4897. {IntrinsicOp::IOP_dot4add_u8packed, TranslateDot4AddPacked, DXIL::OpCode::Dot4AddU8Packed},
  4898. {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes},
  4899. {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes},
  4900. {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp},
  4901. {IntrinsicOp::IOP_f16tof32, TranslateF16ToF32, DXIL::OpCode::LegacyF16ToF32},
  4902. {IntrinsicOp::IOP_f32tof16, TranslateF32ToF16, DXIL::OpCode::LegacyF32ToF16},
  4903. {IntrinsicOp::IOP_faceforward, TranslateFaceforward, DXIL::OpCode::NumOpCodes},
  4904. {IntrinsicOp::IOP_firstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitSHi},
  4905. {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo, DXIL::OpCode::FirstbitLo},
  4906. {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni},
  4907. {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma},
  4908. {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes},
  4909. {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc},
  4910. {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes},
  4911. {IntrinsicOp::IOP_fwidth, TranslateFWidth, DXIL::OpCode::NumOpCodes},
  4912. {IntrinsicOp::IOP_isfinite, TrivialIsSpecialFloat, DXIL::OpCode::IsFinite},
  4913. {IntrinsicOp::IOP_isinf, TrivialIsSpecialFloat, DXIL::OpCode::IsInf},
  4914. {IntrinsicOp::IOP_isnan, TrivialIsSpecialFloat, DXIL::OpCode::IsNaN},
  4915. {IntrinsicOp::IOP_ldexp, TranslateLdExp, DXIL::OpCode::NumOpCodes},
  4916. {IntrinsicOp::IOP_length, TranslateLength, DXIL::OpCode::NumOpCodes},
  4917. {IntrinsicOp::IOP_lerp, TranslateLerp, DXIL::OpCode::NumOpCodes},
  4918. {IntrinsicOp::IOP_lit, TranslateLit, DXIL::OpCode::NumOpCodes},
  4919. {IntrinsicOp::IOP_log, TranslateLog, DXIL::OpCode::NumOpCodes},
  4920. {IntrinsicOp::IOP_log10, TranslateLog10, DXIL::OpCode::NumOpCodes},
  4921. {IntrinsicOp::IOP_log2, TrivialUnaryOperation, DXIL::OpCode::Log},
  4922. {IntrinsicOp::IOP_mad, TranslateFUITrinary, DXIL::OpCode::IMad},
  4923. {IntrinsicOp::IOP_max, TranslateFUIBinary, DXIL::OpCode::IMax},
  4924. {IntrinsicOp::IOP_min, TranslateFUIBinary, DXIL::OpCode::IMin},
  4925. {IntrinsicOp::IOP_modf, TranslateModF, DXIL::OpCode::NumOpCodes},
  4926. {IntrinsicOp::IOP_msad4, TranslateMSad4, DXIL::OpCode::NumOpCodes},
  4927. {IntrinsicOp::IOP_mul, TranslateMul, DXIL::OpCode::NumOpCodes},
  4928. {IntrinsicOp::IOP_normalize, TranslateNormalize, DXIL::OpCode::NumOpCodes},
  4929. {IntrinsicOp::IOP_pack_clamp_s8, TranslatePack, DXIL::OpCode::Pack4x8 },
  4930. {IntrinsicOp::IOP_pack_clamp_u8, TranslatePack, DXIL::OpCode::Pack4x8 },
  4931. {IntrinsicOp::IOP_pack_s8, TranslatePack, DXIL::OpCode::Pack4x8 },
  4932. {IntrinsicOp::IOP_pack_u8, TranslatePack, DXIL::OpCode::Pack4x8 },
  4933. {IntrinsicOp::IOP_pow, TranslatePow, DXIL::OpCode::NumOpCodes},
  4934. {IntrinsicOp::IOP_printf, TranslatePrintf, DXIL::OpCode::NumOpCodes},
  4935. {IntrinsicOp::IOP_radians, TranslateRadians, DXIL::OpCode::NumOpCodes},
  4936. {IntrinsicOp::IOP_rcp, TranslateRCP, DXIL::OpCode::NumOpCodes},
  4937. {IntrinsicOp::IOP_reflect, TranslateReflect, DXIL::OpCode::NumOpCodes},
  4938. {IntrinsicOp::IOP_refract, TranslateRefract, DXIL::OpCode::NumOpCodes},
  4939. {IntrinsicOp::IOP_reversebits, TrivialUnaryOperation, DXIL::OpCode::Bfrev},
  4940. {IntrinsicOp::IOP_round, TrivialUnaryOperation, DXIL::OpCode::Round_ne},
  4941. {IntrinsicOp::IOP_rsqrt, TrivialUnaryOperation, DXIL::OpCode::Rsqrt},
  4942. {IntrinsicOp::IOP_saturate, TrivialUnaryOperation, DXIL::OpCode::Saturate},
  4943. {IntrinsicOp::IOP_sign, TranslateSign, DXIL::OpCode::NumOpCodes},
  4944. {IntrinsicOp::IOP_sin, TrivialUnaryOperation, DXIL::OpCode::Sin},
  4945. {IntrinsicOp::IOP_sincos, EmptyLower, DXIL::OpCode::NumOpCodes},
  4946. {IntrinsicOp::IOP_sinh, TrivialUnaryOperation, DXIL::OpCode::Hsin},
  4947. {IntrinsicOp::IOP_smoothstep, TranslateSmoothStep, DXIL::OpCode::NumOpCodes},
  4948. {IntrinsicOp::IOP_source_mark, EmptyLower, DXIL::OpCode::NumOpCodes},
  4949. {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt},
  4950. {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes},
  4951. {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan},
  4952. {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan},
  4953. {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4954. {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4955. {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4956. {IntrinsicOp::IOP_tex1Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4957. {IntrinsicOp::IOP_tex1Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4958. {IntrinsicOp::IOP_tex2D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4959. {IntrinsicOp::IOP_tex2Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4960. {IntrinsicOp::IOP_tex2Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4961. {IntrinsicOp::IOP_tex2Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4962. {IntrinsicOp::IOP_tex2Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4963. {IntrinsicOp::IOP_tex3D, EmptyLower, DXIL::OpCode::NumOpCodes},
  4964. {IntrinsicOp::IOP_tex3Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4965. {IntrinsicOp::IOP_tex3Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4966. {IntrinsicOp::IOP_tex3Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4967. {IntrinsicOp::IOP_tex3Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4968. {IntrinsicOp::IOP_texCUBE, EmptyLower, DXIL::OpCode::NumOpCodes},
  4969. {IntrinsicOp::IOP_texCUBEbias, EmptyLower, DXIL::OpCode::NumOpCodes},
  4970. {IntrinsicOp::IOP_texCUBEgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
  4971. {IntrinsicOp::IOP_texCUBElod, EmptyLower, DXIL::OpCode::NumOpCodes},
  4972. {IntrinsicOp::IOP_texCUBEproj, EmptyLower, DXIL::OpCode::NumOpCodes},
  4973. {IntrinsicOp::IOP_transpose, EmptyLower, DXIL::OpCode::NumOpCodes},
  4974. {IntrinsicOp::IOP_trunc, TrivialUnaryOperation, DXIL::OpCode::Round_z},
  4975. {IntrinsicOp::IOP_unpack_s8s16, TranslateUnpack, DXIL::OpCode::Unpack4x8},
  4976. {IntrinsicOp::IOP_unpack_s8s32, TranslateUnpack, DXIL::OpCode::Unpack4x8},
  4977. {IntrinsicOp::IOP_unpack_u8u16, TranslateUnpack, DXIL::OpCode::Unpack4x8},
  4978. {IntrinsicOp::IOP_unpack_u8u32, TranslateUnpack, DXIL::OpCode::Unpack4x8},
  4979. #ifdef ENABLE_SPIRV_CODEGEN
  4980. { IntrinsicOp::IOP_VkReadClock, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes },
  4981. #endif // ENABLE_SPIRV_CODEGEN
  4982. {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream},
  4983. {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream},
  4984. {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
  4985. {IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
  4986. {IntrinsicOp::MOP_GetDimensions, TranslateGetDimensions, DXIL::OpCode::NumOpCodes},
  4987. {IntrinsicOp::MOP_Load, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  4988. {IntrinsicOp::MOP_Sample, TranslateSample, DXIL::OpCode::Sample},
  4989. {IntrinsicOp::MOP_SampleBias, TranslateSample, DXIL::OpCode::SampleBias},
  4990. {IntrinsicOp::MOP_SampleCmp, TranslateSample, DXIL::OpCode::SampleCmp},
  4991. {IntrinsicOp::MOP_SampleCmpLevelZero, TranslateSample, DXIL::OpCode::SampleCmpLevelZero},
  4992. {IntrinsicOp::MOP_SampleGrad, TranslateSample, DXIL::OpCode::SampleGrad},
  4993. {IntrinsicOp::MOP_SampleLevel, TranslateSample, DXIL::OpCode::SampleLevel},
  4994. {IntrinsicOp::MOP_Gather, TranslateGather, DXIL::OpCode::TextureGather},
  4995. {IntrinsicOp::MOP_GatherAlpha, TranslateGather, DXIL::OpCode::TextureGather},
  4996. {IntrinsicOp::MOP_GatherBlue, TranslateGather, DXIL::OpCode::TextureGather},
  4997. {IntrinsicOp::MOP_GatherCmp, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4998. {IntrinsicOp::MOP_GatherCmpAlpha, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  4999. {IntrinsicOp::MOP_GatherCmpBlue, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  5000. {IntrinsicOp::MOP_GatherCmpGreen, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  5001. {IntrinsicOp::MOP_GatherCmpRed, TranslateGather, DXIL::OpCode::TextureGatherCmp},
  5002. {IntrinsicOp::MOP_GatherGreen, TranslateGather, DXIL::OpCode::TextureGather},
  5003. {IntrinsicOp::MOP_GatherRed, TranslateGather, DXIL::OpCode::TextureGather},
  5004. {IntrinsicOp::MOP_GetSamplePosition, TranslateGetSamplePosition, DXIL::OpCode::NumOpCodes},
  5005. {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  5006. {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  5007. {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
  5008. {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5009. {IntrinsicOp::MOP_InterlockedAdd64, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5010. {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5011. {IntrinsicOp::MOP_InterlockedAnd64, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5012. {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  5013. {IntrinsicOp::MOP_InterlockedCompareExchange64, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  5014. {IntrinsicOp::MOP_InterlockedCompareExchangeFloatBitwise, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  5015. {IntrinsicOp::MOP_InterlockedCompareStore, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  5016. {IntrinsicOp::MOP_InterlockedCompareStore64, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  5017. {IntrinsicOp::MOP_InterlockedCompareStoreFloatBitwise, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
  5018. {IntrinsicOp::MOP_InterlockedExchange, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5019. {IntrinsicOp::MOP_InterlockedExchange64, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5020. {IntrinsicOp::MOP_InterlockedExchangeFloat, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5021. {IntrinsicOp::MOP_InterlockedMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5022. {IntrinsicOp::MOP_InterlockedMax64, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5023. {IntrinsicOp::MOP_InterlockedMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5024. {IntrinsicOp::MOP_InterlockedMin64, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5025. {IntrinsicOp::MOP_InterlockedOr, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5026. {IntrinsicOp::MOP_InterlockedOr64, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5027. {IntrinsicOp::MOP_InterlockedXor, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5028. {IntrinsicOp::MOP_InterlockedXor64, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
  5029. {IntrinsicOp::MOP_Store, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  5030. {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  5031. {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  5032. {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
  5033. {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
  5034. {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
  5035. {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes},
  5036. {IntrinsicOp::MOP_WriteSamplerFeedback, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedback},
  5037. {IntrinsicOp::MOP_WriteSamplerFeedbackBias, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedbackBias},
  5038. {IntrinsicOp::MOP_WriteSamplerFeedbackGrad, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedbackGrad},
  5039. {IntrinsicOp::MOP_WriteSamplerFeedbackLevel, TranslateWriteSamplerFeedback, DXIL::OpCode::WriteSamplerFeedbackLevel},
  5040. {IntrinsicOp::MOP_Abort, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_Abort},
  5041. {IntrinsicOp::MOP_CandidateGeometryIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateGeometryIndex},
  5042. {IntrinsicOp::MOP_CandidateInstanceContributionToHitGroupIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateInstanceContributionToHitGroupIndex},
  5043. {IntrinsicOp::MOP_CandidateInstanceID, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateInstanceID},
  5044. {IntrinsicOp::MOP_CandidateInstanceIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateInstanceIndex},
  5045. {IntrinsicOp::MOP_CandidateObjectRayDirection, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CandidateObjectRayDirection},
  5046. {IntrinsicOp::MOP_CandidateObjectRayOrigin, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CandidateObjectRayOrigin},
  5047. {IntrinsicOp::MOP_CandidateObjectToWorld3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4},
  5048. {IntrinsicOp::MOP_CandidateObjectToWorld4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4},
  5049. {IntrinsicOp::MOP_CandidatePrimitiveIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidatePrimitiveIndex},
  5050. {IntrinsicOp::MOP_CandidateProceduralPrimitiveNonOpaque, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateProceduralPrimitiveNonOpaque},
  5051. {IntrinsicOp::MOP_CandidateTriangleBarycentrics, TranslateRayQueryFloat2Getter, DXIL::OpCode::RayQuery_CandidateTriangleBarycentrics},
  5052. {IntrinsicOp::MOP_CandidateTriangleFrontFace, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateTriangleFrontFace},
  5053. {IntrinsicOp::MOP_CandidateTriangleRayT, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateTriangleRayT},
  5054. {IntrinsicOp::MOP_CandidateType, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CandidateType},
  5055. {IntrinsicOp::MOP_CandidateWorldToObject3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateWorldToObject3x4},
  5056. {IntrinsicOp::MOP_CandidateWorldToObject4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CandidateWorldToObject3x4},
  5057. {IntrinsicOp::MOP_CommitNonOpaqueTriangleHit, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommitNonOpaqueTriangleHit},
  5058. {IntrinsicOp::MOP_CommitProceduralPrimitiveHit, TranslateCommitProceduralPrimitiveHit, DXIL::OpCode::RayQuery_CommitProceduralPrimitiveHit},
  5059. {IntrinsicOp::MOP_CommittedGeometryIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedGeometryIndex},
  5060. {IntrinsicOp::MOP_CommittedInstanceContributionToHitGroupIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex},
  5061. {IntrinsicOp::MOP_CommittedInstanceID, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedInstanceID},
  5062. {IntrinsicOp::MOP_CommittedInstanceIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedInstanceIndex},
  5063. {IntrinsicOp::MOP_CommittedObjectRayDirection, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CommittedObjectRayDirection},
  5064. {IntrinsicOp::MOP_CommittedObjectRayOrigin, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_CommittedObjectRayOrigin},
  5065. {IntrinsicOp::MOP_CommittedObjectToWorld3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4},
  5066. {IntrinsicOp::MOP_CommittedObjectToWorld4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4},
  5067. {IntrinsicOp::MOP_CommittedPrimitiveIndex, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedPrimitiveIndex},
  5068. {IntrinsicOp::MOP_CommittedRayT, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedRayT},
  5069. {IntrinsicOp::MOP_CommittedStatus, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedStatus},
  5070. {IntrinsicOp::MOP_CommittedTriangleBarycentrics, TranslateRayQueryFloat2Getter, DXIL::OpCode::RayQuery_CommittedTriangleBarycentrics},
  5071. {IntrinsicOp::MOP_CommittedTriangleFrontFace, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_CommittedTriangleFrontFace},
  5072. {IntrinsicOp::MOP_CommittedWorldToObject3x4, TranslateRayQueryMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedWorldToObject3x4},
  5073. {IntrinsicOp::MOP_CommittedWorldToObject4x3, TranslateRayQueryTransposedMatrix3x4Operation, DXIL::OpCode::RayQuery_CommittedWorldToObject3x4},
  5074. {IntrinsicOp::MOP_Proceed, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_Proceed},
  5075. {IntrinsicOp::MOP_RayFlags, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_RayFlags},
  5076. {IntrinsicOp::MOP_RayTMin, TranslateGenericRayQueryMethod, DXIL::OpCode::RayQuery_RayTMin},
  5077. {IntrinsicOp::MOP_TraceRayInline, TranslateTraceRayInline, DXIL::OpCode::RayQuery_TraceRayInline},
  5078. {IntrinsicOp::MOP_WorldRayDirection, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_WorldRayDirection},
  5079. {IntrinsicOp::MOP_WorldRayOrigin, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_WorldRayOrigin},
  5080. // SPIRV change starts
  5081. #ifdef ENABLE_SPIRV_CODEGEN
  5082. {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes},
  5083. #endif // ENABLE_SPIRV_CODEGEN
  5084. // SPIRV change ends
// Manually added part.
  5086. { IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  5087. { IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  5088. { IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  5089. { IntrinsicOp::IOP_WaveActiveUMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  5090. { IntrinsicOp::IOP_WaveActiveUProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  5091. { IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
  5092. { IntrinsicOp::IOP_WaveMultiPrefixUProduct, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp },
  5093. { IntrinsicOp::IOP_WaveMultiPrefixUSum, TranslateWaveMultiPrefix, DXIL::OpCode::WaveMultiPrefixOp },
  5094. { IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
  5095. { IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
  5096. { IntrinsicOp::IOP_uabs, TranslateUAbs, DXIL::OpCode::NumOpCodes },
  5097. { IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes },
  5098. { IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitHi },
  5099. { IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},
  5100. { IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax},
  5101. { IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin },
  5102. { IntrinsicOp::IOP_umul, TranslateMul, DXIL::OpCode::UMul },
  5103. { IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax },
  5104. { IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  5105. { IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
  5106. };
  5107. }
  5108. static_assert(sizeof(gLowerTable) / sizeof(gLowerTable[0]) == static_cast<size_t>(IntrinsicOp::Num_Intrinsics),
  5109. "Intrinsic lowering table must be updated to account for new intrinsics.");
  5110. static void TranslateBuiltinIntrinsic(CallInst *CI,
  5111. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  5112. unsigned opcode = hlsl::GetHLOpcode(CI);
  5113. const IntrinsicLower &lower = gLowerTable[opcode];
  5114. Value *Result =
  5115. lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode, helper, pObjHelper, Translated);
  5116. if (Result)
  5117. CI->replaceAllUsesWith(Result);
  5118. }
  5119. // SharedMem.
  5120. namespace {
  5121. bool IsSharedMemPtr(Value *Ptr) {
  5122. return Ptr->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace;
  5123. }
  5124. bool IsLocalVariablePtr(Value *Ptr) {
  5125. while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
  5126. Ptr = GEP->getPointerOperand();
  5127. }
  5128. bool isAlloca = isa<AllocaInst>(Ptr);
  5129. if (isAlloca) return true;
  5130. GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
  5131. if (!GV) return false;
  5132. return GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage;
  5133. }
  5134. }
  5135. // Constant buffer.
  5136. namespace {
  5137. unsigned GetEltTypeByteSizeForConstBuf(Type *EltType, const DataLayout &DL) {
  5138. DXASSERT(EltType->isIntegerTy() || EltType->isFloatingPointTy(),
  5139. "not an element type");
  5140. // TODO: Use real size after change constant buffer into linear layout.
  5141. if (DL.getTypeSizeInBits(EltType) <= 32) {
  5142. // Constant buffer is 4 bytes align.
  5143. return 4;
  5144. } else
  5145. return 8;
  5146. }
  5147. Value *GenerateCBLoad(Value *handle, Value *offset, Type *EltTy, OP *hlslOP,
  5148. IRBuilder<> &Builder) {
  5149. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoad);
  5150. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  5151. // Align to 8 bytes for now.
  5152. Constant *align = hlslOP->GetU32Const(8);
  5153. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, EltTy);
  5154. return Builder.CreateCall(CBLoad, {OpArg, handle, offset, align});
  5155. }
  5156. Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset,
  5157. bool colMajor, OP *OP, const DataLayout &DL,
  5158. IRBuilder<> &Builder) {
  5159. HLMatrixType MatTy = HLMatrixType::cast(matType);
  5160. Type *EltTy = MatTy.getElementTypeForMem();
  5161. unsigned matSize = MatTy.getNumElements();
  5162. std::vector<Value *> elts(matSize);
  5163. Value *EltByteSize = ConstantInt::get(
  5164. offset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  5165. // TODO: use real size after change constant buffer into linear layout.
  5166. Value *baseOffset = offset;
  5167. for (unsigned i = 0; i < matSize; i++) {
  5168. elts[i] = GenerateCBLoad(handle, baseOffset, EltTy, OP, Builder);
  5169. baseOffset = Builder.CreateAdd(baseOffset, EltByteSize);
  5170. }
  5171. Value* Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  5172. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  5173. return Vec;
  5174. }
  5175. void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
  5176. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  5177. DxilFieldAnnotation *prevFieldAnnotation,
  5178. const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
  5179. HLObjectOperationLowerHelper *pObjHelper);
// Extract the element selected by a two-level GEP (indices: 0, elementIdx)
// from an already-loaded vector value.
// - Constant index: a plain extractelement suffices.
// - Dynamic index: the vector is spilled to a temporary stack array, which is
//   then indexed with a GEP + load (vectors cannot be dynamically indexed in
//   memory form here).
// ldData            - the loaded vector value to pick from.
// GEP               - the original (to-be-deleted) GEP describing the index.
// bInsertLdNextToGEP- when true, emit the array GEP/load right before GEP so
//                     the replacement dominates GEP's users.
Value *GenerateVecEltFromGEP(Value *ldData, GetElementPtrInst *GEP,
                             IRBuilder<> &Builder, bool bInsertLdNextToGEP) {
  DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
  Value *baseIdx = (GEP->idx_begin())->get();
  Value *zeroIdx = Builder.getInt32(0);
  DXASSERT_LOCALVAR(baseIdx && zeroIdx, baseIdx == zeroIdx,
                    "base index must be 0");
  Value *idx = (GEP->idx_begin() + 1)->get();
  if (dyn_cast<ConstantInt>(idx)) {
    return Builder.CreateExtractElement(ldData, idx);
  } else {
    // Dynamic indexing.
    // Copy vec to array.
    Type *Ty = ldData->getType();
    Type *EltTy = Ty->getVectorElementType();
    unsigned vecSize = Ty->getVectorNumElements();
    ArrayType *AT = ArrayType::get(EltTy, vecSize);
    // Allocas must go in the function's entry block.
    IRBuilder<> AllocaBuilder(
        GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
    Value *tempArray = AllocaBuilder.CreateAlloca(AT);
    Value *zero = Builder.getInt32(0);
    // Store each vector element into the temporary array.
    for (unsigned int i = 0; i < vecSize; i++) {
      Value *Elt = Builder.CreateExtractElement(ldData, Builder.getInt32(i));
      Value *Ptr =
          Builder.CreateInBoundsGEP(tempArray, {zero, Builder.getInt32(i)});
      Builder.CreateStore(Elt, Ptr);
    }
    // Load from temp array.
    if (bInsertLdNextToGEP) {
      // Insert the new GEP just before the old and to-be-deleted GEP
      Builder.SetInsertPoint(GEP);
    }
    Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
    return Builder.CreateLoad(EltGEP);
  }
}
// Lower a load of a resource that lives inside a cbuffer: instead of loading
// through the cbuffer pointer, create (or reuse) a dedicated global resource
// variable for that cbuffer field and load from it.
// LI         - the load of the resource member (erased when done).
// pObjHelper - helper that tracks cbuffer-resident resources.
// CbGV       - the global variable backing the cbuffer itself.
void TranslateResourceInCB(LoadInst *LI,
                           HLObjectOperationLowerHelper *pObjHelper,
                           GlobalVariable *CbGV) {
  if (LI->user_empty()) {
    LI->eraseFromParent();
    return;
  }
  GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand());
  // The loaded resource is consumed by a call whose own user carries the
  // resource properties (expected to be an annotateHandle call).
  CallInst *CI = cast<CallInst>(LI->user_back());
  CallInst *Anno = cast<CallInst>(CI->user_back());
  DxilResourceProperties RP = pObjHelper->GetResPropsFromAnnotateHandle(Anno);
  Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, RP);
  // Lower Ptr to GV base Ptr.
  Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr);
  IRBuilder<> Builder(LI);
  Value *GvLd = Builder.CreateLoad(GvPtr);
  LI->replaceAllUsesWith(GvLd);
  LI->eraseFromParent();
}
// Translate one user of a cbuffer address into explicit CBufferLoad ops
// (non-legacy, linear byte-offset layout). Recognized users:
// - HL matrix load calls      -> TranslateConstBufMatLd
// - HL matrix subscript calls -> per-element loads at computed byte offsets
// - plain LoadInst            -> scalar/vector CBufferLoads (resources inside
//                                cbuffers are diverted to TranslateResourceInCB)
// - GetElementPtrInst         -> offset is extended and users are recursed on
//                                via TranslateCBGep
// The translated user instruction is erased.
void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset,
                            hlsl::OP *hlslOP,
                            DxilFieldAnnotation *prevFieldAnnotation,
                            DxilTypeSystem &dxilTypeSys, const DataLayout &DL,
                            HLObjectOperationLowerHelper *pObjHelper) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
                          ->getType()
                          ->getPointerElementType();
      Value *newLd = TranslateConstBufMatLd(matType, handle, baseOffset,
                                            colMajor, hlslOP, DL, Builder);
      CI->replaceAllUsesWith(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      HLMatrixType MatTy =
          HLMatrixType::cast(basePtr->getType()->getPointerElementType());
      Type *EltTy = MatTy.getElementTypeForReg();
      Value *EltByteSize = ConstantInt::get(
          baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      // Per-result-element byte offsets into the cbuffer.
      Value *idxList[16];
      switch (subOp) {
      case HLSubscriptOpcode::ColMatSubscript:
      case HLSubscriptOpcode::RowMatSubscript: {
        // Dynamic-style subscript: element indices are separate call operands.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *idx =
              CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          Value *offset = Builder.CreateMul(idx, EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      case HLSubscriptOpcode::RowMatElement:
      case HLSubscriptOpcode::ColMatElement: {
        // Element accessor: indices are packed into one constant aggregate.
        Constant *EltIdxs = cast<Constant>(idx);
        for (unsigned i = 0; i < resultSize; i++) {
          Value *offset =
              Builder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      default:
        DXASSERT(0, "invalid operation on const buffer");
        break;
      }
      // Load each selected element and assemble the scalar/vector result.
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *eltData =
              GenerateCBLoad(handle, idxList[i], EltTy, hlslOP, Builder);
          ldData = Builder.CreateInsertElement(ldData, eltData, i);
        }
      } else {
        ldData = GenerateCBLoad(handle, idxList[0], EltTy, hlslOP, Builder);
      }
      // Replace each user of the subscript: GEPs pick a single element out of
      // ldData; plain loads take the whole value.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder,
                                                 /*bInsertLdNextToGEP*/ true);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be load here;
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
    if (dxilutil::IsHLSLObjectType(Ty)) {
      CallInst *CI = cast<CallInst>(handle);
      // CI should be annotate handle.
      // Need createHandle here.
      if (GetHLOpcodeGroup(CI->getCalledFunction()) ==
          HLOpcodeGroup::HLAnnotateHandle)
        CI = cast<CallInst>(
            CI->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx));
      GlobalVariable *CbGV = cast<GlobalVariable>(
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
      return;
    }
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
    Value *newLd = GenerateCBLoad(handle, baseOffset, EltTy, hlslOP, Builder);
    if (Ty->isVectorTy()) {
      // Load remaining vector elements at successive element-sized offsets.
      Value *result = UndefValue::get(Ty);
      result = Builder.CreateInsertElement(result, newLd, (uint64_t)0);
      // Update offset by 4 bytes.
      Value *offset =
          Builder.CreateAdd(baseOffset, hlslOP->GetU32Const(EltByteSize));
      for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
        Value *elt = GenerateCBLoad(handle, offset, EltTy, hlslOP, Builder);
        result = Builder.CreateInsertElement(result, elt, i);
        // Update offset by 4 bytes.
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(EltByteSize));
      }
      newLd = result;
    }
    ldInst->replaceAllUsesWith(newLd);
    ldInst->eraseFromParent();
  } else {
    // Must be GEP here
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGep(GEP, handle, baseOffset, hlslOP, Builder,
                   prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
    GEP->eraseFromParent();
  }
}
// Walk a GEP into a cbuffer-resident aggregate, accumulating the byte offset
// contributed by each index level (struct fields via their annotated cbuffer
// offsets, arrays via 16-byte-aligned element strides, vectors via element
// size), then recursively translate every user of the GEP at the final
// offset.
// prevFieldAnnotation carries the field annotation from the enclosing struct
// level (needed to size array elements with legacy cbuffer rules).
void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
                    hlsl::OP *hlslOP, IRBuilder<> &Builder,
                    DxilFieldAnnotation *prevFieldAnnotation,
                    const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
                    HLObjectOperationLowerHelper *pObjHelper) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *offset = baseOffset;
  // update offset
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; GEPIt++) {
    Value *idx = GEPIt.getOperand();
    unsigned immIdx = 0;
    bool bImmIdx = false;
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
      bImmIdx = true;
    }
    if (GEPIt->isPointerTy()) {
      // First (pointer) level: stride is the pointee's size, 16-byte aligned.
      Type *EltTy = GEPIt->getPointerElementType();
      unsigned size = 0;
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
        size = annotation->GetCBufferSize();
      } else {
        DXASSERT(fieldAnnotation, "must be a field");
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
              *fieldAnnotation, EltTy, dxilTypeSys);
          // Decide the nested array size.
          unsigned nestedArraySize = 1;
          Type *EltTy = AT->getArrayElementType();
          // support multi level of array
          while (EltTy->isArrayTy()) {
            ArrayType *EltAT = cast<ArrayType>(EltTy);
            nestedArraySize *= EltAT->getNumElements();
            EltTy = EltAT->getElementType();
          }
          // Align to 4 * 4 bytes.
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
          size = nestedArraySize * alignedSize;
        } else {
          size = DL.getTypeAllocSize(EltTy);
        }
      }
      // Align to 4 * 4 bytes.
      size = (size + 15) & 0xfffffff0;
      if (bImmIdx) {
        // Constant index: fold size * index into a constant offset.
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        // Dynamic index: emit the multiply at run time.
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else if (GEPIt->isStructTy()) {
      // Struct level: use the annotated cbuffer offset of the indexed field
      // and remember its annotation for deeper levels.
      StructType *ST = cast<StructType>(*GEPIt);
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
      unsigned structOffset = fieldAnnotation->GetCBufferOffset();
      offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(structOffset));
    } else if (GEPIt->isArrayTy()) {
      DXASSERT(fieldAnnotation != nullptr, "must a field");
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
          *fieldAnnotation, *GEPIt, dxilTypeSys);
      // Decide the nested array size.
      unsigned nestedArraySize = 1;
      Type *EltTy = GEPIt->getArrayElementType();
      // support multi level of array
      while (EltTy->isArrayTy()) {
        ArrayType *EltAT = cast<ArrayType>(EltTy);
        nestedArraySize *= EltAT->getNumElements();
        EltTy = EltAT->getElementType();
      }
      // Align to 4 * 4 bytes.
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
      unsigned size = nestedArraySize * alignedSize;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else if (GEPIt->isVectorTy()) {
      // Vector level: elements are tightly packed at their alloc size.
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
      } else {
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
        offset = Builder.CreateAdd(offset, tempOffset);
      }
    } else {
      gep_type_iterator temp = GEPIt;
      temp++;
      DXASSERT(temp == E, "scalar type must be the last");
    }
  }
  // Translate every user of the GEP at the accumulated offset. Users erase
  // themselves, so advance the iterator before each call.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
    Instruction *user = cast<Instruction>(*(U++));
    TranslateCBAddressUser(user, handle, offset, hlslOP, fieldAnnotation,
                           dxilTypeSys, DL, pObjHelper);
  }
}
  5475. void TranslateCBOperations(Value *handle, Value *ptr, Value *offset, OP *hlslOP,
  5476. DxilTypeSystem &dxilTypeSys, const DataLayout &DL,
  5477. HLObjectOperationLowerHelper *pObjHelper) {
  5478. auto User = ptr->user_begin();
  5479. auto UserE = ptr->user_end();
  5480. for (; User != UserE;) {
  5481. // Must be Instruction.
  5482. Instruction *I = cast<Instruction>(*(User++));
  5483. TranslateCBAddressUser(I, handle, offset, hlslOP,
  5484. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL,
  5485. pObjHelper);
  5486. }
  5487. }
  5488. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  5489. unsigned channelOffset, Type *EltTy, OP *hlslOP,
  5490. IRBuilder<> &Builder) {
  5491. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  5492. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  5493. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  5494. Type *halfTy = Type::getHalfTy(EltTy->getContext());
  5495. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  5496. Type *i16Ty = Type::getInt16Ty(EltTy->getContext());
  5497. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  5498. bool is16 = (EltTy == halfTy || EltTy == i16Ty) && !hlslOP->UseMinPrecision();
  5499. DXASSERT_LOCALVAR(is16, (is16 && channelOffset < 8) || channelOffset < 4,
  5500. "legacy cbuffer don't across 16 bytes register.");
  5501. if (is64) {
  5502. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5503. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  5504. DXASSERT((channelOffset&1)==0,"channel offset must be even for double");
  5505. unsigned eltIdx = channelOffset>>1;
  5506. Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx);
  5507. return Result;
  5508. } else {
  5509. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5510. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  5511. return Builder.CreateExtractValue(loadLegacy, channelOffset);
  5512. }
  5513. }
  5514. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  5515. unsigned channelOffset, Type *EltTy,
  5516. unsigned vecSize, OP *hlslOP,
  5517. IRBuilder<> &Builder) {
  5518. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  5519. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  5520. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  5521. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  5522. Type *halfTy = Type::getHalfTy(EltTy->getContext());
  5523. Type *shortTy = Type::getInt16Ty(EltTy->getContext());
  5524. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  5525. bool is16 = (EltTy == shortTy || EltTy == halfTy) && !hlslOP->UseMinPrecision();
  5526. DXASSERT((is16 && channelOffset + vecSize <= 8) ||
  5527. (channelOffset + vecSize) <= 4,
  5528. "legacy cbuffer don't across 16 bytes register.");
  5529. if (is16) {
  5530. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5531. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  5532. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  5533. for (unsigned i = 0; i < vecSize; ++i) {
  5534. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
  5535. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5536. }
  5537. return Result;
  5538. } else if (is64) {
  5539. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5540. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  5541. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  5542. unsigned smallVecSize = 2;
  5543. if (vecSize < smallVecSize)
  5544. smallVecSize = vecSize;
  5545. for (unsigned i = 0; i < smallVecSize; ++i) {
  5546. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
  5547. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5548. }
  5549. if (vecSize > 2) {
  5550. // Got to next cb register.
  5551. legacyIdx = Builder.CreateAdd(legacyIdx, hlslOP->GetU32Const(1));
  5552. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  5553. for (unsigned i = 2; i < vecSize; ++i) {
  5554. Value *NewElt =
  5555. Builder.CreateExtractValue(loadLegacy, i-2);
  5556. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5557. }
  5558. }
  5559. return Result;
  5560. } else {
  5561. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  5562. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  5563. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  5564. for (unsigned i = 0; i < vecSize; ++i) {
  5565. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
  5566. Result = Builder.CreateInsertElement(Result, NewElt, i);
  5567. }
  5568. return Result;
  5569. }
  5570. }
  5571. Value *TranslateConstBufMatLdLegacy(HLMatrixType MatTy, Value *handle,
  5572. Value *legacyIdx, bool colMajor, OP *OP,
  5573. bool memElemRepr, const DataLayout &DL,
  5574. IRBuilder<> &Builder) {
  5575. Type *EltTy = MatTy.getElementTypeForMem();
  5576. unsigned matSize = MatTy.getNumElements();
  5577. std::vector<Value *> elts(matSize);
  5578. unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
  5579. if (colMajor) {
  5580. unsigned colByteSize = 4 * EltByteSize;
  5581. unsigned colRegSize = (colByteSize + 15) >> 4;
  5582. for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
  5583. Value *col = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
  5584. EltTy, MatTy.getNumRows(), OP, Builder);
  5585. for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
  5586. unsigned matIdx = MatTy.getColumnMajorIndex(r, c);
  5587. elts[matIdx] = Builder.CreateExtractElement(col, r);
  5588. }
  5589. // Update offset for a column.
  5590. legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(colRegSize));
  5591. }
  5592. } else {
  5593. unsigned rowByteSize = 4 * EltByteSize;
  5594. unsigned rowRegSize = (rowByteSize + 15) >> 4;
  5595. for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
  5596. Value *row = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
  5597. EltTy, MatTy.getNumColumns(), OP, Builder);
  5598. for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
  5599. unsigned matIdx = MatTy.getRowMajorIndex(r, c);
  5600. elts[matIdx] = Builder.CreateExtractElement(row, c);
  5601. }
  5602. // Update offset for a row.
  5603. legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(rowRegSize));
  5604. }
  5605. }
  5606. Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  5607. if (!memElemRepr)
  5608. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  5609. return Vec;
  5610. }
  5611. void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
  5612. Value *legacyIdx, unsigned channelOffset,
  5613. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  5614. DxilFieldAnnotation *prevFieldAnnotation,
  5615. const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
  5616. HLObjectOperationLowerHelper *pObjHelper);
  5617. void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
  5618. Value *legacyIdx, unsigned channelOffset,
  5619. hlsl::OP *hlslOP,
  5620. DxilFieldAnnotation *prevFieldAnnotation,
  5621. DxilTypeSystem &dxilTypeSys,
  5622. const DataLayout &DL,
  5623. HLObjectOperationLowerHelper *pObjHelper) {
  5624. IRBuilder<> Builder(user);
  5625. if (CallInst *CI = dyn_cast<CallInst>(user)) {
  5626. HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
  5627. if (group == HLOpcodeGroup::HLMatLoadStore) {
  5628. unsigned opcode = GetHLOpcode(CI);
  5629. HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
  5630. bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
  5631. DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
  5632. matOp == HLMatLoadStoreOpcode::RowMatLoad,
  5633. "No store on cbuffer");
  5634. HLMatrixType MatTy = HLMatrixType::cast(
  5635. CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
  5636. ->getType()->getPointerElementType());
  5637. // This will replace a call, so we should use the register representation of elements
  5638. Value *newLd = TranslateConstBufMatLdLegacy(
  5639. MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/false, DL, Builder);
  5640. CI->replaceAllUsesWith(newLd);
  5641. dxilutil::TryScatterDebugValueToVectorElements(newLd);
  5642. CI->eraseFromParent();
  5643. } else if (group == HLOpcodeGroup::HLSubscript) {
  5644. unsigned opcode = GetHLOpcode(CI);
  5645. HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
  5646. Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  5647. HLMatrixType MatTy = HLMatrixType::cast(basePtr->getType()->getPointerElementType());
  5648. Type *EltTy = MatTy.getElementTypeForReg();
  5649. Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
  5650. Type *resultType = CI->getType()->getPointerElementType();
  5651. unsigned resultSize = 1;
  5652. if (resultType->isVectorTy())
  5653. resultSize = resultType->getVectorNumElements();
  5654. DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
  5655. _Analysis_assume_(resultSize <= 16);
  5656. Value *idxList[16];
  5657. bool colMajor = subOp == HLSubscriptOpcode::ColMatSubscript ||
  5658. subOp == HLSubscriptOpcode::ColMatElement;
  5659. bool dynamicIndexing = !isa<ConstantInt>(idx) &&
  5660. !isa<ConstantAggregateZero>(idx) &&
  5661. !isa<ConstantDataSequential>(idx);
  5662. Value *ldData = UndefValue::get(resultType);
  5663. if (!dynamicIndexing) {
  5664. // This will replace a load or GEP, so we should use the memory representation of elements
  5665. Value *matLd = TranslateConstBufMatLdLegacy(
  5666. MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/true, DL, Builder);
  5667. // The matLd is keep original layout, just use the idx calc in
  5668. // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript.
  5669. switch (subOp) {
  5670. case HLSubscriptOpcode::RowMatSubscript:
  5671. case HLSubscriptOpcode::ColMatSubscript: {
  5672. for (unsigned i = 0; i < resultSize; i++) {
  5673. idxList[i] =
  5674. CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
  5675. }
  5676. } break;
  5677. case HLSubscriptOpcode::RowMatElement:
  5678. case HLSubscriptOpcode::ColMatElement: {
  5679. Constant *EltIdxs = cast<Constant>(idx);
  5680. for (unsigned i = 0; i < resultSize; i++) {
  5681. idxList[i] = EltIdxs->getAggregateElement(i);
  5682. }
  5683. } break;
  5684. default:
  5685. DXASSERT(0, "invalid operation on const buffer");
  5686. break;
  5687. }
  5688. if (resultType->isVectorTy()) {
  5689. for (unsigned i = 0; i < resultSize; i++) {
  5690. Value *eltData = Builder.CreateExtractElement(matLd, idxList[i]);
  5691. ldData = Builder.CreateInsertElement(ldData, eltData, i);
  5692. }
  5693. } else {
  5694. Value *eltData = Builder.CreateExtractElement(matLd, idxList[0]);
  5695. ldData = eltData;
  5696. }
  5697. } else {
  5698. // Must be matSub here.
  5699. Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
  5700. if (colMajor) {
  5701. // idx is c * row + r.
  5702. // For first col, c is 0, so idx is r.
  5703. Value *one = Builder.getInt32(1);
  5704. // row.x = c[0].[idx]
  5705. // row.y = c[1].[idx]
  5706. // row.z = c[2].[idx]
  5707. // row.w = c[3].[idx]
  5708. Value *Elts[4];
  5709. ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumColumns());
  5710. IRBuilder<> AllocaBuilder(user->getParent()
  5711. ->getParent()
  5712. ->getEntryBlock()
  5713. .getFirstInsertionPt());
  5714. Value *tempArray = AllocaBuilder.CreateAlloca(AT);
  5715. Value *zero = AllocaBuilder.getInt32(0);
  5716. Value *cbufIdx = legacyIdx;
  5717. for (unsigned int c = 0; c < MatTy.getNumColumns(); c++) {
  5718. Value *ColVal =
  5719. GenerateCBLoadLegacy(handle, cbufIdx, /*channelOffset*/ 0,
  5720. EltTy, MatTy.getNumRows(), hlslOP, Builder);
  5721. // Convert ColVal to array for indexing.
  5722. for (unsigned int r = 0; r < MatTy.getNumRows(); r++) {
  5723. Value *Elt =
  5724. Builder.CreateExtractElement(ColVal, Builder.getInt32(r));
  5725. Value *Ptr = Builder.CreateInBoundsGEP(
  5726. tempArray, {zero, Builder.getInt32(r)});
  5727. Builder.CreateStore(Elt, Ptr);
  5728. }
  5729. Value *Ptr = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
  5730. Elts[c] = Builder.CreateLoad(Ptr);
  5731. // Update cbufIdx.
  5732. cbufIdx = Builder.CreateAdd(cbufIdx, one);
  5733. }
  5734. if (resultType->isVectorTy()) {
  5735. for (unsigned int c = 0; c < MatTy.getNumColumns(); c++) {
  5736. ldData = Builder.CreateInsertElement(ldData, Elts[c], c);
  5737. }
  5738. } else {
  5739. ldData = Elts[0];
  5740. }
  5741. } else {
  5742. // idx is r * col + c;
  5743. // r = idx / col;
  5744. Value *cCol = ConstantInt::get(idx->getType(), MatTy.getNumColumns());
  5745. idx = Builder.CreateUDiv(idx, cCol);
  5746. idx = Builder.CreateAdd(idx, legacyIdx);
  5747. // Just return a row; 'col' is the number of columns in the row.
  5748. ldData = GenerateCBLoadLegacy(handle, idx, /*channelOffset*/ 0, EltTy,
  5749. MatTy.getNumColumns(), hlslOP, Builder);
  5750. }
  5751. if (!resultType->isVectorTy()) {
  5752. ldData = Builder.CreateExtractElement(ldData, Builder.getInt32(0));
  5753. }
  5754. }
  5755. for (auto U = CI->user_begin(); U != CI->user_end();) {
  5756. Value *subsUser = *(U++);
  5757. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
  5758. Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder,
  5759. /*bInsertLdNextToGEP*/ true);
  5760. for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
  5761. Value *gepUser = *(gepU++);
  5762. // Must be load here;
  5763. LoadInst *ldUser = cast<LoadInst>(gepUser);
  5764. ldUser->replaceAllUsesWith(subData);
  5765. ldUser->eraseFromParent();
  5766. }
  5767. GEP->eraseFromParent();
  5768. } else {
  5769. // Must be load here.
  5770. LoadInst *ldUser = cast<LoadInst>(subsUser);
  5771. ldUser->replaceAllUsesWith(ldData);
  5772. ldUser->eraseFromParent();
  5773. }
  5774. }
  5775. CI->eraseFromParent();
  5776. } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(user)) {
  5777. if( II->getIntrinsicID() == Intrinsic::lifetime_start ||
  5778. II->getIntrinsicID() == Intrinsic::lifetime_end ) {
  5779. DXASSERT(II->use_empty(), "lifetime intrinsic can't have uses");
  5780. II->eraseFromParent();
  5781. } else {
  5782. DXASSERT(0, "not implemented yet");
  5783. }
  5784. } else {
  5785. DXASSERT(0, "not implemented yet");
  5786. }
  5787. } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
  5788. Type *Ty = ldInst->getType();
  5789. Type *EltTy = Ty->getScalarType();
  5790. // Resource inside cbuffer is lowered after GenerateDxilOperations.
  5791. if (dxilutil::IsHLSLObjectType(Ty)) {
  5792. CallInst *CI = cast<CallInst>(handle);
  5793. // CI should be annotate handle.
  5794. // Need createHandle here.
  5795. if (GetHLOpcodeGroup(CI->getCalledFunction()) == HLOpcodeGroup::HLAnnotateHandle)
  5796. CI = cast<CallInst>(CI->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx));
  5797. GlobalVariable *CbGV = cast<GlobalVariable>(
  5798. CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
  5799. TranslateResourceInCB(ldInst, pObjHelper, CbGV);
  5800. return;
  5801. }
  5802. DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
  5803. Value *newLd = nullptr;
  5804. if (Ty->isVectorTy())
  5805. newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
  5806. Ty->getVectorNumElements(), hlslOP, Builder);
  5807. else
  5808. newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
  5809. hlslOP, Builder);
  5810. ldInst->replaceAllUsesWith(newLd);
  5811. dxilutil::TryScatterDebugValueToVectorElements(newLd);
  5812. ldInst->eraseFromParent();
  5813. } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
  5814. for (auto it = BCI->user_begin(); it != BCI->user_end(); ) {
  5815. Instruction *I = cast<Instruction>(*it++);
  5816. TranslateCBAddressUserLegacy(I,
  5817. handle, legacyIdx, channelOffset, hlslOP,
  5818. prevFieldAnnotation, dxilTypeSys,
  5819. DL, pObjHelper);
  5820. }
  5821. BCI->eraseFromParent();
  5822. } else {
  5823. // Must be GEP here
  5824. GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
  5825. TranslateCBGepLegacy(GEP, handle, legacyIdx, channelOffset, hlslOP, Builder,
  5826. prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
  5827. GEP->eraseFromParent();
  5828. }
  5829. }
// Lowers a GEP on a legacy (16-byte register) cbuffer pointer by folding the
// GEP's indices into an updated (legacyIndex, channel) pair, then recursing
// into each of the GEP's users.
//   legacyIndex - 16-byte register index into the cbuffer (IR Value).
//   channel     - component offset within the register: 0-3 for 32-bit
//                 types, 0-7 for native 16-bit types.
//   prevFieldAnnotation - layout annotation for the pointee when it is a
//                 field of an already-visited struct (may be null at the
//                 top level).
// A dynamic index into a vector terminates the walk early by spilling the
// loaded register to a temp array and rewriting the GEP in place.
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
                          Value *legacyIndex, unsigned channel,
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
                          DxilFieldAnnotation *prevFieldAnnotation,
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
                          HLObjectOperationLowerHelper *pObjHelper) {
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  // update offset
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  for (; GEPIt != E; GEPIt++) {
    Value *idx = GEPIt.getOperand();
    unsigned immIdx = 0;
    bool bImmIdx = false;
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
      bImmIdx = true;
    }
    if (GEPIt->isPointerTy()) {
      // First (pointer) level: behaves like array indexing over the pointee.
      Type *EltTy = GEPIt->getPointerElementType();
      unsigned size = 0;
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
        // Struct size in cbuffer layout comes from the type annotation,
        // not from DataLayout.
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
        size = annotation->GetCBufferSize();
      } else {
        DXASSERT(fieldAnnotation, "must be a field");
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
              *fieldAnnotation, EltTy, dxilTypeSys);
          // Decide the nested array size.
          unsigned nestedArraySize = 1;
          Type *EltTy = AT->getArrayElementType();
          // support multi level of array
          while (EltTy->isArrayTy()) {
            ArrayType *EltAT = cast<ArrayType>(EltTy);
            nestedArraySize *= EltAT->getNumElements();
            EltTy = EltAT->getElementType();
          }
          // Align to 4 * 4 bytes: each array element occupies whole
          // registers in legacy cbuffer layout.
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
          size = nestedArraySize * alignedSize;
        } else {
          size = DL.getTypeAllocSize(EltTy);
        }
      }
      // Skip 0 idx.
      if (bImmIdx && immIdx == 0)
        continue;
      // Align to 4 * 4 bytes.
      size = (size + 15) & 0xfffffff0;
      // Take this as array idxing.
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        // >> 4 converts a byte offset to a 16-byte register count.
        unsigned idxInc = tempOffset >> 4;
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
      } else {
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4));
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
      }
      // Array always start from x channel.
      channel = 0;
    } else if (GEPIt->isStructTy()) {
      StructType *ST = cast<StructType>(*GEPIt);
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
      unsigned idxInc = 0;
      unsigned structOffset = 0;
      if (fieldAnnotation->GetCompType().Is16Bit() &&
          !hlslOP->UseMinPrecision()) {
        // Native 16-bit field: channels are 2-byte units, 8 per register.
        structOffset = fieldAnnotation->GetCBufferOffset() >> 1;
        channel += structOffset;
        idxInc = channel >> 3;
        channel = channel & 0x7;
      }
      else {
        // 32-bit (or min-precision) field: 4-byte units, 4 per register.
        structOffset = fieldAnnotation->GetCBufferOffset() >> 2;
        channel += structOffset;
        idxInc = channel >> 2;
        channel = channel & 0x3;
      }
      if (idxInc)
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
    } else if (GEPIt->isArrayTy()) {
      DXASSERT(fieldAnnotation != nullptr, "must a field");
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
          *fieldAnnotation, *GEPIt, dxilTypeSys);
      // Decide the nested array size.
      unsigned nestedArraySize = 1;
      Type *EltTy = GEPIt->getArrayElementType();
      // support multi level of array
      while (EltTy->isArrayTy()) {
        ArrayType *EltAT = cast<ArrayType>(EltTy);
        nestedArraySize *= EltAT->getNumElements();
        EltTy = EltAT->getElementType();
      }
      // Align to 4 * 4 bytes.
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
      unsigned size = nestedArraySize * alignedSize;
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        unsigned idxInc = tempOffset >> 4;
        legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
      } else {
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4));
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
      }
      // Array always start from x channel.
      channel = 0;
    } else if (GEPIt->isVectorTy()) {
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
      // Indexing on vector.
      if (bImmIdx) {
        unsigned tempOffset = size * immIdx;
        if (size == 2) { // 16-bit types
          unsigned channelInc = tempOffset >> 1;
          DXASSERT((channel + channelInc) <= 8, "vector should not cross cb register (8x16bit)");
          channel += channelInc;
          if (channel == 8) {
            // Get to another row.
            // Update index and channel.
            channel = 0;
            legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
          }
        }
        else {
          unsigned channelInc = tempOffset >> 2;
          DXASSERT((channel + channelInc) <= 4, "vector should not cross cb register (8x32bit)");
          channel += channelInc;
          if (channel == 4) {
            // Get to another row.
            // Update index and channel.
            channel = 0;
            legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
          }
        }
      } else {
        // Dynamic index into a vector: load the whole register, spill it to
        // a temp array, and redirect the GEP to that array so the dynamic
        // index can be resolved through memory.
        Type *EltTy = GEPIt->getVectorElementType();
        unsigned vecSize = GEPIt->getVectorNumElements();
        // Load the whole register.
        Value *newLd = GenerateCBLoadLegacy(handle, legacyIndex,
                                            /*channelOffset*/ channel, EltTy,
                                            /*vecSize*/ vecSize, hlslOP, Builder);
        // Copy to array.
        IRBuilder<> AllocaBuilder(GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
        Value *tempArray = AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, vecSize));
        Value *zeroIdx = hlslOP->GetU32Const(0);
        for (unsigned i = 0; i < vecSize; i++) {
          Value *Elt = Builder.CreateExtractElement(newLd, i);
          Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, hlslOP->GetU32Const(i)});
          Builder.CreateStore(Elt, EltGEP);
        }
        // Make sure this is the end of GEP.
        gep_type_iterator temp = GEPIt;
        temp++;
        DXASSERT(temp == E, "scalar type must be the last");
        // Replace the GEP with array GEP.
        Value *ArrayGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, idx});
        GEP->replaceAllUsesWith(ArrayGEP);
        return;
      }
    } else {
      // Scalar: nothing to fold, but it must be the innermost level.
      gep_type_iterator temp = GEPIt;
      temp++;
      DXASSERT(temp == E, "scalar type must be the last");
    }
  }
  // Recurse into the GEP's users with the folded register index/channel.
  // Users erase themselves during translation, so advance before visiting.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
    Instruction *user = cast<Instruction>(*(U++));
    TranslateCBAddressUserLegacy(user, handle, legacyIndex, channel, hlslOP, fieldAnnotation,
                                 dxilTypeSys, DL, pObjHelper);
  }
}
  6002. void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP,
  6003. DxilTypeSystem &dxilTypeSys,
  6004. const DataLayout &DL,
  6005. HLObjectOperationLowerHelper *pObjHelper) {
  6006. auto User = ptr->user_begin();
  6007. auto UserE = ptr->user_end();
  6008. Value *zeroIdx = hlslOP->GetU32Const(0);
  6009. for (; User != UserE;) {
  6010. // Must be Instruction.
  6011. Instruction *I = cast<Instruction>(*(User++));
  6012. TranslateCBAddressUserLegacy(
  6013. I, handle, zeroIdx, /*channelOffset*/ 0, hlslOP,
  6014. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL, pObjHelper);
  6015. }
  6016. }
  6017. }
  6018. // Structured buffer.
  6019. namespace {
  6020. // Calculate offset.
  6021. Value *GEPIdxToOffset(GetElementPtrInst *GEP, IRBuilder<> &Builder,
  6022. hlsl::OP *OP, const DataLayout &DL) {
  6023. SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  6024. Value *addr = nullptr;
  6025. // update offset
  6026. if (GEP->hasAllConstantIndices()) {
  6027. unsigned gepOffset =
  6028. DL.getIndexedOffset(GEP->getPointerOperandType(), Indices);
  6029. addr = OP->GetU32Const(gepOffset);
  6030. } else {
  6031. Value *offset = OP->GetU32Const(0);
  6032. gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  6033. for (; GEPIt != E; GEPIt++) {
  6034. Value *idx = GEPIt.getOperand();
  6035. unsigned immIdx = 0;
  6036. if (llvm::Constant *constIdx = dyn_cast<llvm::Constant>(idx)) {
  6037. immIdx = constIdx->getUniqueInteger().getLimitedValue();
  6038. if (immIdx == 0) {
  6039. continue;
  6040. }
  6041. }
  6042. if (GEPIt->isPointerTy() || GEPIt->isArrayTy() || GEPIt->isVectorTy()) {
  6043. unsigned size = DL.getTypeAllocSize(GEPIt->getSequentialElementType());
  6044. if (immIdx) {
  6045. unsigned tempOffset = size * immIdx;
  6046. offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset));
  6047. } else {
  6048. Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size));
  6049. offset = Builder.CreateAdd(offset, tempOffset);
  6050. }
  6051. } else if (GEPIt->isStructTy()) {
  6052. const StructLayout *Layout = DL.getStructLayout(cast<StructType>(*GEPIt));
  6053. unsigned structOffset = Layout->getElementOffset(immIdx);
  6054. offset = Builder.CreateAdd(offset, OP->GetU32Const(structOffset));
  6055. } else {
  6056. gep_type_iterator temp = GEPIt;
  6057. temp++;
  6058. DXASSERT(temp == E, "scalar type must be the last");
  6059. }
  6060. };
  6061. addr = offset;
  6062. }
  6063. // TODO: x4 for byte address
  6064. return addr;
  6065. }
  6066. // Load a value from a typedef buffer with an offset.
  6067. // Typed buffer do not directly support reading at offsets
  6068. // because the whole value (e.g. float4) must be read at once.
  6069. // If we are provided a non-zero offset, we need to simulate it
  6070. // by returning the correct elements.
  6071. using ResRetValueArray = std::array<Value*, 4>;
  6072. static ResRetValueArray GenerateTypedBufferLoad(
  6073. Value *Handle, Type *BufferElemTy, Value *ElemIdx, Value *StatusPtr,
  6074. OP* HlslOP, IRBuilder<> &Builder) {
  6075. OP::OpCode OpCode = OP::OpCode::BufferLoad;
  6076. Value* LoadArgs[] = { HlslOP->GetU32Const((unsigned)OpCode), Handle, ElemIdx, UndefValue::get(Builder.getInt32Ty()) };
  6077. Function* LoadFunc = HlslOP->GetOpFunc(OpCode, BufferElemTy);
  6078. Value* Load = Builder.CreateCall(LoadFunc, LoadArgs, OP::GetOpCodeName(OpCode));
  6079. ResRetValueArray ResultValues;
  6080. for (unsigned i = 0; i < ResultValues.size(); ++i) {
  6081. ResultValues[i] = cast<ExtractValueInst>(Builder.CreateExtractValue(Load, { i }));
  6082. }
  6083. UpdateStatus(Load, StatusPtr, Builder, HlslOP);
  6084. return ResultValues;
  6085. }
  6086. static AllocaInst* SpillValuesToArrayAlloca(ArrayRef<Value*> Values, IRBuilder<>& Builder) {
  6087. DXASSERT_NOMSG(!Values.empty());
  6088. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
  6089. AllocaInst* ArrayAlloca = AllocaBuilder.CreateAlloca(ArrayType::get(Values[0]->getType(), Values.size()));
  6090. for (unsigned i = 0; i < Values.size(); ++i) {
  6091. Value* ArrayElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), Builder.getInt32(i) });
  6092. Builder.CreateStore(Values[i], ArrayElemPtr);
  6093. }
  6094. return ArrayAlloca;
  6095. }
  6096. static Value* ExtractFromTypedBufferLoad(const ResRetValueArray& ResRet,
  6097. Type* ResultTy, Value* Offset, IRBuilder<>& Builder) {
  6098. unsigned ElemCount = ResultTy->isVectorTy() ? ResultTy->getVectorNumElements() : 1;
  6099. DXASSERT_NOMSG(ElemCount < ResRet.size());
  6100. unsigned ElemSizeInBytes = ResRet[0]->getType()->getScalarSizeInBits() / 8;
  6101. SmallVector<Value*, 4> Elems;
  6102. if (ConstantInt *OffsetAsConstantInt = dyn_cast<ConstantInt>(Offset)) {
  6103. // Get all elements to be returned
  6104. uint64_t FirstElemOffset = OffsetAsConstantInt->getLimitedValue();
  6105. DXASSERT_NOMSG(FirstElemOffset % ElemSizeInBytes == 0);
  6106. uint64_t FirstElemIdx = FirstElemOffset / ElemSizeInBytes;
  6107. DXASSERT_NOMSG(FirstElemIdx <= ResRet.size() - ElemCount);
  6108. for (unsigned ElemIdx = 0; ElemIdx < ElemCount; ++ElemIdx) {
  6109. Elems.emplace_back(ResRet[std::min<size_t>(FirstElemIdx + ElemIdx, ResRet.size() - 1)]);
  6110. }
  6111. }
  6112. else {
  6113. Value* ArrayAlloca = SpillValuesToArrayAlloca(
  6114. ArrayRef<Value*>(ResRet.data(), ResRet.size()), Builder);
  6115. // Get all elements to be returned through dynamic indices
  6116. Value *FirstElemIdx = Builder.CreateUDiv(Offset, Builder.getInt32(ElemSizeInBytes));
  6117. for (unsigned i = 0; i < ElemCount; ++i) {
  6118. Value *ElemIdx = Builder.CreateAdd(FirstElemIdx, Builder.getInt32(i));
  6119. Value* ElemPtr = Builder.CreateGEP(ArrayAlloca, { Builder.getInt32(0), ElemIdx });
  6120. Elems.emplace_back(Builder.CreateLoad(ElemPtr));
  6121. }
  6122. }
  6123. return ScalarizeElements(ResultTy, Elems, Builder);
  6124. }
  6125. Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
  6126. Value *status, Type *EltTy,
  6127. MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
  6128. IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment) {
  6129. OP::OpCode opcode = OP::OpCode::RawBufferLoad;
  6130. DXASSERT(resultElts.size() <= 4,
  6131. "buffer load cannot load more than 4 values");
  6132. if (bufIdx == nullptr) {
  6133. // This is actually a byte address buffer load with a struct template type.
  6134. // The call takes only one coordinates for the offset.
  6135. bufIdx = offset;
  6136. offset = UndefValue::get(offset->getType());
  6137. }
  6138. Function *dxilF = OP->GetOpFunc(opcode, EltTy);
  6139. Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents, OP);
  6140. Value *Args[] = {OP->GetU32Const((unsigned)opcode),
  6141. handle,
  6142. bufIdx,
  6143. offset,
  6144. mask,
  6145. alignment};
  6146. Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
  6147. for (unsigned i = 0; i < resultElts.size(); i++) {
  6148. resultElts[i] = Builder.CreateExtractValue(Ld, i);
  6149. }
  6150. // status
  6151. UpdateStatus(Ld, status, Builder, OP);
  6152. return Ld;
  6153. }
  6154. void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
  6155. Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder,
  6156. ArrayRef<Value *> vals, uint8_t mask, Constant *alignment) {
  6157. OP::OpCode opcode = OP::OpCode::RawBufferStore;
  6158. DXASSERT(vals.size() == 4, "buffer store need 4 values");
  6159. Value *Args[] = {OP->GetU32Const((unsigned)opcode),
  6160. handle,
  6161. bufIdx,
  6162. offset,
  6163. vals[0],
  6164. vals[1],
  6165. vals[2],
  6166. vals[3],
  6167. OP->GetU8Const(mask),
  6168. alignment};
  6169. Function *dxilF = OP->GetOpFunc(opcode, EltTy);
  6170. Builder.CreateCall(dxilF, Args);
  6171. }
  6172. static Value* TranslateRawBufVecLd(Type* VecEltTy, unsigned ElemCount,
  6173. IRBuilder<>& Builder, Value* handle, hlsl::OP* OP, Value* status,
  6174. Value* bufIdx, Value* baseOffset, const DataLayout& DL,
  6175. std::vector<Value*> &bufLds, unsigned baseAlign, bool isScalarTy) {
  6176. unsigned EltSize = DL.getTypeAllocSize(VecEltTy);
  6177. unsigned alignment = std::min(baseAlign, EltSize);
  6178. Constant* alignmentVal = OP->GetI32Const(alignment);
  6179. if (baseOffset == nullptr) {
  6180. baseOffset = OP->GetU32Const(0);
  6181. }
  6182. std::vector<Value*> elts(ElemCount);
  6183. unsigned rest = (ElemCount % 4);
  6184. for (unsigned i = 0; i < ElemCount - rest; i += 4) {
  6185. Value* ResultElts[4];
  6186. Value* bufLd = GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, ResultElts, OP, Builder, 4, alignmentVal);
  6187. bufLds.emplace_back(bufLd);
  6188. elts[i] = ResultElts[0];
  6189. elts[i + 1] = ResultElts[1];
  6190. elts[i + 2] = ResultElts[2];
  6191. elts[i + 3] = ResultElts[3];
  6192. baseOffset = Builder.CreateAdd(baseOffset, OP->GetU32Const(4 * EltSize));
  6193. }
  6194. if (rest) {
  6195. Value* ResultElts[4];
  6196. Value* bufLd = GenerateRawBufLd(handle, bufIdx, baseOffset, status, VecEltTy, ResultElts, OP, Builder, rest, alignmentVal);
  6197. bufLds.emplace_back(bufLd);
  6198. for (unsigned i = 0; i < rest; i++)
  6199. elts[ElemCount - rest + i] = ResultElts[i];
  6200. }
  6201. // If the expected return type is scalar then skip building a vector
  6202. if (isScalarTy) {
  6203. return elts[0];
  6204. }
  6205. Value* Vec = HLMatrixLower::BuildVector(VecEltTy, elts, Builder);
  6206. return Vec;
  6207. }
  6208. Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
  6209. Value *handle, hlsl::OP *OP, Value *status,
  6210. Value *bufIdx, Value *baseOffset,
  6211. const DataLayout &DL) {
  6212. HLMatrixType MatTy = HLMatrixType::cast(matType);
  6213. Type *EltTy = MatTy.getElementTypeForMem();
  6214. unsigned matSize = MatTy.getNumElements();
  6215. std::vector<Value*> bufLds;
  6216. Value* Vec = TranslateRawBufVecLd(EltTy, matSize, Builder, handle, OP, status, bufIdx,
  6217. baseOffset, DL, bufLds, /*baseAlign (in bytes)*/ 8);
  6218. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  6219. return Vec;
  6220. }
// Stores a matrix value to a raw/structured buffer.  The in-register value
// is first converted to memory order, then written four elements at a time
// with rawBufferStore; the trailing partial group is padded with undef and
// masked so only real elements are written.
void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
                             hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
                             Value *val, const DataLayout &DL) {
  HLMatrixType MatTy = HLMatrixType::cast(matType);
  Type *EltTy = MatTy.getElementTypeForMem();
  // Reorder the register-layout vector into memory layout before storing.
  val = MatTy.emitLoweredRegToMem(val, Builder);
  unsigned EltSize = DL.getTypeAllocSize(EltTy);
  Constant *Alignment = OP->GetI32Const(EltSize);
  Value *offset = baseOffset;
  // Null base offset means start of the record.
  if (baseOffset == nullptr)
    offset = OP->GetU32Const(0);
  unsigned matSize = MatTy.getNumElements();
  Value *undefElt = UndefValue::get(EltTy);
  // Round the element list up to a multiple of 4, padding with undef, so
  // each store can pass exactly four values.
  unsigned storeSize = matSize;
  if (matSize % 4) {
    storeSize = matSize + 4 - (matSize & 3);
  }
  std::vector<Value *> elts(storeSize, undefElt);
  for (unsigned i = 0; i < matSize; i++)
    elts[i] = Builder.CreateExtractElement(val, i);
  for (unsigned i = 0; i < matSize; i += 4) {
    // Build a component mask covering only the real (non-padding) elements
    // of this group of four.
    uint8_t mask = 0;
    for (unsigned j = 0; j < 4 && (i+j) < matSize; j++) {
      if (elts[i+j] != undefElt)
        mask |= (1<<j);
    }
    GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder,
                        {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask,
                        Alignment);
    // Advance the byte offset past the four elements just stored.
    offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
  }
}
  6254. void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP,
  6255. Value *status, Value *bufIdx,
  6256. Value *baseOffset, const DataLayout &DL) {
  6257. IRBuilder<> Builder(CI);
  6258. HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
  6259. unsigned opcode = GetHLOpcode(CI);
  6260. DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore,
  6261. "only translate matrix loadStore here.");
  6262. HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
  6263. // Due to the current way the initial codegen generates matrix
  6264. // orientation casts, the in-register vector matrix has already been
  6265. // reordered based on the destination's row or column-major packing orientation.
  6266. switch (matOp) {
  6267. case HLMatLoadStoreOpcode::RowMatLoad:
  6268. case HLMatLoadStoreOpcode::ColMatLoad: {
  6269. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
  6270. Value *NewLd = TranslateStructBufMatLd(
  6271. ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
  6272. bufIdx, baseOffset, DL);
  6273. CI->replaceAllUsesWith(NewLd);
  6274. } break;
  6275. case HLMatLoadStoreOpcode::RowMatStore:
  6276. case HLMatLoadStoreOpcode::ColMatStore: {
  6277. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
  6278. Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
  6279. TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
  6280. handle, OP, bufIdx, baseOffset, val,
  6281. DL);
  6282. } break;
  6283. }
  6284. CI->eraseFromParent();
  6285. }
  6286. void TranslateStructBufSubscriptUser(Instruction *user,
  6287. Value *handle, HLResource::Kind ResKind,
  6288. Value *bufIdx, Value *baseOffset, Value *status,
  6289. hlsl::OP *OP, const DataLayout &DL);
  6290. // For case like mat[i][j].
  6291. // IdxList is [i][0], [i][1], [i][2],[i][3].
  6292. // Idx is j.
  6293. // return [i][j] not mat[i][j] because resource ptr and temp ptr need different
  6294. // code gen.
  6295. static Value *LowerGEPOnMatIndexListToIndex(
  6296. llvm::GetElementPtrInst *GEP, ArrayRef<Value *> IdxList) {
  6297. IRBuilder<> Builder(GEP);
  6298. Value *zero = Builder.getInt32(0);
  6299. DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
  6300. Value *baseIdx = (GEP->idx_begin())->get();
  6301. DXASSERT_LOCALVAR(baseIdx, baseIdx == zero, "base index must be 0");
  6302. Value *Idx = (GEP->idx_begin() + 1)->get();
  6303. if (ConstantInt *immIdx = dyn_cast<ConstantInt>(Idx)) {
  6304. return IdxList[immIdx->getSExtValue()];
  6305. }
  6306. else {
  6307. IRBuilder<> AllocaBuilder(
  6308. GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
  6309. unsigned size = IdxList.size();
  6310. // Store idxList to temp array.
  6311. ArrayType *AT = ArrayType::get(IdxList[0]->getType(), size);
  6312. Value *tempArray = AllocaBuilder.CreateAlloca(AT);
  6313. for (unsigned i = 0; i < size; i++) {
  6314. Value *EltPtr = Builder.CreateGEP(tempArray, { zero, Builder.getInt32(i) });
  6315. Builder.CreateStore(IdxList[i], EltPtr);
  6316. }
  6317. // Load the idx.
  6318. Value *GEPOffset = Builder.CreateGEP(tempArray, { zero, Idx });
  6319. return Builder.CreateLoad(GEPOffset);
  6320. }
  6321. }
  6322. // subscript operator for matrix of struct element.
// Lowers a matrix subscript (mat[i] or mat._mXY style element access) on a
// matrix stored inside a structured-buffer element.  Each selected matrix
// element gets its own byte offset in 'idxList'; the subscript's users
// (loads, stores, further GEPs) are then rewritten into rawBufferLoad /
// rawBufferStore at those offsets, and the original call is erased.
void TranslateStructBufMatSubscript(CallInst *CI,
                                    Value *handle, HLResource::Kind ResKind,
                                    Value *bufIdx, Value *baseOffset, Value *status,
                                    hlsl::OP* hlslOP, const DataLayout &DL) {
  unsigned opcode = GetHLOpcode(CI);
  IRBuilder<> subBuilder(CI);
  HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  HLMatrixType MatTy = HLMatrixType::cast(basePtr->getType()->getPointerElementType());
  Type *EltTy = MatTy.getElementTypeForReg();
  Constant *alignment = hlslOP->GetI32Const(DL.getTypeAllocSize(EltTy));
  // Byte size of one matrix element, used to scale element indices into
  // byte offsets within the buffer record.
  Value *EltByteSize = ConstantInt::get(
      baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
  Type *resultType = CI->getType()->getPointerElementType();
  unsigned resultSize = 1;
  if (resultType->isVectorTy())
    resultSize = resultType->getVectorNumElements();
  DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
  _Analysis_assume_(resultSize <= 16);
  // idxList[i] = byte offset of the i-th selected element.
  std::vector<Value *> idxList(resultSize);
  switch (subOp) {
  case HLSubscriptOpcode::ColMatSubscript:
  case HLSubscriptOpcode::RowMatSubscript: {
    // Row/column subscript: one dynamic element index per result component,
    // passed as consecutive call operands.
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
      offset = subBuilder.CreateMul(offset, EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  case HLSubscriptOpcode::RowMatElement:
  case HLSubscriptOpcode::ColMatElement: {
    // Element access: indices come as one constant aggregate.
    Constant *EltIdxs = cast<Constant>(idx);
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  default:
    DXASSERT(0, "invalid operation on const buffer");
    break;
  }
  Value *undefElt = UndefValue::get(EltTy);
  // Rewrite each user of the subscript.  Users erase themselves, so the
  // iterator is advanced before each visit.
  for (auto U = CI->user_begin(); U != CI->user_end();) {
    Value *subsUser = *(U++);
    if (resultSize == 1) {
      // Single-element result: delegate directly with the one offset.
      TranslateStructBufSubscriptUser(cast<Instruction>(subsUser),
                                      handle, ResKind, bufIdx, idxList[0], status, hlslOP, DL);
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
      // GEP selects one element out of the result vector; map it to the
      // corresponding byte offset and recurse into the GEP's users.
      Value *GEPOffset = LowerGEPOnMatIndexListToIndex(GEP, idxList);
      for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
        Instruction *gepUserInst = cast<Instruction>(*(gepU++));
        TranslateStructBufSubscriptUser(gepUserInst,
                                        handle, ResKind, bufIdx, GEPOffset, status, hlslOP, DL);
      }
      GEP->eraseFromParent();
    } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
      // Store: write each component to its own offset with an X-only mask.
      IRBuilder<> stBuilder(stUser);
      Value *Val = stUser->getValueOperand();
      if (Val->getType()->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *EltVal = stBuilder.CreateExtractElement(Val, i);
          uint8_t mask = DXIL::kCompMask_X;
          GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
                              stBuilder, {EltVal, undefElt, undefElt, undefElt},
                              mask, alignment);
        }
      } else {
        uint8_t mask = DXIL::kCompMask_X;
        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
                            stBuilder, {Val, undefElt, undefElt, undefElt},
                            mask, alignment);
      }
      stUser->eraseFromParent();
    } else {
      // Must be load here.
      LoadInst *ldUser = cast<LoadInst>(subsUser);
      IRBuilder<> ldBuilder(ldUser);
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        // Load each selected element individually and rebuild the vector.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *ResultElt;
          // TODO: This can be inefficient for row major matrix load
          GenerateRawBufLd(handle, bufIdx, idxList[i],
                           /*status*/ nullptr, EltTy, ResultElt, hlslOP,
                           ldBuilder, 1, alignment);
          ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
        }
      } else {
        GenerateRawBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
                         EltTy, ldData, hlslOP, ldBuilder, 4, alignment);
      }
      ldUser->replaceAllUsesWith(ldData);
      ldUser->eraseFromParent();
    }
  }
  CI->eraseFromParent();
}
// Lowers one user of a raw/structured-buffer subscript pointer into DXIL.
// `bufIdx` is the element index (may be null; defaulted to 0 for call users)
// and `baseOffset` is the byte offset within the element. Users handled:
//   - HL intrinsic calls: interlocked ops become DXIL AtomicBinOp /
//     AtomicCompareExchange on the buffer location; MOP_Load is asserted
//     as not implemented here.
//   - HL matrix load/store and matrix subscript calls are forwarded to the
//     matrix-specific translators.
//   - Plain load/store instructions become raw buffer load/store (or a
//     typed-buffer load when ResKind is TypedBuffer), scalarizing vectors
//     and iterating array elements by their allocation size.
//   - Bitcasts and GEPs recurse into their own users; GEPs fold their
//     indices into the running byte offset first.
// Handled users (and the bitcast/GEP itself) are erased, so callers must
// advance their iterators before invoking this.
void TranslateStructBufSubscriptUser(
    Instruction *user, Value *handle, HLResource::Kind ResKind,
    Value *bufIdx, Value *baseOffset, Value *status,
    hlsl::OP *OP, const DataLayout &DL) {
  IRBuilder<> Builder(user);
  if (CallInst *userCall = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = // user call?
        hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
    unsigned opcode = GetHLOpcode(userCall);
    // For case element type of structure buffer is not structure type.
    if (baseOffset == nullptr)
      baseOffset = OP->GetU32Const(0);
    if (group == HLOpcodeGroup::HLIntrinsic) {
      IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
      switch (IOP) {
      case IntrinsicOp::MOP_Load: {
        if (userCall->getType()->isPointerTy()) {
          // Struct will return pointers which like []
        } else {
          // Use builtin types on structuredBuffer.
        }
        DXASSERT(0, "not implement yet");
      } break;
      case IntrinsicOp::IOP_InterlockedAdd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedAnd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedExchange: {
        // Float destination is exchanged bitwise through an i32 overload.
        Type *opType = nullptr;
        PointerType *ptrType = dyn_cast<PointerType>(
            userCall->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType());
        if (ptrType && ptrType->getElementType()->isFloatTy())
          opType = Type::getInt32Ty(userCall->getContext());
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset, opType);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedOr: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedXor: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedCompareStore:
      case IntrinsicOp::IOP_InterlockedCompareExchange: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
                            handle, bufIdx, baseOffset);
        TranslateAtomicCmpXChg(helper, Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise:
      case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: {
        // Bitwise float compare-exchange is performed through an i32 overload.
        Type *i32Ty = Type::getInt32Ty(userCall->getContext());
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
                            handle, bufIdx, baseOffset, i32Ty);
        TranslateAtomicCmpXChg(helper, Builder, OP);
      } break;
      default:
        DXASSERT(0, "invalid opcode");
        break;
      }
      userCall->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLMatLoadStore)
      // Matrix load/store through the subscript pointer.
      TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx,
                                baseOffset, DL);
    else if (group == HLOpcodeGroup::HLSubscript) {
      // Matrix element subscript on the buffer element.
      TranslateStructBufMatSubscript(userCall,
          handle, ResKind, bufIdx, baseOffset, status, OP, DL);
    }
  } else if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
    LoadInst *ldInst = dyn_cast<LoadInst>(user);
    StoreInst *stInst = dyn_cast<StoreInst>(user);
    // Accessed type: the loaded type, or the stored value's type.
    Type *Ty = isa<LoadInst>(user) ? ldInst->getType()
                                   : stInst->getValueOperand()->getType();
    Type *pOverloadTy = Ty->getScalarType();
    Value *offset = baseOffset;
    unsigned arraySize = 1;
    Value *eltSize = nullptr;
    // Arrays are accessed one element at a time, stepping the byte offset
    // by the element's allocation size.
    if (pOverloadTy->isArrayTy()) {
      arraySize = pOverloadTy->getArrayNumElements();
      eltSize = OP->GetU32Const(
          DL.getTypeAllocSize(pOverloadTy->getArrayElementType()));
      pOverloadTy = pOverloadTy->getArrayElementType()->getScalarType();
    }
    if (ldInst) {
      // Loads one scalar/vector element at `offset`.
      auto LdElement = [=](Value *offset, IRBuilder<> &Builder) -> Value * {
        unsigned numComponents = 0;
        if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
          numComponents = VTy->getNumElements();
        }
        else {
          numComponents = 1;
        }
        Constant *alignment =
            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
        if (ResKind == HLResource::Kind::TypedBuffer) {
          // Typed buffer cannot have offsets, they must be loaded all at once
          ResRetValueArray ResRet = GenerateTypedBufferLoad(
              handle, pOverloadTy, bufIdx, status, OP, Builder);
          return ExtractFromTypedBufferLoad(ResRet, Ty, offset, Builder);
        }
        else {
          Value* ResultElts[4];
          GenerateRawBufLd(handle, bufIdx, offset, status, pOverloadTy,
                           ResultElts, OP, Builder, numComponents, alignment);
          return ScalarizeElements(Ty, ResultElts, Builder);
        }
      };
      Value *newLd = LdElement(offset, Builder);
      if (arraySize > 1) {
        // Rebuild the loaded array value element by element.
        newLd =
            Builder.CreateInsertValue(UndefValue::get(Ty), newLd, (uint64_t)0);
        for (unsigned i = 1; i < arraySize; i++) {
          offset = Builder.CreateAdd(offset, eltSize);
          Value *eltLd = LdElement(offset, Builder);
          newLd = Builder.CreateInsertValue(newLd, eltLd, i);
        }
      }
      ldInst->replaceAllUsesWith(newLd);
    } else {
      Value *val = stInst->getValueOperand();
      // Stores one scalar/vector element at `offset`, with a component mask
      // covering exactly the components actually written.
      auto StElement = [&](Value *offset, Value *val, IRBuilder<> &Builder) {
        Value *undefVal = llvm::UndefValue::get(pOverloadTy);
        Value *vals[] = {undefVal, undefVal, undefVal, undefVal};
        uint8_t mask = 0;
        if (Ty->isVectorTy()) {
          unsigned vectorNumElements = Ty->getVectorNumElements();
          DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector");
          _Analysis_assume_(vectorNumElements <= 4);
          for (unsigned i = 0; i < vectorNumElements; i++) {
            vals[i] = Builder.CreateExtractElement(val, i);
            mask |= (1 << i);
          }
        } else {
          vals[0] = val;
          mask = DXIL::kCompMask_X;
        }
        Constant *alignment =
            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
        GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder,
                            vals, mask, alignment);
      };
      if (arraySize > 1)
        val = Builder.CreateExtractValue(val, 0);
      StElement(offset, val, Builder);
      if (arraySize > 1) {
        // Remaining array elements, each at the next element-sized offset.
        val = stInst->getValueOperand();
        for (unsigned i = 1; i < arraySize; i++) {
          offset = Builder.CreateAdd(offset, eltSize);
          Value *eltVal = Builder.CreateExtractValue(val, i);
          StElement(offset, eltVal, Builder);
        }
      }
    }
    user->eraseFromParent();
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
    // Recurse users
    for (auto U = BCI->user_begin(); U != BCI->user_end();) {
      Value *BCIUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(BCIUser),
          handle, ResKind, bufIdx, baseOffset, status, OP, DL);
    }
    BCI->eraseFromParent();
  } else {
    // should only used by GEP
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    Type *Ty = GEP->getType()->getPointerElementType();
    // Fold the GEP indices into a byte offset and add it to the base.
    Value *offset = GEPIdxToOffset(GEP, Builder, OP, DL);
    DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()),
                      "else bitness is wrong");
    offset = Builder.CreateAdd(offset, baseOffset);
    for (auto U = GEP->user_begin(); U != GEP->user_end();) {
      Value *GEPUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(GEPUser),
          handle, ResKind, bufIdx, offset, status, OP, DL);
    }
    // delete the inst
    GEP->eraseFromParent();
  }
}
  6643. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  6644. hlsl::OP *OP, HLResource::Kind ResKind, const DataLayout &DL) {
  6645. Value *subscriptIndex = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
  6646. Value* bufIdx = nullptr;
  6647. Value *offset = nullptr;
  6648. if (ResKind == HLResource::Kind::RawBuffer) {
  6649. offset = subscriptIndex;
  6650. }
  6651. else {
  6652. // StructuredBuffer, TypedBuffer, etc.
  6653. bufIdx = subscriptIndex;
  6654. offset = OP->GetU32Const(0);
  6655. }
  6656. for (auto U = CI->user_begin(); U != CI->user_end();) {
  6657. Value *user = *(U++);
  6658. TranslateStructBufSubscriptUser(cast<Instruction>(user),
  6659. handle, ResKind, bufIdx, offset, status, OP, DL);
  6660. }
  6661. }
  6662. }
  6663. // HLSubscript.
  6664. namespace {
  6665. Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK,
  6666. DXIL::ResourceClass RC, Value *handle,
  6667. LoadInst *ldInst, IRBuilder<> &Builder,
  6668. hlsl::OP *hlslOP, const DataLayout &DL) {
  6669. ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, /*bForSubscript*/ true);
  6670. // Default sampleIdx for 2DMS textures.
  6671. if (RK == DxilResource::Kind::Texture2DMS ||
  6672. RK == DxilResource::Kind::Texture2DMSArray)
  6673. ldHelper.mipLevel = hlslOP->GetU32Const(0);
  6674. // use ldInst as retVal
  6675. ldHelper.retVal = ldInst;
  6676. TranslateLoad(ldHelper, RK, Builder, hlslOP, DL);
  6677. // delete the ld
  6678. ldInst->eraseFromParent();
  6679. return ldHelper.retVal;
  6680. }
// Returns a copy of VecVal with EltVal inserted at lane EltIdx.
// A constant index lowers to a single insertelement. A dynamic index lowers
// to a switch over all lanes: the block containing InsertPt is split, each
// case block inserts into its lane, and a phi in the continuation block
// merges the per-lane results; the default edge leaves VecVal unchanged.
Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx,
                       unsigned vectorSize, Instruction *InsertPt) {
  IRBuilder<> Builder(InsertPt);
  if (ConstantInt *CEltIdx = dyn_cast<ConstantInt>(EltIdx)) {
    // Constant lane: insert directly.
    VecVal =
        Builder.CreateInsertElement(VecVal, EltVal, CEltIdx->getLimitedValue());
  } else {
    BasicBlock *BB = InsertPt->getParent();
    // Split so that InsertPt and everything after it live in EndBB.
    BasicBlock *EndBB = BB->splitBasicBlock(InsertPt);
    // splitBasicBlock left an unconditional branch as BB's terminator;
    // replace it with a switch whose default goes straight to EndBB.
    TerminatorInst *TI = BB->getTerminator();
    IRBuilder<> SwitchBuilder(TI);
    LLVMContext &Ctx = InsertPt->getContext();
    SwitchInst *Switch = SwitchBuilder.CreateSwitch(EltIdx, EndBB, vectorSize);
    TI->eraseFromParent();
    Function *F = EndBB->getParent();
    IRBuilder<> endSwitchBuilder(EndBB->begin());
    Type *Ty = VecVal->getType();
    // One incoming value per case block plus the default edge from BB.
    PHINode *VecPhi = endSwitchBuilder.CreatePHI(Ty, vectorSize + 1);
    for (unsigned i = 0; i < vectorSize; i++) {
      BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case", F, EndBB);
      Switch->addCase(SwitchBuilder.getInt32(i), CaseBB);
      IRBuilder<> CaseBuilder(CaseBB);
      Value *CaseVal = CaseBuilder.CreateInsertElement(VecVal, EltVal, i);
      VecPhi->addIncoming(CaseVal, CaseBB);
      CaseBuilder.CreateBr(EndBB);
    }
    // Default path: vector unchanged.
    VecPhi->addIncoming(VecVal, BB);
    VecVal = VecPhi;
  }
  return VecVal;
}
// Lowers the users of a typed-resource subscript (Buffer/Texture operator[]).
//   - Direct loads/stores of the element become DXIL buffer/texture
//     load/store.
//   - A GEP selecting one vector component becomes a load-modify-store (for
//     stores) or load-plus-extract (for loads); atomics through such a GEP
//     are diagnosed as invalid.
//   - Interlocked intrinsic calls on the whole element become DXIL atomics
//     (only legal on UAVs; SRV targets are diagnosed).
// On any invalid use, sets Translated=false, emits a diagnostic, and
// returns early (leaving remaining users untouched).
void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Resource ptr.
  Value *handle = ptr;
  DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  // Element type the subscript pointer refers to.
  Type *Ty = CI->getType()->getPointerElementType();
  // Users erase themselves; advance the iterator before each body.
  for (auto It = CI->user_begin(); It != CI->user_end(); ) {
    User *user = *(It++);
    Instruction *I = cast<Instruction>(user);
    IRBuilder<> Builder(I);
    if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
      // Whole-element load.
      TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.dataLayout);
    } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
      // Whole-element store.
      Value *val = stInst->getValueOperand();
      TranslateStore(RK, handle, val,
                     CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
                     Builder, hlslOP);
      // delete the st
      stInst->eraseFromParent();
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
      // Must be vector type here.
      unsigned vectorSize = Ty->getVectorNumElements();
      DXASSERT_NOMSG(GEP->getNumIndices() == 2);
      // The second GEP index selects the vector component.
      Use *GEPIdx = GEP->idx_begin();
      GEPIdx++;
      Value *EltIdx = *GEPIdx;
      for (auto GEPIt = GEP->user_begin(); GEPIt != GEP->user_end();) {
        User *GEPUser = *(GEPIt++);
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPUser)) {
          // Component store: load the whole element, update one lane,
          // store the whole element back.
          IRBuilder<> StBuilder(SI);
          // Generate Ld.
          LoadInst *tmpLd = StBuilder.CreateLoad(CI);
          Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, StBuilder,
                                               hlslOP, helper.dataLayout);
          // Update vector.
          ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx,
                                  vectorSize, SI);
          // Generate St.
          // Reset insert point, UpdateVectorElt may move SI to different block.
          StBuilder.SetInsertPoint(SI);
          TranslateStore(RK, handle, ldVal,
                         CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
                         StBuilder, hlslOP);
          SI->eraseFromParent();
          continue;
        }
        if (LoadInst *LI = dyn_cast<LoadInst>(GEPUser)) {
          // Component load: load the whole element, then extract the lane.
          IRBuilder<> LdBuilder(LI);
          // Generate tmp vector load with vector type & translate it
          LoadInst *tmpLd = LdBuilder.CreateLoad(CI);
          Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, LdBuilder,
                                               hlslOP, helper.dataLayout);
          // get the single element
          ldVal = GenerateVecEltFromGEP(ldVal, GEP, LdBuilder,
                                        /*bInsertLdNextToGEP*/ false);
          LI->replaceAllUsesWith(ldVal);
          LI->eraseFromParent();
          continue;
        }
        if (!isa<CallInst>(GEPUser)) {
          // Invalid operations.
          Translated = false;
          dxilutil::EmitErrorOnInstruction(GEP, "Invalid operation on typed buffer.");
          return;
        }
        CallInst *userCall = cast<CallInst>(GEPUser);
        HLOpcodeGroup group =
            hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
        if (group != HLOpcodeGroup::HLIntrinsic) {
          // Invalid operations.
          Translated = false;
          dxilutil::EmitErrorOnInstruction(userCall, "Invalid operation on typed buffer.");
          return;
        }
        // Atomics addressing a single component of a typed resource are
        // not expressible in DXIL; diagnose them.
        unsigned opcode = hlsl::GetHLOpcode(userCall);
        IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
        switch (IOP) {
        case IntrinsicOp::IOP_InterlockedAdd:
        case IntrinsicOp::IOP_InterlockedAnd:
        case IntrinsicOp::IOP_InterlockedExchange:
        case IntrinsicOp::IOP_InterlockedMax:
        case IntrinsicOp::IOP_InterlockedMin:
        case IntrinsicOp::IOP_InterlockedUMax:
        case IntrinsicOp::IOP_InterlockedUMin:
        case IntrinsicOp::IOP_InterlockedOr:
        case IntrinsicOp::IOP_InterlockedXor:
        case IntrinsicOp::IOP_InterlockedCompareStore:
        case IntrinsicOp::IOP_InterlockedCompareExchange:
        case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise:
        case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: {
          // Invalid operations.
          Translated = false;
          dxilutil::EmitErrorOnInstruction(
              userCall, "Typed resources used in atomic operations must have a scalar element type.");
          return;
        } break;
        default:
          // Invalid operations.
          Translated = false;
          dxilutil::EmitErrorOnInstruction(userCall, "Invalid operation on typed buffer.");
          return;
          break;
        }
      }
      GEP->eraseFromParent();
    } else {
      // Remaining users must be HL intrinsic calls (atomics on the element).
      CallInst *userCall = cast<CallInst>(user);
      HLOpcodeGroup group =
          hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
      unsigned opcode = hlsl::GetHLOpcode(userCall);
      if (group == HLOpcodeGroup::HLIntrinsic) {
        IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
        if (RC == DXIL::ResourceClass::SRV) {
          // Atomic targets must be UAVs; diagnose SRV uses.
          // Invalid operations.
          Translated = false;
          switch (IOP) {
          case IntrinsicOp::IOP_InterlockedAdd:
          case IntrinsicOp::IOP_InterlockedAnd:
          case IntrinsicOp::IOP_InterlockedExchange:
          case IntrinsicOp::IOP_InterlockedMax:
          case IntrinsicOp::IOP_InterlockedMin:
          case IntrinsicOp::IOP_InterlockedUMax:
          case IntrinsicOp::IOP_InterlockedUMin:
          case IntrinsicOp::IOP_InterlockedOr:
          case IntrinsicOp::IOP_InterlockedXor:
          case IntrinsicOp::IOP_InterlockedCompareStore:
          case IntrinsicOp::IOP_InterlockedCompareExchange:
          case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise:
          case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: {
            dxilutil::EmitErrorOnInstruction(
                userCall, "Atomic operation targets must be groupshared or UAV.");
            return;
          } break;
          default:
            dxilutil::EmitErrorOnInstruction(userCall, "Invalid operation on typed buffer.");
            return;
            break;
          }
        }
        // NOTE: each case's local `helper` (ResLoadHelper) shadows the
        // function parameter `helper`; it is used only for its addr field.
        switch (IOP) {
        case IntrinsicOp::IOP_InterlockedAdd: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAdd);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add,
                                         Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedAnd: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAnd);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And,
                                         Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedExchange: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedExchange);
          // Float destination is exchanged bitwise through an i32 overload.
          Type *opType = nullptr;
          PointerType *ptrType = dyn_cast<PointerType>(
              userCall->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType());
          if (ptrType && ptrType->getElementType()->isFloatTy())
            opType = Type::getInt32Ty(userCall->getContext());
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr, opType);
          TranslateAtomicBinaryOperation(
              atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedMax: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMax);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(
              atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedMin: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMin);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(
              atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedUMax: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMax);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(
              atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedUMin: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMin);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(
              atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedOr: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedOr);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or,
                                         Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedXor: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedXor);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor,
                                         Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedCompareStore:
        case IntrinsicOp::IOP_InterlockedCompareExchange: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedCompareExchange);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
                                  handle, helper.addr, /*offset*/ nullptr);
          TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise:
        case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: {
          // Bitwise float compare-exchange goes through an i32 overload.
          Type *i32Ty = Type::getInt32Ty(userCall->getContext());
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedCompareExchange);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
                                  handle, helper.addr, /*offset*/ nullptr, i32Ty);
          TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
        } break;
        default:
          DXASSERT(0, "invalid opcode");
          break;
        }
      } else {
        DXASSERT(0, "invalid group");
      }
      userCall->eraseFromParent();
    }
  }
}
// Dispatches lowering of one HL subscript call:
//   - CBufferSubscript -> cbuffer load lowering (legacy or typed path).
//   - DoubleSubscript (e.g. tex.mips[mip][coord]) -> a single texture load.
//   - Handle-typed object -> structured-buffer lowering, typed-buffer
//     lowering with coordinate patch-up, or the default typed-resource path.
//   - Matrix subscripts on local/groupshared memory are expected to have
//     been handled by HLMatrixLowerPass already (asserted).
// Sets Translated to tell the caller whether the call was fully lowered
// (and may therefore be erased).
void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
                          HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  if (CI->user_empty()) {
    // Dead subscript: nothing to lower; caller will erase it.
    Translated = true;
    return;
  }
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  if (opcode == HLSubscriptOpcode::CBufferSubscript) {
    // Collapse GEP chains on the subscript before lowering cbuffer access.
    HLModule::MergeGepUse(CI);
    // Resource ptr.
    Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
    if (helper.bLegacyCBufferLoad)
      TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys,
                                  helper.dataLayout, pObjHelper);
    else {
      TranslateCBOperations(handle, CI, /*offset*/ hlslOP->GetU32Const(0),
                            hlslOP, helper.dxilTypeSys,
                            CI->getModule()->getDataLayout(), pObjHelper);
    }
    Translated = true;
    return;
  } else if (opcode == HLSubscriptOpcode::DoubleSubscript) {
    // Resource ptr.
    Value *handle = ptr;
    DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
    Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
    Value *mipLevel =
        CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx);
    auto U = CI->user_begin();
    DXASSERT(CI->hasOneUse(), "subscript should only has one use");
    // TODO: support store.
    Instruction *ldInst = cast<Instruction>(*U);
    ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel);
    IRBuilder<> Builder(CI);
    TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout);
    ldInst->eraseFromParent();
    Translated = true;
    return;
  } else {
    Type *HandleTy = hlslOP->GetHandleType();
    if (ptr->getType() == HandleTy) {
      // Resource ptr.
      Value *handle = ptr;
      DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
      if (RK == DxilResource::Kind::Invalid) {
        Translated = false;
        return;
      }
      Translated = true;
      Type *ObjTy = pObjHelper->GetResourceType(handle);
      Type *RetTy = ObjTy->getStructElementType(0);
      if (DXIL::IsStructuredBuffer(RK)) {
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK,
                                    helper.dataLayout);
      } else if (RetTy->isAggregateType() &&
                 RK == DxilResource::Kind::TypedBuffer) {
        // Aggregate element in a typed buffer: lower via the structured-
        // buffer path, then patch the emitted DXIL back to typed-buffer
        // form (typed buffers take no second coordinate).
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK,
                                    helper.dataLayout);
        // Clear offset for typed buf.
        for (auto User = handle->user_begin(); User != handle->user_end(); ) {
          // NOTE: this local CI shadows the function parameter CI.
          CallInst *CI = cast<CallInst>(*(User++));
          // Skip not lowered HL functions.
          if (hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()) != HLOpcodeGroup::NotHL)
            continue;
          switch (hlslOP->GetDxilOpFuncCallInst(CI)) {
          case DXIL::OpCode::BufferLoad: {
            CI->setArgOperand(DXIL::OperandIndex::kBufferLoadCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::BufferStore: {
            CI->setArgOperand(DXIL::OperandIndex::kBufferStoreCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicBinOp: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicCompareExchange: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::RawBufferLoad: {
            // Structured buffer inside a typed buffer must be converted to typed buffer load.
            // Typed buffer load is equivalent to raw buffer load, except there is no mask.
            StructType *STy = cast<StructType>(CI->getFunctionType()->getReturnType());
            Type *ETy = STy->getElementType(0);
            SmallVector<Value *, 4> Args;
            Args.emplace_back(hlslOP->GetI32Const((unsigned)DXIL::OpCode::BufferLoad));
            Args.emplace_back(CI->getArgOperand(1)); // handle
            Args.emplace_back(CI->getArgOperand(2)); // index
            Args.emplace_back(UndefValue::get(helper.i32Ty)); // offset
            IRBuilder<> builder(CI);
            Function *newFunction = hlslOP->GetOpFunc(DXIL::OpCode::BufferLoad, ETy);
            CallInst *newCall = builder.CreateCall(newFunction, Args);
            CI->replaceAllUsesWith(newCall);
            CI->eraseFromParent();
          } break;
          default:
            DXASSERT(0, "Invalid operation on resource handle");
            break;
          }
        }
      } else {
        TranslateDefaultSubscript(CI, helper, pObjHelper, Translated);
      }
      return;
    }
  }
  // Non-handle subscript object: must be a matrix subscript.
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  if (IsLocalVariablePtr(basePtr) || IsSharedMemPtr(basePtr)) {
    // Translate matrix into vector of array for share memory or local
    // variable should be done in HLMatrixLowerPass
    DXASSERT_NOMSG(0);
    Translated = true;
    return;
  }
  // Other case should be take care in TranslateStructBufSubscript or
  // TranslateCBOperations.
  Translated = false;
  return;
}
  7070. }
  7071. void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper) {
  7072. for (auto U = F->user_begin(); U != F->user_end();) {
  7073. Value *user = *(U++);
  7074. if (!isa<Instruction>(user))
  7075. continue;
  7076. // must be call inst
  7077. CallInst *CI = cast<CallInst>(user);
  7078. unsigned opcode = GetHLOpcode(CI);
  7079. bool Translated = true;
  7080. TranslateHLSubscript(
  7081. CI, static_cast<HLSubscriptOpcode>(opcode), helper, pObjHelper, Translated);
  7082. if (Translated) {
  7083. // delete the call
  7084. DXASSERT(CI->use_empty(),
  7085. "else TranslateHLSubscript didn't replace/erase uses");
  7086. CI->eraseFromParent();
  7087. }
  7088. }
  7089. }
  7090. // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast of alloca, and return load from alloca
  7091. // If bOrigAllocaTy is true: create alloca of old type instead, write to alloca, and return load from bitcast of alloca
  7092. static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, bool bOrigAllocaTy = false, const Twine &Name = "") {
  7093. IRBuilder<> Builder(Insert);
  7094. if (Ty->isPointerTy()) {
  7095. // If pointer, we can bitcast directly
  7096. return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
  7097. } else {
  7098. // If value, we have to alloca, store to bitcast ptr, and load
  7099. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
  7100. Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
  7101. Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
  7102. Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
  7103. Instruction *bitCast = cast<Instruction>(Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
  7104. Builder.CreateStore(V, bOrigAllocaTy ? allocaInst : bitCast);
  7105. return Builder.CreateLoad(bOrigAllocaTy ? bitCast : allocaInst, Name);
  7106. }
  7107. }
  7108. static Instruction *CreateTransposeShuffle(IRBuilder<> &Builder, Value *vecVal, unsigned toRows, unsigned toCols) {
  7109. SmallVector<int, 16> castMask(toCols * toRows);
  7110. unsigned idx = 0;
  7111. for (unsigned r = 0; r < toRows; r++)
  7112. for (unsigned c = 0; c < toCols; c++)
  7113. castMask[idx++] = c * toRows + r;
  7114. return cast<Instruction>(
  7115. Builder.CreateShuffleVector(vecVal, vecVal, castMask));
  7116. }
// Lowers all call sites of the HL intermediate function F according to its
// opcode group: HLIntrinsic calls are mapped to DXIL operations, matrix
// load/store and matrix casts are rewritten as plain vector IR (lib-target
// matrix arguments), and HLSubscript is delegated to
// TranslateSubscriptOperation. Successfully translated calls are erased.
void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
    hlsl::HLOpcodeGroup group, HLObjectOperationLowerHelper *pObjHelper) {
  if (group == HLOpcodeGroup::HLIntrinsic) {
    // map to dxil operations
    for (auto U = F->user_begin(); U != F->user_end();) {
      // Advance the iterator before translating: the call may be erased
      // below, which would otherwise invalidate U.
      Value *User = *(U++);
      if (!isa<Instruction>(User))
        continue;
      // must be call inst
      CallInst *CI = cast<CallInst>(User);
      // Keep the instruction to lower by other function.
      bool Translated = true;
      TranslateBuiltinIntrinsic(CI, helper, pObjHelper, Translated);
      if (Translated) {
        // delete the call
        DXASSERT(CI->use_empty(),
                 "else TranslateBuiltinIntrinsic didn't replace/erase uses");
        CI->eraseFromParent();
      }
    }
  } else {
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Both ld/st use arg1 for the pointer.
      Type *PtrTy =
          F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx);
      if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
        // Translate matrix into vector of array for shared memory
        // variable should be done in HLMatrixLowerPass.
        if (!F->user_empty())
          F->getContext().emitError("Fail to lower matrix load/store.");
      } else if (PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) {
        // Default address space may be function argument in lib target
        if (!F->user_empty()) {
          for (auto U = F->user_begin(); U != F->user_end();) {
            // Advance before possible erasure of the call (see above).
            Value *User = *(U++);
            if (!isa<Instruction>(User))
              continue;
            // must be call inst
            CallInst *CI = cast<CallInst>(User);
            IRBuilder<> Builder(CI);
            HLMatLoadStoreOpcode opcode = static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
            // NOTE: no default case — any other HLMatLoadStoreOpcode value
            // leaves the call untouched.
            switch (opcode) {
            case HLMatLoadStoreOpcode::ColMatStore:
            case HLMatLoadStoreOpcode::RowMatStore: {
              // Store: bitcast the matrix pointer to the stored vector's
              // pointer type and emit a plain store.
              Value *vecVal = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
              Value *matPtr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
              Value *castPtr = Builder.CreateBitCast(matPtr, vecVal->getType()->getPointerTo());
              Builder.CreateStore(vecVal, castPtr);
              CI->eraseFromParent();
            } break;
            case HLMatLoadStoreOpcode::ColMatLoad:
            case HLMatLoadStoreOpcode::RowMatLoad: {
              // Load: bitcast the matrix pointer to the call's result type
              // and emit a plain load replacing the call.
              Value *matPtr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
              Value *castPtr = Builder.CreateBitCast(matPtr, CI->getType()->getPointerTo());
              Value *vecVal = Builder.CreateLoad(castPtr);
              CI->replaceAllUsesWith(vecVal);
              CI->eraseFromParent();
            } break;
            }
          }
        }
      }
    } else if (group == HLOpcodeGroup::HLCast) {
      // HLCast may be used on matrix value function argument in lib target
      if (!F->user_empty()) {
        for (auto U = F->user_begin(); U != F->user_end();) {
          // Advance before possible erasure of the call (see above).
          Value *User = *(U++);
          if (!isa<Instruction>(User))
            continue;
          // must be call inst
          CallInst *CI = cast<CallInst>(User);
          IRBuilder<> Builder(CI);
          HLCastOpcode opcode = static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI));
          bool bTranspose = false;
          bool bColDest = false;
          // The fallthroughs below are deliberate: matrix-to-matrix casts
          // set their flags and then share the vec-cast lowering code.
          // Opcodes not listed fall out of the switch with no change.
          switch (opcode) {
          case HLCastOpcode::RowMatrixToColMatrix:
            bColDest = true;
            // deliberate fallthrough
          case HLCastOpcode::ColMatrixToRowMatrix:
            bTranspose = true;
            // deliberate fallthrough
          case HLCastOpcode::ColMatrixToVecCast:
          case HLCastOpcode::RowMatrixToVecCast: {
            // Reinterpret the matrix value as a flat vector, then transpose
            // via shuffle if the cast changes orientation.
            Value *matVal = CI->getArgOperand(HLOperandIndex::kInitFirstArgOpIdx);
            Value *vecVal = BitCastValueOrPtr(matVal, CI, CI->getType(),
                                              /*bOrigAllocaTy*/false,
                                              matVal->getName());
            if (bTranspose) {
              HLMatrixType MatTy = HLMatrixType::cast(matVal->getType());
              unsigned row = MatTy.getNumRows();
              unsigned col = MatTy.getNumColumns();
              if (bColDest) std::swap(row, col);
              vecVal = CreateTransposeShuffle(Builder, vecVal, row, col);
            }
            CI->replaceAllUsesWith(vecVal);
            CI->eraseFromParent();
          } break;
          }
        }
      }
    } else if (group == HLOpcodeGroup::HLSubscript) {
      TranslateSubscriptOperation(F, helper, pObjHelper);
    }
    // map to math function or llvm ir
  }
}
  7222. typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
  7223. static void TranslateHLExtension(Function *F,
  7224. HLSLExtensionsCodegenHelper *helper,
  7225. OP& hlslOp,
  7226. HLObjectOperationLowerHelper &objHelper) {
  7227. // Find all calls to the function F.
  7228. // Store the calls in a vector for now to be replaced the loop below.
  7229. // We use a two step "find then replace" to avoid removing uses while
  7230. // iterating.
  7231. SmallVector<CallInst *, 8> CallsToReplace;
  7232. for (User *U : F->users()) {
  7233. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  7234. CallsToReplace.push_back(CI);
  7235. }
  7236. }
  7237. // Get the lowering strategy to use for this intrinsic.
  7238. llvm::StringRef LowerStrategy = GetHLLowerStrategy(F);
  7239. HLObjectExtensionLowerHelper extObjHelper(objHelper);
  7240. ExtensionLowering lower(LowerStrategy, helper, hlslOp, extObjHelper);
  7241. // Replace all calls that were successfully translated.
  7242. for (CallInst *CI : CallsToReplace) {
  7243. Value *Result = lower.Translate(CI);
  7244. if (Result && Result != CI) {
  7245. CI->replaceAllUsesWith(Result);
  7246. CI->eraseFromParent();
  7247. }
  7248. }
  7249. }
namespace hlsl {

// Module-level driver: walks every function in the module and lowers the HL
// intermediate functions (external declarations with at least one user) to
// DXIL operations / LLVM IR, dispatching on the HL opcode group.
// NonUniformResourceIndex intrinsics are deferred and lowered last.
void TranslateBuiltinOperations(
    HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper,
    std::unordered_set<LoadInst *> &UpdateCounterSet) {
  HLOperationLowerHelper helper(HLM);
  HLObjectOperationLowerHelper objHelper = {HLM, UpdateCounterSet};
  Module *M = HLM.GetModule();
  SmallVector<Function *, 4> NonUniformResourceIndexIntrinsics;
  // generate dxil operation
  // NOTE: the loop variable is an ilist iterator (the range-for's Function&
  // converts to it implicitly) and is used pointer-like via operator->.
  for (iplist<Function>::iterator F : M->getFunctionList()) {
    // Only declarations with users can be HL intermediates; skip the rest.
    if (F->user_empty())
      continue;
    if (!F->isDeclaration()) {
      continue;
    }
    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
    if (group == HLOpcodeGroup::NotHL) {
      // Nothing to do.
      continue;
    }
    if (group == HLOpcodeGroup::HLExtIntrinsic) {
      // Extension intrinsics use their registered lowering strategy.
      TranslateHLExtension(F, extCodegenHelper, helper.hlslOP, objHelper);
      continue;
    }
    if (group == HLOpcodeGroup::HLIntrinsic) {
      CallInst *CI = cast<CallInst>(*F->user_begin()); // must be call inst
      unsigned opcode = hlsl::GetHLOpcode(CI);
      if (opcode == (unsigned)IntrinsicOp::IOP_NonUniformResourceIndex) {
        // Defer lowering of NonUniformResourceIndex (see comment below).
        NonUniformResourceIndexIntrinsics.push_back(F);
        continue;
      }
    }
    TranslateHLBuiltinOperation(F, helper, group, &objHelper);
  }
  // Translate last so value placed in NonUniformSet is still valid.
  if (!NonUniformResourceIndexIntrinsics.empty()) {
    for (auto F : NonUniformResourceIndexIntrinsics) {
      TranslateHLBuiltinOperation(F, helper, HLOpcodeGroup::HLIntrinsic, &objHelper);
    }
  }
}
} // namespace hlsl