HLOperationLower.cpp 291 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132
///////////////////////////////////////////////////////////////////////////////
//                                                                           //
// HLOperationLower.cpp                                                      //
// Copyright (C) Microsoft Corporation. All rights reserved.                 //
// This file is distributed under the University of Illinois Open Source     //
// License. See LICENSE.TXT for details.                                     //
//                                                                           //
// Lower functions to lower HL operations to DXIL operations.                //
//                                                                           //
///////////////////////////////////////////////////////////////////////////////
  11. #define _USE_MATH_DEFINES
  12. #include <cmath>
  13. #include <unordered_set>
  14. #include "dxc/DXIL/DxilModule.h"
  15. #include "dxc/DXIL/DxilOperations.h"
  16. #include "dxc/HLSL/HLMatrixLowerHelper.h"
  17. #include "dxc/HLSL/HLMatrixType.h"
  18. #include "dxc/HLSL/HLModule.h"
  19. #include "dxc/DXIL/DxilUtil.h"
  20. #include "dxc/HLSL/HLOperationLower.h"
  21. #include "dxc/HLSL/HLOperationLowerExtension.h"
  22. #include "dxc/HLSL/HLOperations.h"
  23. #include "dxc/HlslIntrinsicOp.h"
  24. #include "llvm/IR/GetElementPtrTypeIterator.h"
  25. #include "llvm/IR/IRBuilder.h"
  26. #include "llvm/IR/Instructions.h"
  27. #include "llvm/IR/Module.h"
  28. #include "llvm/ADT/APSInt.h"
  29. using namespace llvm;
  30. using namespace hlsl;
  31. struct HLOperationLowerHelper {
  32. OP &hlslOP;
  33. Type *voidTy;
  34. Type *f32Ty;
  35. Type *i32Ty;
  36. llvm::Type *i1Ty;
  37. Type *i8Ty;
  38. DxilTypeSystem &dxilTypeSys;
  39. DxilFunctionProps *functionProps;
  40. bool bLegacyCBufferLoad;
  41. DataLayout dataLayout;
  42. HLOperationLowerHelper(HLModule &HLM);
  43. };
  44. HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
  45. : hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
  46. dataLayout(DataLayout(HLM.GetHLOptions().bUseMinPrecision
  47. ? hlsl::DXIL::kLegacyLayoutString
  48. : hlsl::DXIL::kNewLayoutString)) {
  49. llvm::LLVMContext &Ctx = HLM.GetCtx();
  50. voidTy = Type::getVoidTy(Ctx);
  51. f32Ty = Type::getFloatTy(Ctx);
  52. i32Ty = Type::getInt32Ty(Ctx);
  53. i1Ty = Type::getInt1Ty(Ctx);
  54. i8Ty = Type::getInt8Ty(Ctx);
  55. Function *EntryFunc = HLM.GetEntryFunction();
  56. functionProps = nullptr;
  57. if (HLM.HasDxilFunctionProps(EntryFunc))
  58. functionProps = &HLM.GetDxilFunctionProps(EntryFunc);
  59. bLegacyCBufferLoad = HLM.GetHLOptions().bLegacyCBufferLoad;
  60. }
// Helper state for lowering object (resource) intrinsics: resolves handle
// values back to their resource attributes and rewrites cbuffer-embedded
// resources to standalone globals.
struct HLObjectOperationLowerHelper {
private:
  // For object intrinsics.
  HLModule &HLM;
  // Resource characteristics resolved for one handle value.
  struct ResAttribute {
    DXIL::ResourceClass RC;
    DXIL::ResourceKind RK;
    Type *ResourceType;
  };
  // Cache: handle value -> resolved resource attributes.
  std::unordered_map<Value *, ResAttribute> HandleMetaMap;
  // Loads of UAV handles whose counter is used; storage owned by the caller.
  std::unordered_set<LoadInst *> &UpdateCounterSet;
  // Map from pointer of cbuffer to pointer of resource.
  // For cbuffer like this:
  //   cbuffer A {
  //     Texture2D T;
  //   };
  // A global resource Texture2D T2 will be created for Texture2D T.
  // CBPtrToResourceMap[T] will return T2.
  std::unordered_map<Value *, Value *> CBPtrToResourceMap;

public:
  HLObjectOperationLowerHelper(HLModule &HLM,
                               std::unordered_set<LoadInst *> &UpdateCounter)
      : HLM(HLM), UpdateCounterSet(UpdateCounter) {}
  84. DXIL::ResourceClass GetRC(Value *Handle) {
  85. ResAttribute &Res = FindCreateHandleResourceBase(Handle);
  86. return Res.RC;
  87. }
  88. DXIL::ResourceKind GetRK(Value *Handle) {
  89. ResAttribute &Res = FindCreateHandleResourceBase(Handle);
  90. return Res.RK;
  91. }
  92. Type *GetResourceType(Value *Handle) {
  93. ResAttribute &Res = FindCreateHandleResourceBase(Handle);
  94. return Res.ResourceType;
  95. }
  96. void MarkHasCounter(Type *Ty, Value *handle) {
  97. DXIL::ResourceClass RC = GetRC(handle);
  98. DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV,
  99. "must UAV for counter");
  100. std::unordered_set<Value *> resSet;
  101. MarkHasCounterOnCreateHandle(handle, resSet);
  102. }
  103. Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr,
  104. GlobalVariable *CbGV, MDNode *MD) {
  105. // Change array idx to 0 to make sure all array ptr share same key.
  106. Value *Key = UniformCbPtr(CbPtr, CbGV);
  107. if (CBPtrToResourceMap.count(Key))
  108. return CBPtrToResourceMap[Key];
  109. Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, MD);
  110. CBPtrToResourceMap[Key] = Resource;
  111. return Resource;
  112. }
  // Rewrite a GEP into a cbuffer-embedded resource (CbPtr) as a GEP into
  // the standalone resource global (ResPtr) that replaced it.
  Value *LowerCbResourcePtr(GetElementPtrInst *CbPtr, Value *ResPtr) {
    // Simple case.
    if (ResPtr->getType() == CbPtr->getType())
      return ResPtr;

    // Array case.
    DXASSERT_NOMSG(ResPtr->getType()->getPointerElementType()->isArrayTy());
    IRBuilder<> Builder(CbPtr);
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
    // Start from the first GEP index; array dimensions encountered below are
    // folded into it to form one linear index.
    Value *arrayIdx = GEPIt.getOperand();

    // Only calc array idx and size.
    // Ignore struct type part.
    for (; GEPIt != E; ++GEPIt) {
      if (GEPIt->isArrayTy()) {
        // Linearize: idx = idx * thisDimSize + thisDimIndex.
        arrayIdx = Builder.CreateMul(
            arrayIdx, Builder.getInt32(GEPIt->getArrayNumElements()));
        arrayIdx = Builder.CreateAdd(arrayIdx, GEPIt.getOperand());
      }
    }

    // Index the flat resource array: {0, linearized index}.
    return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx});
  }
private:
  // Resolve the resource attributes (class, kind, global type) behind a
  // handle value, caching results in HandleMetaMap. Walks through function
  // arguments, loads/stores, createHandle-style calls, selects, and phis.
  // On failure, emits an error and returns the cached Invalid entry.
  ResAttribute &FindCreateHandleResourceBase(Value *Handle) {
    if (HandleMetaMap.count(Handle))
      return HandleMetaMap[Handle];
    // Add invalid first to avoid dead loop.
    HandleMetaMap[Handle] = {DXIL::ResourceClass::Invalid,
                             DXIL::ResourceKind::Invalid,
                             StructType::get(Type::getVoidTy(HLM.GetCtx()), nullptr)};
    if (Argument *Arg = dyn_cast<Argument>(Handle)) {
      // Handle passed as a function argument: attributes come from the
      // argument's resource metadata.
      MDNode *MD = HLM.GetDxilResourceAttrib(Arg);
      if (!MD) {
        Handle->getContext().emitError("cannot map resource to handle");
        return HandleMetaMap[Handle];
      }
      DxilResourceBase Res(DxilResource::Class::Invalid);
      HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
      ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
                             Res.GetGlobalSymbol()->getType()};
      HandleMetaMap[Handle] = Attrib;
      return HandleMetaMap[Handle];
    }
    if (LoadInst *LI = dyn_cast<LoadInst>(Handle)) {
      // Handle loaded from memory: inspect the pointer's other users —
      // either a call that takes the pointer (use that parameter's resource
      // annotation) or a store (recurse on the stored value).
      Value *Ptr = LI->getPointerOperand();
      for (User *U : Ptr->users()) {
        if (CallInst *CI = dyn_cast<CallInst>(U)) {
          DxilFunctionAnnotation *FnAnnot = HLM.GetFunctionAnnotation(CI->getCalledFunction());
          if (FnAnnot) {
            for (auto &arg : CI->arg_operands()) {
              if (arg == Ptr) {
                unsigned argNo = arg.getOperandNo();
                DxilParameterAnnotation &ParamAnnot = FnAnnot->GetParameterAnnotation(argNo);
                MDNode *MD = ParamAnnot.GetResourceAttribute();
                if (!MD) {
                  Handle->getContext().emitError(
                      "cannot map resource to handle");
                  return HandleMetaMap[Handle];
                }
                DxilResourceBase Res(DxilResource::Class::Invalid);
                HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
                ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
                                       Res.GetGlobalSymbol()->getType()};
                HandleMetaMap[Handle] = Attrib;
                return HandleMetaMap[Handle];
              }
            }
          }
        }
        if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
          // Value stored to the slot this load reads from: recurse on it.
          Value *V = SI->getValueOperand();
          ResAttribute Attrib = FindCreateHandleResourceBase(V);
          HandleMetaMap[Handle] = Attrib;
          return HandleMetaMap[Handle];
        }
      }
      // Cannot find.
      Handle->getContext().emitError("cannot map resource to handle");
      return HandleMetaMap[Handle];
    }
    if (CallInst *CI = dyn_cast<CallInst>(Handle)) {
      // Handle produced by a call: the called function carries the resource
      // attribute metadata.
      MDNode *MD = HLM.GetDxilResourceAttrib(CI->getCalledFunction());
      if (!MD) {
        Handle->getContext().emitError("cannot map resource to handle");
        return HandleMetaMap[Handle];
      }
      DxilResourceBase Res(DxilResource::Class::Invalid);
      HLM.LoadDxilResourceBaseFromMDNode(MD, Res);
      ResAttribute Attrib = {Res.GetClass(), Res.GetKind(),
                             Res.GetGlobalSymbol()->getType()};
      HandleMetaMap[Handle] = Attrib;
      return HandleMetaMap[Handle];
    }
    if (SelectInst *Sel = dyn_cast<SelectInst>(Handle)) {
      // Attributes taken from the true arm; the false arm is still visited
      // so its entry gets cached/validated.
      ResAttribute &ResT = FindCreateHandleResourceBase(Sel->getTrueValue());
      // Use MDT here, ResourceClass, ResourceID match is done at
      // DxilGenerationPass::AddCreateHandleForPhiNodeAndSelect.
      HandleMetaMap[Handle] = ResT;
      FindCreateHandleResourceBase(Sel->getFalseValue());
      return ResT;
    }
    if (PHINode *Phi = dyn_cast<PHINode>(Handle)) {
      if (Phi->getNumOperands() == 0) {
        Handle->getContext().emitError("cannot map resource to handle");
        return HandleMetaMap[Handle];
      }
      // Attributes taken from operand 0; remaining incoming values are
      // visited for caching/validation only.
      ResAttribute &Res0 = FindCreateHandleResourceBase(Phi->getOperand(0));
      // Use Res0 here, ResourceClass, ResourceID match is done at
      // DxilGenerationPass::AddCreateHandleForPhiNodeAndSelect.
      HandleMetaMap[Handle] = Res0;
      for (unsigned i = 1; i < Phi->getNumOperands(); i++) {
        FindCreateHandleResourceBase(Phi->getOperand(i));
      }
      return Res0;
    }
    Handle->getContext().emitError("cannot map resource to handle");
    return HandleMetaMap[Handle];
  }
  229. CallInst *FindCreateHandle(Value *handle,
  230. std::unordered_set<Value *> &resSet) {
  231. // Already checked.
  232. if (resSet.count(handle))
  233. return nullptr;
  234. resSet.insert(handle);
  235. if (CallInst *CI = dyn_cast<CallInst>(handle))
  236. return CI;
  237. if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
  238. if (CallInst *CI = FindCreateHandle(Sel->getTrueValue(), resSet))
  239. return CI;
  240. if (CallInst *CI = FindCreateHandle(Sel->getFalseValue(), resSet))
  241. return CI;
  242. return nullptr;
  243. }
  244. if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
  245. for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
  246. if (CallInst *CI = FindCreateHandle(Phi->getOperand(i), resSet))
  247. return CI;
  248. }
  249. return nullptr;
  250. }
  251. return nullptr;
  252. }
  253. void MarkHasCounterOnCreateHandle(Value *handle,
  254. std::unordered_set<Value *> &resSet) {
  255. // Already checked.
  256. if (resSet.count(handle))
  257. return;
  258. resSet.insert(handle);
  259. if (CallInst *CI = dyn_cast<CallInst>(handle)) {
  260. Value *Res =
  261. CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx);
  262. LoadInst *LdRes = dyn_cast<LoadInst>(Res);
  263. if (!LdRes) {
  264. CI->getContext().emitError(CI, "cannot map resource to handle");
  265. return;
  266. }
  267. UpdateCounterSet.insert(LdRes);
  268. return;
  269. }
  270. if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
  271. MarkHasCounterOnCreateHandle(Sel->getTrueValue(), resSet);
  272. MarkHasCounterOnCreateHandle(Sel->getFalseValue(), resSet);
  273. }
  274. if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
  275. for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
  276. MarkHasCounterOnCreateHandle(Phi->getOperand(i), resSet);
  277. }
  278. }
  279. }
  280. Value *UniformCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV) {
  281. gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
  282. std::vector<Value *> idxList(CbPtr->idx_begin(), CbPtr->idx_end());
  283. unsigned i = 0;
  284. IRBuilder<> Builder(HLM.GetCtx());
  285. Value *zero = Builder.getInt32(0);
  286. for (; GEPIt != E; ++GEPIt, ++i) {
  287. if (GEPIt->isArrayTy()) {
  288. // Change array idx to 0 to make sure all array ptr share same key.
  289. idxList[i] = zero;
  290. }
  291. }
  292. Value *Key = Builder.CreateInBoundsGEP(CbGV, idxList);
  293. return Key;
  294. }
  295. Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV,
  296. MDNode *MD) {
  297. Type *CbTy = CbPtr->getPointerOperandType();
  298. DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(), "else arg not point to var");
  299. gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
  300. unsigned i = 0;
  301. IRBuilder<> Builder(HLM.GetCtx());
  302. unsigned arraySize = 1;
  303. DxilTypeSystem &typeSys = HLM.GetTypeSystem();
  304. std::string Name;
  305. for (; GEPIt != E; ++GEPIt, ++i) {
  306. if (GEPIt->isArrayTy()) {
  307. arraySize *= GEPIt->getArrayNumElements();
  308. } else if (GEPIt->isStructTy()) {
  309. DxilStructAnnotation *typeAnnot =
  310. typeSys.GetStructAnnotation(cast<StructType>(*GEPIt));
  311. DXASSERT_NOMSG(typeAnnot);
  312. unsigned idx = cast<ConstantInt>(GEPIt.getOperand())->getLimitedValue();
  313. DXASSERT_NOMSG(typeAnnot->GetNumFields() > idx);
  314. DxilFieldAnnotation &fieldAnnot = typeAnnot->GetFieldAnnotation(idx);
  315. if (!Name.empty())
  316. Name += ".";
  317. Name += fieldAnnot.GetFieldName();
  318. }
  319. }
  320. Type *Ty = CbPtr->getResultElementType();
  321. if (arraySize > 1) {
  322. Ty = ArrayType::get(Ty, arraySize);
  323. }
  324. return CreateResourceGV(Ty, Name, MD);
  325. }
  326. Value *CreateResourceGV(Type *Ty, StringRef Name, MDNode *MD) {
  327. Module &M = *HLM.GetModule();
  328. Constant *GV = M.getOrInsertGlobal(Name, Ty);
  329. // Create resource and set GV as globalSym.
  330. HLM.AddResourceWithGlobalVariableAndMDNode(GV, MD);
  331. return GV;
  332. }
  333. };
// Common signature for every intrinsic lowering routine in this file.
// NOTE(review): the exact contract of `Translated` (presumably cleared when
// lowering is deferred) is established by the dispatch loop, not visible here.
using IntrinsicLowerFuncTy = Value *(CallInst *CI, IntrinsicOp IOP,
                                     DXIL::OpCode opcode,
                                     HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated);
// One row of the intrinsic lowering table: maps an HL intrinsic opcode to
// the routine that lowers it and, when the mapping is trivial, the DXIL
// opcode to emit.
struct IntrinsicLower {
  // Intrinsic opcode.
  IntrinsicOp IntriOpcode;
  // Lower function.
  IntrinsicLowerFuncTy &LowerFunc;
  // DXIL opcode if can direct map.
  DXIL::OpCode DxilOpcode;
};
  345. // IOP intrinsics.
  346. namespace {
  347. Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, ArrayRef<Value *> refArgs,
  348. Type *Ty, Type *RetTy, OP *hlslOP,
  349. IRBuilder<> &Builder) {
  350. unsigned argNum = refArgs.size();
  351. std::vector<Value *> args = refArgs;
  352. if (Ty->isVectorTy()) {
  353. Value *retVal = llvm::UndefValue::get(RetTy);
  354. unsigned vecSize = Ty->getVectorNumElements();
  355. for (unsigned i = 0; i < vecSize; i++) {
  356. // Update vector args, skip known opcode arg.
  357. for (unsigned argIdx = HLOperandIndex::kUnaryOpSrc0Idx; argIdx < argNum;
  358. argIdx++) {
  359. if (refArgs[argIdx]->getType()->isVectorTy()) {
  360. Value *arg = refArgs[argIdx];
  361. args[argIdx] = Builder.CreateExtractElement(arg, i);
  362. }
  363. }
  364. Value *EltOP =
  365. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  366. retVal = Builder.CreateInsertElement(retVal, EltOP, i);
  367. }
  368. return retVal;
  369. } else {
  370. if (!RetTy->isVoidTy()) {
  371. Value *retVal =
  372. Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
  373. return retVal;
  374. } else {
  375. // Cannot add name to void.
  376. return Builder.CreateCall(dxilFunc, args);
  377. }
  378. }
  379. }
  380. // Generates a DXIL operation over an overloaded type (Ty), returning a
  381. // RetTy value; when Ty is a vector, it will replicate per-element operations
  382. // into RetTy to rebuild it.
  383. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
  384. Type *Ty, Type *RetTy, OP *hlslOP,
  385. IRBuilder<> &Builder) {
  386. Type *EltTy = Ty->getScalarType();
  387. Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy);
  388. return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP, Builder);
  389. }
  390. Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
  391. Type *Ty, Instruction *Inst, OP *hlslOP) {
  392. DXASSERT(refArgs.size() > 0, "else opcode isn't in signature");
  393. DXASSERT(refArgs[0] == nullptr,
  394. "else caller has already filled the value in");
  395. IRBuilder<> B(Inst);
  396. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  397. const_cast<llvm::Value **>(refArgs.data())[0] =
  398. opArg; // actually stack memory from caller
  399. return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B);
  400. }
  401. Value *TrivialDxilUnaryOperationRet(OP::OpCode opcode, Value *src, Type *RetTy,
  402. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  403. Type *Ty = src->getType();
  404. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  405. Value *args[] = {opArg, src};
  406. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  407. }
  408. Value *TrivialDxilUnaryOperation(OP::OpCode opcode, Value *src,
  409. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  410. return TrivialDxilUnaryOperationRet(opcode, src, src->getType(), hlslOP,
  411. Builder);
  412. }
  413. Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  414. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  415. Type *Ty = src0->getType();
  416. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  417. Value *args[] = {opArg, src0, src1};
  418. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  419. }
  420. Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
  421. Value *src2, hlsl::OP *hlslOP,
  422. IRBuilder<> &Builder) {
  423. Type *Ty = src0->getType();
  424. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  425. Value *args[] = {opArg, src0, src1, src2};
  426. return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  427. }
  428. Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  429. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  430. Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  431. IRBuilder<> Builder(CI);
  432. hlsl::OP *hlslOP = &helper.hlslOP;
  433. Value *retVal = TrivialDxilUnaryOperationRet(opcode, src0, CI->getType(), hlslOP, Builder);
  434. return retVal;
  435. }
  436. Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  437. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  438. hlsl::OP *hlslOP = &helper.hlslOP;
  439. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  440. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  441. IRBuilder<> Builder(CI);
  442. Value *binOp =
  443. TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder);
  444. return binOp;
  445. }
  446. Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  447. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  448. hlsl::OP *hlslOP = &helper.hlslOP;
  449. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  450. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  451. Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  452. IRBuilder<> Builder(CI);
  453. Value *triOp =
  454. TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder);
  455. return triOp;
  456. }
  457. Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  458. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  459. hlsl::OP *hlslOP = &helper.hlslOP;
  460. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  461. IRBuilder<> Builder(CI);
  462. Type *Ty = src->getType();
  463. Type *RetTy = Type::getInt1Ty(CI->getContext());
  464. if (Ty->isVectorTy())
  465. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  466. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  467. Value *args[] = {opArg, src};
  468. return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
  469. }
  470. Value *TranslateNonUniformResourceIndex(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  471. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  472. Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  473. CI->replaceAllUsesWith(V);
  474. for (User *U : V->users()) {
  475. if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(U)) {
  476. DxilMDHelper::MarkNonUniform(I);
  477. } else if (CastInst *castI = dyn_cast<CastInst>(U)) {
  478. for (User *castU : castI->users()) {
  479. if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(castU)) {
  480. DxilMDHelper::MarkNonUniform(I);
  481. }
  482. }
  483. }
  484. }
  485. return nullptr;
  486. }
  487. Value *TrivialBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  488. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  489. hlsl::OP *OP = &helper.hlslOP;
  490. Function *dxilFunc = OP->GetOpFunc(OP::OpCode::Barrier, CI->getType());
  491. Constant *opArg = OP->GetU32Const((unsigned)OP::OpCode::Barrier);
  492. unsigned uglobal = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceGlobal);
  493. unsigned g = static_cast<unsigned>(DXIL::BarrierMode::TGSMFence);
  494. unsigned t = static_cast<unsigned>(DXIL::BarrierMode::SyncThreadGroup);
  495. // unsigned ut = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceThreadGroup);
  496. unsigned barrierMode = 0;
  497. switch (IOP) {
  498. case IntrinsicOp::IOP_AllMemoryBarrier:
  499. barrierMode = uglobal | g;
  500. break;
  501. case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
  502. barrierMode = uglobal | g | t;
  503. break;
  504. case IntrinsicOp::IOP_GroupMemoryBarrier:
  505. barrierMode = g;
  506. break;
  507. case IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync:
  508. barrierMode = g | t;
  509. break;
  510. case IntrinsicOp::IOP_DeviceMemoryBarrier:
  511. barrierMode = uglobal;
  512. break;
  513. case IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync:
  514. barrierMode = uglobal | t;
  515. break;
  516. default:
  517. DXASSERT(0, "invalid opcode for barrier");
  518. break;
  519. }
  520. Value *src0 = OP->GetU32Const(static_cast<unsigned>(barrierMode));
  521. Value *args[] = {opArg, src0};
  522. IRBuilder<> Builder(CI);
  523. Builder.CreateCall(dxilFunc, args);
  524. return nullptr;
  525. }
  526. Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
  527. OP::OpCode opcode,
  528. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  529. hlsl::OP *hlslOP = &helper.hlslOP;
  530. IRBuilder<> Builder(CI);
  531. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  532. Type *Ty = val->getType();
  533. Constant *toByteConst = ConstantFP::get(Ty->getScalarType(), 255);
  534. if (Ty != Ty->getScalarType()) {
  535. toByteConst =
  536. ConstantVector::getSplat(Ty->getVectorNumElements(), toByteConst);
  537. }
  538. Value *byte4 = Builder.CreateFMul(toByteConst, val);
  539. byte4 = TrivialDxilUnaryOperation(OP::OpCode::Round_ne, byte4, hlslOP, Builder);
  540. return Builder.CreateCast(Instruction::CastOps::FPToSI, byte4, CI->getType());
  541. }
  542. // Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
  543. // Fxc uses the below rules when choosing mul-only code gen pattern to implement pow function.
  544. // Rule 1: Applicable only to power values in the range [INT32_MIN, INT32_MAX]
  545. // Rule 2: The maximum number of mul ops needed shouldn't exceed (2n+1) or (n+1) based on whether the power
  546. // is a positive or a negative value. Here "n" is the number of scalar elements in power.
  547. // Rule 3: Power must be an exact value.
  548. // +----------+---------------------+------------------+
  549. // | BaseType | IsExponentPositive | MaxMulOpsAllowed |
  550. // +----------+---------------------+------------------+
  551. // | float4x4 | True | 33 |
  552. // | float4x4 | False | 17 |
  553. // | float4x2 | True | 17 |
  554. // | float4x2 | False | 9 |
  555. // | float2x4 | True | 17 |
  556. // | float2x4 | False | 9 |
  557. // | float4 | True | 9 |
  558. // | float4 | False | 5 |
  559. // | float2 | True | 5 |
  560. // | float2 | False | 3 |
  561. // | float | True | 3 |
  562. // | float | False | 2 |
  563. // +----------+---------------------+------------------+
  564. bool CanUseFxcMulOnlyPatternForPow(IRBuilder<>& Builder, Value *x, Value *pow, int32_t& powI) {
  565. // Applicable only when power is a literal.
  566. if (!isa<ConstantDataVector>(pow) && !isa<ConstantFP>(pow)) {
  567. return false;
  568. }
  569. // Only apply this code gen on splat values.
  570. if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) {
  571. if (!hlsl::dxilutil::IsSplat(cdv)) {
  572. return false;
  573. }
  574. }
  575. APFloat powAPF = isa<ConstantDataVector>(pow) ?
  576. cast<ConstantDataVector>(pow)->getElementAsAPFloat(0) : // should be a splat value
  577. cast<ConstantFP>(pow)->getValueAPF();
  578. APSInt powAPS(32, false);
  579. bool isExact = false;
  580. // Try converting float value of power to integer and also check if the float value is exact.
  581. APFloat::opStatus status = powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
  582. if (status == APFloat::opStatus::opOK && isExact) {
  583. powI = powAPS.getExtValue();
  584. uint32_t powU = abs(powI);
  585. int setBitCount = 0;
  586. int maxBitSetPos = -1;
  587. for (int i = 0; i < 32; i++) {
  588. if ((powU >> i) & 1) {
  589. setBitCount++;
  590. maxBitSetPos = i;
  591. }
  592. }
  593. DXASSERT(maxBitSetPos <= 30, "msb should always be zero.");
  594. unsigned numElem = isa<ConstantDataVector>(pow) ? x->getType()->getVectorNumElements() : 1;
  595. int mulOpThreshold = powI < 0 ? numElem + 1 : 2 * numElem + 1;
  596. int mulOpNeeded = maxBitSetPos + setBitCount - 1;
  597. return mulOpNeeded <= mulOpThreshold;
  598. }
  599. return false;
  600. }
  601. Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<>& Builder, Value *x, const int32_t y) {
  602. uint32_t absY = abs(y);
  603. // If y is zero then always return 1.
  604. if (absY == 0) {
  605. return ConstantFP::get(x->getType(), 1);
  606. }
  607. int lastSetPos = -1;
  608. Value *result = nullptr;
  609. Value *mul = nullptr;
  610. for (int i = 0; i < 32; i++) {
  611. if ((absY >> i) & 1) {
  612. for (int j = i; j > lastSetPos; j--) {
  613. if (!mul) {
  614. mul = x;
  615. }
  616. else {
  617. mul = Builder.CreateFMul(mul, mul);
  618. }
  619. }
  620. result = (result == nullptr) ? mul : Builder.CreateFMul(result, mul);
  621. lastSetPos = i;
  622. }
  623. }
  624. // Compute reciprocal for negative power values.
  625. if (y < 0) {
  626. Value* constOne = ConstantFP::get(x->getType(), 1);
  627. result = Builder.CreateFDiv(constOne, result);
  628. }
  629. return result;
  630. }
  631. Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<>& Builder, Value *x, Value *y, bool isFXCCompatMode = false) {
  632. // As applicable implement pow using only mul ops as done by Fxc.
  633. int32_t p = 0;
  634. if (isFXCCompatMode && CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
  635. return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
  636. }
  637. // Default to log-mul-exp pattern if previous scenarios don't apply.
  638. // t = log(x);
  639. Value *logX =
  640. TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
  641. // t = y * t;
  642. Value *mulY = Builder.CreateFMul(logX, y);
  643. // pow = exp(t);
  644. return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
  645. }
  646. Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
  647. OP::OpCode opcode,
  648. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  649. hlsl::OP *hlslOP = &helper.hlslOP;
  650. IRBuilder<> Builder(CI);
  651. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  652. Type *Ty = val->getType();
  653. VectorType *VT = dyn_cast<VectorType>(Ty);
  654. if (!VT) {
  655. CI->getContext().emitError(
  656. CI, "AddUint64 can only be applied to uint2 and uint4 operands");
  657. return UndefValue::get(Ty);
  658. }
  659. unsigned size = VT->getNumElements();
  660. if (size != 2 && size != 4) {
  661. CI->getContext().emitError(
  662. CI, "AddUint64 can only be applied to uint2 and uint4 operands");
  663. return UndefValue::get(Ty);
  664. }
  665. Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  666. Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  667. Value *RetVal = UndefValue::get(Ty);
  668. Function *AddC = hlslOP->GetOpFunc(DXIL::OpCode::UAddc, helper.i32Ty);
  669. Value *opArg = Builder.getInt32(static_cast<unsigned>(DXIL::OpCode::UAddc));
  670. for (unsigned i=0; i<size; i+=2) {
  671. Value *low0 = Builder.CreateExtractElement(op0, i);
  672. Value *low1 = Builder.CreateExtractElement(op1, i);
  673. Value *lowWithC = Builder.CreateCall(AddC, { opArg, low0, low1});
  674. Value *low = Builder.CreateExtractValue(lowWithC, 0);
  675. RetVal = Builder.CreateInsertElement(RetVal, low, i);
  676. Value *carry = Builder.CreateExtractValue(lowWithC, 1);
  677. // Ext i1 to i32
  678. carry = Builder.CreateZExt(carry, helper.i32Ty);
  679. Value *hi0 = Builder.CreateExtractElement(op0, i+1);
  680. Value *hi1 = Builder.CreateExtractElement(op1, i+1);
  681. Value *hi = Builder.CreateAdd(hi0, hi1);
  682. hi = Builder.CreateAdd(hi, carry);
  683. RetVal = Builder.CreateInsertElement(RetVal, hi, i+1);
  684. }
  685. return RetVal;
  686. }
  687. bool IsValidLoadInput(Value *V) {
  688. // Must be load input.
  689. // TODO: report this error on front-end
  690. if (!isa<CallInst>(V)) {
  691. V->getContext().emitError("attribute evaluation can only be done on values "
  692. "taken directly from inputs");
  693. return false;
  694. }
  695. CallInst *CI = cast<CallInst>(V);
  696. // Must be immediate.
  697. ConstantInt *opArg =
  698. cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
  699. DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
  700. if (op != DXIL::OpCode::LoadInput) {
  701. V->getContext().emitError("attribute evaluation can only be done on values "
  702. "taken directly from inputs");
  703. return false;
  704. }
  705. return true;
  706. }
  707. // Apply current shuffle vector mask on top of previous shuffle mask.
  708. // For example, if previous mask is (12,11,10,13) and current mask is (3,1,0,2)
  709. // new mask would be (13,11,12,10)
  710. Constant *AccumulateMask(Constant *curMask, Constant *prevMask) {
  711. if (curMask == nullptr) {
  712. return prevMask;
  713. }
  714. unsigned size = cast<VectorType>(curMask->getType())->getNumElements();
  715. SmallVector<uint32_t, 16> Elts;
  716. for (unsigned i = 0; i != size; ++i) {
  717. ConstantInt *Index = cast<ConstantInt>(curMask->getAggregateElement(i));
  718. ConstantInt *IVal =
  719. cast<ConstantInt>(prevMask->getAggregateElement(Index->getSExtValue()));
  720. Elts.emplace_back(IVal->getSExtValue());
  721. }
  722. return ConstantDataVector::get(curMask->getContext(), Elts);
  723. }
  724. Constant *GetLoadInputsForEvaluate(Value *V, std::vector<CallInst*> &loadList) {
  725. Constant *shufMask = nullptr;
  726. if (V->getType()->isVectorTy()) {
  727. // Must be insert element inst. Keeping track of masks for shuffle vector
  728. Value *Vec = V;
  729. while (ShuffleVectorInst *shuf = dyn_cast<ShuffleVectorInst>(Vec)) {
  730. shufMask = AccumulateMask(shufMask, shuf->getMask());
  731. Vec = shuf->getOperand(0);
  732. }
  733. // TODO: We are assuming that the operand of insertelement is a LoadInput.
  734. // This will fail on the case where we pass in matrix member using array subscript.
  735. while (!isa<UndefValue>(Vec)) {
  736. InsertElementInst *insertInst = cast<InsertElementInst>(Vec);
  737. Vec = insertInst->getOperand(0);
  738. Value *Elt = insertInst->getOperand(1);
  739. if (IsValidLoadInput(Elt)) {
  740. loadList.emplace_back(cast<CallInst>(Elt));
  741. }
  742. }
  743. } else {
  744. if (IsValidLoadInput(V)) {
  745. loadList.emplace_back(cast<CallInst>(V));
  746. }
  747. }
  748. return shufMask;
  749. }
  750. // Swizzle could reduce the dimensionality of the Type, but
  751. // for temporary insertelement instructions should maintain the existing size of the loadinput.
  752. // So we have to analyze the type of src in order to determine the actual size required.
  753. Type *GetInsertElementTypeForEvaluate(Value *src) {
  754. if (dyn_cast<InsertElementInst>(src)) {
  755. return src->getType();
  756. }
  757. else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(src)) {
  758. return SV->getOperand(0)->getType();
  759. }
  760. src->getContext().emitError("Invalid type call for EvaluateAttribute function");
  761. return nullptr;
  762. }
  763. Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  764. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  765. hlsl::OP *hlslOP = &helper.hlslOP;
  766. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  767. Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  768. IRBuilder<> Builder(CI);
  769. std::vector<CallInst*> loadList;
  770. Constant *shufMask = GetLoadInputsForEvaluate(val, loadList);
  771. unsigned size = loadList.size();
  772. OP::OpCode opcode = OP::OpCode::EvalSampleIndex;
  773. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  774. Type *Ty = GetInsertElementTypeForEvaluate(val);
  775. Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  776. Value *result = UndefValue::get(Ty);
  777. for (unsigned i = 0; i < size; i++) {
  778. CallInst *loadInput = loadList[size-1-i];
  779. Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
  780. Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
  781. Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
  782. Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, sampleIdx });
  783. result = Builder.CreateInsertElement(result, Elt, i);
  784. }
  785. if (shufMask)
  786. result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  787. return result;
  788. }
  789. Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  790. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  791. hlsl::OP *hlslOP = &helper.hlslOP;
  792. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  793. Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  794. IRBuilder<> Builder(CI);
  795. Value *offsetX = Builder.CreateExtractElement(offset, (uint64_t)0);
  796. Value *offsetY = Builder.CreateExtractElement(offset, 1);
  797. std::vector<CallInst*> loadList;
  798. Constant *shufMask = GetLoadInputsForEvaluate(val, loadList);
  799. unsigned size = loadList.size();
  800. OP::OpCode opcode = OP::OpCode::EvalSnapped;
  801. Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  802. Type *Ty = GetInsertElementTypeForEvaluate(val);
  803. Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  804. Value *result = UndefValue::get(Ty);
  805. for (unsigned i = 0; i < size; i++) {
  806. CallInst *loadInput = loadList[size-1-i];
  807. Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
  808. Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
  809. Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
  810. Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, offsetX, offsetY });
  811. result = Builder.CreateInsertElement(result, Elt, i);
  812. }
  813. if (shufMask)
  814. result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  815. return result;
  816. }
// EvaluateAttributeCentroid(input) — re-evaluates an interpolated input at
// the pixel centroid.  GetLoadInputsForEvaluate walks back from 'src' to the
// LoadInput calls that produced it (returning a shuffle mask when the source
// was swizzled); each loaded component is re-issued through the EvalCentroid
// DXIL op and the results are reassembled into a vector.
Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *src = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
  std::vector<CallInst*> loadList;
  Constant *shufMask = GetLoadInputsForEvaluate(src, loadList);
  unsigned size = loadList.size();
  IRBuilder<> Builder(CI);
  OP::OpCode opcode = OP::OpCode::EvalCentroid;
  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Type *Ty = GetInsertElementTypeForEvaluate(src);
  Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *result = UndefValue::get(Ty);
  for (unsigned i = 0; i < size; i++) {
    // loadList appears to be gathered back-to-front; index from the end so
    // result element i corresponds to source component i — TODO confirm
    // against GetLoadInputsForEvaluate.
    CallInst *loadInput = loadList[size-1-i];
    // Re-evaluate using the same signature element / row / column the
    // original LoadInput referenced.
    Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
    Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
    Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
    Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx });
    result = Builder.CreateInsertElement(result, Elt, i);
  }
  // Re-apply the original swizzle, if any.
  if (shufMask)
    result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  return result;
}
// GetAttributeAtVertex(attribute, vertexID) — reads an input attribute at a
// specific vertex of the primitive via the AttributeAtVertex DXIL op, which
// takes the vertex index as an i8 immediate argument.
Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated) {
  DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate");
  hlsl::OP *hlslOP = &helper.hlslOP;
  IRBuilder<> Builder(CI);
  Type *Ty = CI->getType();
  Value *val = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx);
  Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx);
  // The DXIL op takes the vertex index as i8.
  Value *vertexI8Idx = Builder.CreateTrunc(vertexIdx, Type::getInt8Ty(CI->getContext()));
  // Check the range of VertexID
  // NOTE(review): these are Value-pointer comparisons, so the check only
  // passes when the trunc constant-folded to the uniqued i8 constant 0, 1,
  // or 2.  A non-constant vertex index also takes the error path, even
  // though the message only mentions the 0-2 range — confirm that rejecting
  // dynamic indices here is intended.
  Value *vertex0 = Builder.getInt8(0);
  Value *vertex1 = Builder.getInt8(1);
  Value *vertex2 = Builder.getInt8(2);
  if (vertexI8Idx != vertex0 && vertexI8Idx != vertex1 && vertexI8Idx != vertex2) {
    CI->getContext().emitError(CI, "VertexID at GetAttributeAtVertex can only range from 0 to 2");
    return UndefValue::get(Ty);
  }
  // Re-issue every LoadInput feeding 'val' through AttributeAtVertex.
  std::vector<CallInst*> loadList;
  Constant *shufMask = GetLoadInputsForEvaluate(val, loadList);
  unsigned size = loadList.size();
  Value *opArg = hlslOP->GetU32Const((unsigned)op);
  Function *evalFunc = hlslOP->GetOpFunc(op, Ty->getScalarType());
  Value *result = UndefValue::get(Ty);
  for (unsigned i = 0; i < size; ++i) {
    // loadList is indexed from the back so result element i matches source
    // component i (see TranslateEvalCentroid for the same pattern).
    CallInst *loadInput = loadList[size - 1 - i];
    Value *inputElemID = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
    Value *rowIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
    Value *colIdx = loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
    Value *Elt = Builder.CreateCall(evalFunc, { opArg, inputElemID, rowIdx, colIdx, vertexI8Idx });
    result = Builder.CreateInsertElement(result, Elt, i);
  }
  // Re-apply the original swizzle, if any.
  if (shufMask)
    result = Builder.CreateShuffleVector(result, UndefValue::get(Ty), shufMask);
  return result;
}
  879. Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  880. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  881. hlsl::OP *hlslOP = &helper.hlslOP;
  882. Type *Ty = Type::getVoidTy(CI->getContext());
  883. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  884. Value *args[] = {opArg};
  885. IRBuilder<> Builder(CI);
  886. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  887. return dxilOp;
  888. }
  889. Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  890. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  891. hlsl::OP *hlslOP = &helper.hlslOP;
  892. Type *Ty = CI->getType();
  893. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  894. Value *args[] = {opArg};
  895. IRBuilder<> Builder(CI);
  896. Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  897. return dxilOp;
  898. }
  899. Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  900. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  901. hlsl::OP *hlslOP = &helper.hlslOP;
  902. OP::OpCode opcode = OP::OpCode::RenderTargetGetSamplePosition;
  903. IRBuilder<> Builder(CI);
  904. Type *Ty = Type::getVoidTy(CI->getContext());
  905. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  906. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  907. Value *args[] = {opArg, val};
  908. Value *samplePos =
  909. TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
  910. Value *result = UndefValue::get(CI->getType());
  911. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  912. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  913. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  914. result = Builder.CreateInsertElement(result, samplePosY, 1);
  915. return result;
  916. }
  917. // val QuadReadLaneAt(val, uint);
  918. Value *TranslateQuadReadLaneAt(CallInst *CI, IntrinsicOp IOP,
  919. OP::OpCode opcode,
  920. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  921. hlsl::OP *hlslOP = &helper.hlslOP;
  922. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  923. return TrivialDxilOperation(DXIL::OpCode::QuadReadLaneAt, refArgs,
  924. CI->getOperand(1)->getType(), CI, hlslOP);
  925. }
  926. // Wave intrinsics of the form fn(val,QuadOpKind)->val
  927. Value *TranslateQuadReadAcross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  928. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  929. hlsl::OP *hlslOP = &helper.hlslOP;
  930. DXIL::QuadOpKind opKind;
  931. switch (IOP) {
  932. case IntrinsicOp::IOP_QuadReadAcrossX: opKind = DXIL::QuadOpKind::ReadAcrossX; break;
  933. case IntrinsicOp::IOP_QuadReadAcrossY: opKind = DXIL::QuadOpKind::ReadAcrossY; break;
  934. default: DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_QuadReadAcrossDiagonal);
  935. case IntrinsicOp::IOP_QuadReadAcrossDiagonal: opKind = DXIL::QuadOpKind::ReadAcrossDiagonal; break;
  936. }
  937. Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind);
  938. Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg};
  939. return TrivialDxilOperation(DXIL::OpCode::QuadOp, refArgs,
  940. CI->getOperand(1)->getType(), CI, hlslOP);
  941. }
  942. // WaveAllEqual(val<n>)->bool<n>
  943. Value *TranslateWaveAllEqual(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  944. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  945. hlsl::OP *hlslOP = &helper.hlslOP;
  946. Value *src = CI->getArgOperand(HLOperandIndex::kWaveAllEqualValueOpIdx);
  947. IRBuilder<> Builder(CI);
  948. Type *Ty = src->getType();
  949. Type *RetTy = Type::getInt1Ty(CI->getContext());
  950. if (Ty->isVectorTy())
  951. RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
  952. Constant *opArg = hlslOP->GetU32Const((unsigned)DXIL::OpCode::WaveActiveAllEqual);
  953. Value *args[] = {opArg, src};
  954. return TrivialDxilOperation(DXIL::OpCode::WaveActiveAllEqual, args, Ty, RetTy,
  955. hlslOP, Builder);
  956. }
  957. // Wave intrinsics of the form fn(valA)->valB, where no overloading takes place
  958. Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  959. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  960. hlsl::OP *hlslOP = &helper.hlslOP;
  961. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  962. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  963. }
// Wave ballot intrinsic.
// The high-level operation is uint4 ballot(i1).
// The DXIL operation is struct.u4 ballot(i1).
// To avoid updating users with more than a simple replace, we translate into
// a call into struct.u4, then reassemble the vector.
// Scalarization and constant propagation take care of cleanup.
Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  IRBuilder<> B(CI);
  // Make the DXIL call itself.
  hlsl::OP *hlslOP = &helper.hlslOP;
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Value *refArgs[] = { opArg, CI->getOperand(1) };
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  Value *dxilVal = B.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode));
  // Assign from the call results into a vector.
  Type *ResTy = CI->getType();
  DXASSERT_NOMSG(ResTy->isVectorTy() && ResTy->getVectorNumElements() == 4);
  DXASSERT_NOMSG(dxilVal->getType()->isStructTy() &&
                 dxilVal->getType()->getNumContainedTypes() == 4);
  // 'x' component is the first vector element, highest bits.
  Value *ResVal = llvm::UndefValue::get(ResTy);
  // Struct field Idx maps straight onto vector element Idx.
  for (unsigned Idx = 0; Idx < 4; ++Idx) {
    ResVal = B.CreateInsertElement(
        ResVal, B.CreateExtractValue(dxilVal, ArrayRef<unsigned>(Idx)), Idx);
  }
  return ResVal;
}
  992. static bool WaveIntrinsicNeedsSign(OP::OpCode opcode) {
  993. return opcode == OP::OpCode::WaveActiveOp ||
  994. opcode == OP::OpCode::WavePrefixOp;
  995. }
  996. static unsigned WaveIntrinsicToSignedOpKind(IntrinsicOp IOP) {
  997. if (IOP == IntrinsicOp::IOP_WaveActiveUMax ||
  998. IOP == IntrinsicOp::IOP_WaveActiveUMin ||
  999. IOP == IntrinsicOp::IOP_WaveActiveUSum ||
  1000. IOP == IntrinsicOp::IOP_WaveActiveUProduct ||
  1001. IOP == IntrinsicOp::IOP_WavePrefixUSum ||
  1002. IOP == IntrinsicOp::IOP_WavePrefixUProduct)
  1003. return (unsigned)DXIL::SignedOpKind::Unsigned;
  1004. return (unsigned)DXIL::SignedOpKind::Signed;
  1005. }
// Maps a wave intrinsic to the i8 "kind" immediate of its DXIL op.
// Note the enum family depends on the intrinsic: bit intrinsics produce
// DXIL::WaveBitOpKind values while the prefix/numeric intrinsics produce
// DXIL::WaveOpKind values; both are returned as raw unsigned immediates.
static unsigned WaveIntrinsicToOpKind(IntrinsicOp IOP) {
  switch (IOP) {
  // Bit operations.
  case IntrinsicOp::IOP_WaveActiveBitOr:
    return (unsigned)DXIL::WaveBitOpKind::Or;
  case IntrinsicOp::IOP_WaveActiveBitAnd:
    return (unsigned)DXIL::WaveBitOpKind::And;
  case IntrinsicOp::IOP_WaveActiveBitXor:
    return (unsigned)DXIL::WaveBitOpKind::Xor;
  // Prefix operations.
  case IntrinsicOp::IOP_WavePrefixSum:
  case IntrinsicOp::IOP_WavePrefixUSum:
    return (unsigned)DXIL::WaveOpKind::Sum;
  case IntrinsicOp::IOP_WavePrefixProduct:
  case IntrinsicOp::IOP_WavePrefixUProduct:
    return (unsigned)DXIL::WaveOpKind::Product;
  // Numeric operations.
  case IntrinsicOp::IOP_WaveActiveMax:
  case IntrinsicOp::IOP_WaveActiveUMax:
    return (unsigned)DXIL::WaveOpKind::Max;
  case IntrinsicOp::IOP_WaveActiveMin:
  case IntrinsicOp::IOP_WaveActiveUMin:
    return (unsigned)DXIL::WaveOpKind::Min;
  case IntrinsicOp::IOP_WaveActiveSum:
  case IntrinsicOp::IOP_WaveActiveUSum:
    return (unsigned)DXIL::WaveOpKind::Sum;
  // Product cases deliberately share the default label: the assert below
  // catches any intrinsic the caller should not have passed here.
  case IntrinsicOp::IOP_WaveActiveProduct:
  case IntrinsicOp::IOP_WaveActiveUProduct:
  default:
    DXASSERT(IOP == IntrinsicOp::IOP_WaveActiveProduct ||
             IOP == IntrinsicOp::IOP_WaveActiveUProduct,
             "else caller passed incorrect value");
    return (unsigned)DXIL::WaveOpKind::Product;
  }
}
  1041. // Wave intrinsics of the form fn(valA)->valA
  1042. Value *TranslateWaveA2A(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1043. HLOperationLowerHelper &helper,
  1044. HLObjectOperationLowerHelper *pObjHelper,
  1045. bool &Translated) {
  1046. hlsl::OP *hlslOP = &helper.hlslOP;
  1047. Constant *kindValInt = hlslOP->GetI8Const(WaveIntrinsicToOpKind(IOP));
  1048. Constant *signValInt = hlslOP->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
  1049. Value *refArgs[] = {nullptr, CI->getOperand(1), kindValInt, signValInt};
  1050. unsigned refArgCount = _countof(refArgs);
  1051. if (!WaveIntrinsicNeedsSign(opcode))
  1052. refArgCount--;
  1053. return TrivialDxilOperation(opcode,
  1054. llvm::ArrayRef<Value *>(refArgs, refArgCount),
  1055. CI->getOperand(1)->getType(), CI, hlslOP);
  1056. }
  1057. // Wave intrinsics of the form fn()->val
  1058. Value *TranslateWaveToVal(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1059. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1060. hlsl::OP *hlslOP = &helper.hlslOP;
  1061. Value *refArgs[] = {nullptr};
  1062. return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  1063. }
  1064. // Wave intrinsics of the form fn(val,lane)->val
  1065. Value *TranslateWaveReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1066. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1067. hlsl::OP *hlslOP = &helper.hlslOP;
  1068. Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
  1069. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneAt, refArgs,
  1070. CI->getOperand(1)->getType(), CI, hlslOP);
  1071. }
  1072. // Wave intrinsics of the form fn(val)->val
  1073. Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP,
  1074. OP::OpCode opcode,
  1075. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1076. hlsl::OP *hlslOP = &helper.hlslOP;
  1077. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  1078. return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneFirst, refArgs,
  1079. CI->getOperand(1)->getType(), CI, hlslOP);
  1080. }
  1081. Value *TranslateAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1082. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1083. hlsl::OP *hlslOP = &helper.hlslOP;
  1084. Type *pOverloadTy = CI->getType()->getScalarType();
  1085. if (pOverloadTy->isFloatingPointTy()) {
  1086. Value *refArgs[] = {nullptr, CI->getOperand(1)};
  1087. return TrivialDxilOperation(DXIL::OpCode::FAbs, refArgs, CI->getType(), CI,
  1088. hlslOP);
  1089. } else {
  1090. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1091. IRBuilder<> Builder(CI);
  1092. Value *neg = Builder.CreateNeg(src);
  1093. return TrivialDxilBinaryOperation(DXIL::OpCode::IMax, src, neg, hlslOP,
  1094. Builder);
  1095. }
  1096. }
  1097. Value *TranslateUAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1098. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1099. return CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx); // No-op
  1100. }
  1101. Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) {
  1102. Type *Ty = val->getType();
  1103. Type *EltTy = Ty->getScalarType();
  1104. Constant *zero = nullptr;
  1105. if (EltTy->isFloatingPointTy())
  1106. zero = ConstantFP::get(EltTy, 0);
  1107. else
  1108. zero = ConstantInt::get(EltTy, 0);
  1109. if (Ty != EltTy) {
  1110. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1111. }
  1112. if (EltTy->isFloatingPointTy())
  1113. return Builder.CreateFCmpUNE(val, zero);
  1114. else
  1115. return Builder.CreateICmpNE(val, zero);
  1116. }
  1117. Value *TranslateAllForValue(Value *val, IRBuilder<> &Builder) {
  1118. Value *cond = GenerateCmpNEZero(val, Builder);
  1119. Type *Ty = val->getType();
  1120. Type *EltTy = Ty->getScalarType();
  1121. if (Ty != EltTy) {
  1122. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1123. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1124. Value *Elt = Builder.CreateExtractElement(cond, i);
  1125. Result = Builder.CreateAnd(Result, Elt);
  1126. }
  1127. return Result;
  1128. } else
  1129. return cond;
  1130. }
  1131. Value *TranslateAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1132. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1133. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1134. IRBuilder<> Builder(CI);
  1135. return TranslateAllForValue(val, Builder);
  1136. }
  1137. Value *TranslateAny(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1138. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1139. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1140. IRBuilder<> Builder(CI);
  1141. Value *cond = GenerateCmpNEZero(val, Builder);
  1142. Type *Ty = val->getType();
  1143. Type *EltTy = Ty->getScalarType();
  1144. if (Ty != EltTy) {
  1145. Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
  1146. for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
  1147. Value *Elt = Builder.CreateExtractElement(cond, i);
  1148. Result = Builder.CreateOr(Result, Elt);
  1149. }
  1150. return Result;
  1151. } else
  1152. return cond;
  1153. }
  1154. Value *TranslateBitcast(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1155. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1156. Type *Ty = CI->getType();
  1157. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1158. IRBuilder<> Builder(CI);
  1159. return Builder.CreateBitCast(op, Ty);
  1160. }
// Splits double(s) 'x' into 32-bit halves via the SplitDouble DXIL op and
// stores them through the 'lo' and 'hi' out-pointers.  Produces no value
// itself (always returns nullptr).
Value *TranslateDoubleAsUint(Value *x, Value *lo, Value *hi,
                             IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  Type *Ty = x->getType();
  Type *outTy = lo->getType()->getPointerElementType();
  DXIL::OpCode opcode = DXIL::OpCode::SplitDouble;
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  if (Ty->isVectorTy()) {
    // Split element by element and rebuild the lo/hi vectors before the
    // final stores.
    Value *retValLo = llvm::UndefValue::get(outTy);
    Value *retValHi = llvm::UndefValue::get(outTy);
    unsigned vecSize = Ty->getVectorNumElements();
    for (unsigned i = 0; i < vecSize; i++) {
      Value *Elt = Builder.CreateExtractElement(x, i);
      Value *EltOP = Builder.CreateCall(dxilFunc, {opArg, Elt},
                                        hlslOP->GetOpCodeName(opcode));
      // SplitDouble yields a struct: field 0 -> low word, field 1 -> high.
      Value *EltLo = Builder.CreateExtractValue(EltOP, 0);
      retValLo = Builder.CreateInsertElement(retValLo, EltLo, i);
      Value *EltHi = Builder.CreateExtractValue(EltOP, 1);
      retValHi = Builder.CreateInsertElement(retValHi, EltHi, i);
    }
    Builder.CreateStore(retValLo, lo);
    Builder.CreateStore(retValHi, hi);
  } else {
    // Scalar case: one split, two stores.
    Value *retVal =
        Builder.CreateCall(dxilFunc, {opArg, x}, hlslOP->GetOpCodeName(opcode));
    Value *retValLo = Builder.CreateExtractValue(retVal, 0);
    Value *retValHi = Builder.CreateExtractValue(retVal, 1);
    Builder.CreateStore(retValLo, lo);
    Builder.CreateStore(retValHi, hi);
  }
  return nullptr;
}
  1193. Value *TranslateAsUint(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1194. HLOperationLowerHelper &helper,
  1195. HLObjectOperationLowerHelper *pObjHelper,
  1196. bool &Translated) {
  1197. if (CI->getNumArgOperands() == 2) {
  1198. return TranslateBitcast(CI, IOP, opcode, helper, pObjHelper, Translated);
  1199. } else {
  1200. DXASSERT_NOMSG(CI->getNumArgOperands() == 4);
  1201. hlsl::OP *hlslOP = &helper.hlslOP;
  1202. Value *x = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  1203. DXASSERT_NOMSG(x->getType()->getScalarType()->isDoubleTy());
  1204. Value *lo = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  1205. Value *hi = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  1206. IRBuilder<> Builder(CI);
  1207. return TranslateDoubleAsUint(x, lo, hi, Builder, hlslOP);
  1208. }
  1209. }
  1210. Value *TranslateAsDouble(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1211. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1212. hlsl::OP *hlslOP = &helper.hlslOP;
  1213. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1214. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1215. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  1216. IRBuilder<> Builder(CI);
  1217. return TrivialDxilOperation(opcode, { opArg, x, y }, CI->getType(), CI->getType(), hlslOP, Builder);
  1218. }
// atan2(y, x): compute atan(y/x) and then fix up the quadrant with a cascade
// of selects.
Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *tan = Builder.CreateFDiv(y, x);
  Value *atan =
      TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder);
  // Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
  Type *Ty = x->getType();
  Constant *pi = ConstantFP::get(Ty->getScalarType(), M_PI);
  Constant *halfPi = ConstantFP::get(Ty->getScalarType(), M_PI / 2);
  Constant *negHalfPi = ConstantFP::get(Ty->getScalarType(), -M_PI / 2);
  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  // Splat the constants for vector operands.
  if (Ty->isVectorTy()) {
    unsigned vecSize = Ty->getVectorNumElements();
    pi = ConstantVector::getSplat(vecSize, pi);
    halfPi = ConstantVector::getSplat(vecSize, halfPi);
    negHalfPi = ConstantVector::getSplat(vecSize, negHalfPi);
    zero = ConstantVector::getSplat(vecSize, zero);
  }
  Value *atanAddPi = Builder.CreateFAdd(atan, pi);
  Value *atanSubPi = Builder.CreateFSub(atan, pi);
  // x > 0 -> atan.
  Value *result = atan;
  Value *xLt0 = Builder.CreateFCmpOLT(x, zero);
  Value *xEq0 = Builder.CreateFCmpOEQ(x, zero);
  Value *yGe0 = Builder.CreateFCmpOGE(y, zero);
  Value *yLt0 = Builder.CreateFCmpOLT(y, zero);
  // x < 0, y >= 0 -> atan + pi.
  Value *xLt0AndyGe0 = Builder.CreateAnd(xLt0, yGe0);
  result = Builder.CreateSelect(xLt0AndyGe0, atanAddPi, result);
  // x < 0, y < 0 -> atan - pi.
  Value *xLt0AndYLt0 = Builder.CreateAnd(xLt0, yLt0);
  result = Builder.CreateSelect(xLt0AndYLt0, atanSubPi, result);
  // x == 0, y < 0 -> -pi/2
  Value *xEq0AndYLt0 = Builder.CreateAnd(xEq0, yLt0);
  result = Builder.CreateSelect(xEq0AndYLt0, negHalfPi, result);
  // x == 0, y > 0 -> pi/2
  // NOTE: yGe0 includes y == 0, so atan2(0, 0) yields +pi/2 here.
  Value *xEq0AndYGe0 = Builder.CreateAnd(xEq0, yGe0);
  result = Builder.CreateSelect(xEq0AndYGe0, halfPi, result);
  return result;
}
  1263. Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1264. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1265. hlsl::OP *hlslOP = &helper.hlslOP;
  1266. Type *Ty = CI->getType();
  1267. Type *EltTy = Ty->getScalarType();
  1268. DXIL::OpCode maxOp = DXIL::OpCode::FMax;
  1269. DXIL::OpCode minOp = DXIL::OpCode::FMin;
  1270. if (IOP == IntrinsicOp::IOP_uclamp) {
  1271. maxOp = DXIL::OpCode::UMax;
  1272. minOp = DXIL::OpCode::UMin;
  1273. } else if (EltTy->isIntegerTy()) {
  1274. maxOp = DXIL::OpCode::IMax;
  1275. minOp = DXIL::OpCode::IMin;
  1276. }
  1277. Value *x = CI->getArgOperand(HLOperandIndex::kClampOpXIdx);
  1278. Value *maxVal = CI->getArgOperand(HLOperandIndex::kClampOpMaxIdx);
  1279. Value *minVal = CI->getArgOperand(HLOperandIndex::kClampOpMinIdx);
  1280. IRBuilder<> Builder(CI);
  1281. // min(max(x, minVal), maxVal).
  1282. Value *maxXMinVal =
  1283. TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder);
  1284. return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder);
  1285. }
  1286. Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1287. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1288. hlsl::OP *hlslOP = &helper.hlslOP;
  1289. Function *discard =
  1290. hlslOP->GetOpFunc(OP::OpCode::Discard, Type::getVoidTy(CI->getContext()));
  1291. IRBuilder<> Builder(CI);
  1292. Value *cond = nullptr;
  1293. Value *arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1294. if (VectorType *VT = dyn_cast<VectorType>(arg->getType())) {
  1295. Value *elt = Builder.CreateExtractElement(arg, (uint64_t)0);
  1296. cond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
  1297. for (unsigned i = 1; i < VT->getNumElements(); i++) {
  1298. Value *elt = Builder.CreateExtractElement(arg, i);
  1299. Value *eltCond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
  1300. cond = Builder.CreateOr(cond, eltCond);
  1301. }
  1302. } else
  1303. cond = Builder.CreateFCmpOLT(arg, hlslOP->GetFloatConst(0));
  1304. Constant *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Discard);
  1305. Builder.CreateCall(discard, {opArg, cond});
  1306. return nullptr;
  1307. }
  1308. Value *TranslateCross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1309. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1310. VectorType *VT = cast<VectorType>(CI->getType());
  1311. DXASSERT_NOMSG(VT->getNumElements() == 3);
  1312. Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1313. Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1314. IRBuilder<> Builder(CI);
  1315. Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0);
  1316. Value *op0_y = Builder.CreateExtractElement(op0, 1);
  1317. Value *op0_z = Builder.CreateExtractElement(op0, 2);
  1318. Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0);
  1319. Value *op1_y = Builder.CreateExtractElement(op1, 1);
  1320. Value *op1_z = Builder.CreateExtractElement(op1, 2);
  1321. auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * {
  1322. Value *xy = Builder.CreateFMul(x0, y1);
  1323. Value *yx = Builder.CreateFMul(y0, x1);
  1324. return Builder.CreateFSub(xy, yx);
  1325. };
  1326. Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
  1327. Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
  1328. Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
  1329. Value *cross = UndefValue::get(VT);
  1330. cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0);
  1331. cross = Builder.CreateInsertElement(cross, zx_xz, 1);
  1332. cross = Builder.CreateInsertElement(cross, xy_yx, 2);
  1333. return cross;
  1334. }
  1335. Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1336. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1337. IRBuilder<> Builder(CI);
  1338. Type *Ty = CI->getType();
  1339. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1340. // 180/pi.
  1341. Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI);
  1342. if (Ty != Ty->getScalarType()) {
  1343. toDegreeConst =
  1344. ConstantVector::getSplat(Ty->getVectorNumElements(), toDegreeConst);
  1345. }
  1346. return Builder.CreateFMul(toDegreeConst, val);
  1347. }
  1348. Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1349. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1350. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1351. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1352. Type *Ty = src1->getType();
  1353. IRBuilder<> Builder(CI);
  1354. Value *Result = UndefValue::get(Ty);
  1355. Constant *oneConst = ConstantFP::get(Ty->getScalarType(), 1);
  1356. // dest.x = 1;
  1357. Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  1358. // dest.y = src0.y * src1.y;
  1359. Value *src0_y = Builder.CreateExtractElement(src0, 1);
  1360. Value *src1_y = Builder.CreateExtractElement(src1, 1);
  1361. Value *yMuly = Builder.CreateFMul(src0_y, src1_y);
  1362. Result = Builder.CreateInsertElement(Result, yMuly, 1);
  1363. // dest.z = src0.z;
  1364. Value *src0_z = Builder.CreateExtractElement(src0, 2);
  1365. Result = Builder.CreateInsertElement(Result, src0_z, 2);
  1366. // dest.w = src1.w;
  1367. Value *src1_w = Builder.CreateExtractElement(src1, 3);
  1368. Result = Builder.CreateInsertElement(Result, src1_w, 3);
  1369. return Result;
  1370. }
// firstbithigh(x): the DXIL op result is remapped so that, per the formula
// below, -1 (no bit set) passes through and any other result is converted
// with (bitWidth - 1 - result).
Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  Value *firstbitHi =
      TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi);
  IRBuilder<> Builder(CI);
  Constant *neg1 = Builder.getInt32(-1);
  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *Ty = src->getType();
  IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType());
  // Note: 'bitWidth' actually holds bitWidth - 1.
  Constant *bitWidth = Builder.getInt32(EltTy->getBitWidth()-1);
  if (Ty == Ty->getScalarType()) {
    // Scalar: one subtract + select.
    Value *sub = Builder.CreateSub(bitWidth, firstbitHi);
    Value *cond = Builder.CreateICmpEQ(neg1, firstbitHi);
    return Builder.CreateSelect(cond, neg1, sub);
  } else {
    // Vector: remap each element and rebuild the result vector.
    Value *result = UndefValue::get(CI->getType());
    unsigned vecSize = Ty->getVectorNumElements();
    for (unsigned i = 0; i < vecSize; i++) {
      Value *EltFirstBit = Builder.CreateExtractElement(firstbitHi, i);
      Value *sub = Builder.CreateSub(bitWidth, EltFirstBit);
      Value *cond = Builder.CreateICmpEQ(neg1, EltFirstBit);
      Value *Elt = Builder.CreateSelect(cond, neg1, sub);
      result = Builder.CreateInsertElement(result, Elt, i);
    }
    return result;
  }
}
  1401. Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1402. HLOperationLowerHelper &helper,
  1403. HLObjectOperationLowerHelper *pObjHelper,
  1404. bool &Translated) {
  1405. Value *firstbitLo =
  1406. TrivialUnaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1407. return firstbitLo;
  1408. }
// lit(n_dot_l, n_dot_h, m) -> (ambient, diffuse, specular, 1) where
//   ambient  = 1
//   diffuse  = (n_dot_l < 0) ? 0 : n_dot_l
//   specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0 : n_dot_h ^ m
Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  Value *n_dot_l = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *n_dot_h = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *m = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  IRBuilder<> Builder(CI);
  Type *Ty = m->getType();
  Value *Result = UndefValue::get(VectorType::get(Ty, 4));
  // Result = (ambient, diffuse, specular, 1)
  // ambient = 1.
  Constant *oneConst = ConstantFP::get(Ty, 1);
  Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
  // Result.w = 1.
  Result = Builder.CreateInsertElement(Result, oneConst, 3);
  // diffuse = (n_dot_l < 0) ? 0 : n_dot_l.
  Constant *zeroConst = ConstantFP::get(Ty, 0);
  Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst);
  Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l);
  Result = Builder.CreateInsertElement(Result, diffuse, 1);
  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h ^ m).
  Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst);
  Value *specCond = Builder.CreateOr(nlCmp, nhCmp);
  // The pow expansion is steered by the FXC-compatibility option — see
  // TranslatePowImpl for the difference between the two modes.
  bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
  Value *nhPowM = TranslatePowImpl(&helper.hlslOP, Builder, n_dot_h, m, isFXCCompatMode);
  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhPowM);
  Result = Builder.CreateInsertElement(Result, spec, 2);
  return Result;
}
  1437. Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1438. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1439. IRBuilder<> Builder(CI);
  1440. Type *Ty = CI->getType();
  1441. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1442. // pi/180.
  1443. Constant *toRadianConst = ConstantFP::get(Ty->getScalarType(), M_PI / 180);
  1444. if (Ty != Ty->getScalarType()) {
  1445. toRadianConst =
  1446. ConstantVector::getSplat(Ty->getVectorNumElements(), toRadianConst);
  1447. }
  1448. return Builder.CreateFMul(toRadianConst, val);
  1449. }
  1450. Value *TranslateF16ToF32(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1451. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1452. IRBuilder<> Builder(CI);
  1453. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1454. Type *Ty = CI->getType();
  1455. Function *f16tof32 =
  1456. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1457. return TrivialDxilOperation(
  1458. f16tof32, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1459. x->getType(), Ty, &helper.hlslOP, Builder);
  1460. }
  1461. Value *TranslateF32ToF16(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1462. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1463. IRBuilder<> Builder(CI);
  1464. Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1465. Type *Ty = CI->getType();
  1466. Function *f32tof16 =
  1467. helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  1468. return TrivialDxilOperation(
  1469. f32tof16, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
  1470. x->getType(), Ty, &helper.hlslOP, Builder);
  1471. }
// Shared helper for length()/distance(): computes the Euclidean length of
// val. For a vector of size > 1 it expands to a scalar sum of squared
// elements followed by the Sqrt DXIL op; for a 1-element vector or a scalar
// it reduces to FAbs (|x| == sqrt(x*x)).
Value *TranslateLength(CallInst *CI, Value *val, hlsl::OP *hlslOP) {
  IRBuilder<> Builder(CI);
  if (VectorType *VT = dyn_cast<VectorType>(val->getType())) {
    Value *Elt = Builder.CreateExtractElement(val, (uint64_t)0);
    unsigned size = VT->getNumElements();
    if (size > 1) {
      // Accumulate element-wise squares: Sum = x0*x0 + x1*x1 + ...
      Value *Sum = Builder.CreateFMul(Elt, Elt);
      for (unsigned i = 1; i < size; i++) {
        Elt = Builder.CreateExtractElement(val, i);
        Value *Mul = Builder.CreateFMul(Elt, Elt);
        Sum = Builder.CreateFAdd(Sum, Mul);
      }
      DXIL::OpCode sqrt = DXIL::OpCode::Sqrt;
      Function *dxilSqrt = hlslOP->GetOpFunc(sqrt, VT->getElementType());
      Value *opArg = hlslOP->GetI32Const((unsigned)sqrt);
      return Builder.CreateCall(dxilSqrt, {opArg, Sum},
                                hlslOP->GetOpCodeName(sqrt));
    } else {
      // Single-element vector: fall through to the scalar FAbs path.
      val = Elt;
    }
  }
  DXIL::OpCode fabs = DXIL::OpCode::FAbs;
  Function *dxilFAbs = hlslOP->GetOpFunc(fabs, val->getType());
  Value *opArg = hlslOP->GetI32Const((unsigned)fabs);
  return Builder.CreateCall(dxilFAbs, {opArg, val},
                            hlslOP->GetOpCodeName(fabs));
}
  1499. Value *TranslateLength(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1500. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1501. hlsl::OP *hlslOP = &helper.hlslOP;
  1502. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1503. return TranslateLength(CI, val, hlslOP);
  1504. }
  1505. Value *TranslateModF(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1506. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1507. hlsl::OP *hlslOP = &helper.hlslOP;
  1508. Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1509. Value *outIntPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1510. IRBuilder<> Builder(CI);
  1511. Value *intP =
  1512. TrivialDxilUnaryOperation(OP::OpCode::Round_z, val, hlslOP, Builder);
  1513. Value *fracP = Builder.CreateFSub(val, intP);
  1514. Builder.CreateStore(intP, outIntPtr);
  1515. return fracP;
  1516. }
  1517. Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1518. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1519. hlsl::OP *hlslOP = &helper.hlslOP;
  1520. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1521. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1522. IRBuilder<> Builder(CI);
  1523. Value *sub = Builder.CreateFSub(src0, src1);
  1524. return TranslateLength(CI, sub, hlslOP);
  1525. }
  1526. Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1527. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1528. hlsl::OP *hlslOP = &helper.hlslOP;
  1529. IRBuilder<> Builder(CI);
  1530. Type *Ty = CI->getType();
  1531. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1532. Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E);
  1533. if (Ty != Ty->getScalarType()) {
  1534. log2eConst =
  1535. ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst);
  1536. }
  1537. val = Builder.CreateFMul(log2eConst, val);
  1538. Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder);
  1539. return exp;
  1540. }
  1541. Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1542. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1543. hlsl::OP *hlslOP = &helper.hlslOP;
  1544. IRBuilder<> Builder(CI);
  1545. Type *Ty = CI->getType();
  1546. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1547. Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2);
  1548. if (Ty != Ty->getScalarType()) {
  1549. ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const);
  1550. }
  1551. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1552. return Builder.CreateFMul(ln2Const, log);
  1553. }
  1554. Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1555. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1556. hlsl::OP *hlslOP = &helper.hlslOP;
  1557. IRBuilder<> Builder(CI);
  1558. Type *Ty = CI->getType();
  1559. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1560. Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10);
  1561. if (Ty != Ty->getScalarType()) {
  1562. log2_10Const =
  1563. ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const);
  1564. }
  1565. Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
  1566. return Builder.CreateFMul(log2_10Const, log);
  1567. }
  1568. Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1569. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1570. hlsl::OP *hlslOP = &helper.hlslOP;
  1571. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1572. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1573. IRBuilder<> Builder(CI);
  1574. Value *div = Builder.CreateFDiv(src0, src1);
  1575. Value *negDiv = Builder.CreateFNeg(div);
  1576. Value *ge = Builder.CreateFCmpOGE(div, negDiv);
  1577. Value *absDiv =
  1578. TrivialDxilUnaryOperation(OP::OpCode::FAbs, div, hlslOP, Builder);
  1579. Value *frc =
  1580. TrivialDxilUnaryOperation(OP::OpCode::Frc, absDiv, hlslOP, Builder);
  1581. Value *negFrc = Builder.CreateFNeg(frc);
  1582. Value *realFrc = Builder.CreateSelect(ge, frc, negFrc);
  1583. return Builder.CreateFMul(realFrc, src1);
  1584. }
  1585. Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1586. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1587. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1588. if (isFloat) {
  1589. switch (IOP) {
  1590. case IntrinsicOp::IOP_max:
  1591. opcode = OP::OpCode::FMax;
  1592. break;
  1593. case IntrinsicOp::IOP_min:
  1594. default:
  1595. DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_min);
  1596. opcode = OP::OpCode::FMin;
  1597. break;
  1598. }
  1599. }
  1600. return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1601. }
  1602. Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1603. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1604. bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
  1605. if (isFloat) {
  1606. switch (IOP) {
  1607. case IntrinsicOp::IOP_mad:
  1608. default:
  1609. DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_mad);
  1610. opcode = OP::OpCode::FMad;
  1611. break;
  1612. }
  1613. }
  1614. return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper, Translated);
  1615. }
// Lowers frexp(val, exp) by direct IEEE-754 single-precision bit
// manipulation: the exponent (as a float) is stored through the out
// pointer and the mantissa, rebiased into [0.5, 1) by OR-ing in the bit
// pattern of 0.5f, is returned. Zero input yields zero for both results
// (masked via the `notZero` flag).
Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *expPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
  // IEEE-754 float32 field masks: exponent bits 30:23, mantissa bits 22:0.
  Constant *exponentMaskConst = ConstantInt::get(i32Ty, 0x7f800000);
  Constant *mantisaMaskConst = ConstantInt::get(i32Ty, 0x007fffff);
  Constant *exponentShiftConst = ConstantInt::get(i32Ty, 23);
  // 0x3f000000 is the bit pattern of 0.5f; OR-ing it into the mantissa
  // produces a value in [0.5, 1), and subtracting it from the raw exponent
  // field applies the matching bias.
  Constant *mantisaOrConst = ConstantInt::get(i32Ty, 0x3f000000);
  Constant *exponentBiasConst = ConstantInt::get(i32Ty, -(int)0x3f000000);
  Constant *zeroVal = hlslOP->GetFloatConst(0);
  // int iVal = asint(val);
  Type *dstTy = i32Ty;
  Type *Ty = val->getType();
  // Splat all constants to the operand's vector width if needed.
  if (Ty->isVectorTy()) {
    unsigned vecSize = Ty->getVectorNumElements();
    dstTy = VectorType::get(i32Ty, vecSize);
    exponentMaskConst = ConstantVector::getSplat(vecSize, exponentMaskConst);
    mantisaMaskConst = ConstantVector::getSplat(vecSize, mantisaMaskConst);
    exponentShiftConst = ConstantVector::getSplat(vecSize, exponentShiftConst);
    mantisaOrConst = ConstantVector::getSplat(vecSize, mantisaOrConst);
    exponentBiasConst = ConstantVector::getSplat(vecSize, exponentBiasConst);
    zeroVal = ConstantVector::getSplat(vecSize, zeroVal);
  }
  // bool ne = val != 0;
  Value *notZero = Builder.CreateFCmpUNE(val, zeroVal);
  // zext to i32 gives an all-zero-or-one mask; the AND below zeroes the
  // exponent/mantissa results when val == 0.
  notZero = Builder.CreateZExt(notZero, dstTy);
  Value *intVal = Builder.CreateBitCast(val, dstTy);
  // temp = intVal & exponentMask;
  Value *temp = Builder.CreateAnd(intVal, exponentMaskConst);
  // temp = temp + exponentBias;
  temp = Builder.CreateAdd(temp, exponentBiasConst);
  // temp = temp & ne;
  temp = Builder.CreateAnd(temp, notZero);
  // temp = temp >> exponentShift;
  temp = Builder.CreateAShr(temp, exponentShiftConst);
  // exp = float(temp);
  Value *exp = Builder.CreateSIToFP(temp, Ty);
  Builder.CreateStore(exp, expPtr);
  // temp = iVal & mantisaMask;
  temp = Builder.CreateAnd(intVal, mantisaMaskConst);
  // temp = temp | mantisaOr;
  temp = Builder.CreateOr(temp, mantisaOrConst);
  // mantisa = temp & ne;
  Value *mantisa = Builder.CreateAnd(temp, notZero);
  return Builder.CreateBitCast(mantisa, Ty);
}
  1665. Value *TranslateLdExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1666. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1667. hlsl::OP *hlslOP = &helper.hlslOP;
  1668. Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1669. Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1670. IRBuilder<> Builder(CI);
  1671. Value *exp =
  1672. TrivialDxilUnaryOperation(OP::OpCode::Exp, src1, hlslOP, Builder);
  1673. return Builder.CreateFMul(exp, src0);
  1674. }
  1675. Value *TranslateFWidth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1676. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1677. hlsl::OP *hlslOP = &helper.hlslOP;
  1678. Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1679. IRBuilder<> Builder(CI);
  1680. Value *ddx =
  1681. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseX, src, hlslOP, Builder);
  1682. Value *absDdx =
  1683. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddx, hlslOP, Builder);
  1684. Value *ddy =
  1685. TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseY, src, hlslOP, Builder);
  1686. Value *absDdy =
  1687. TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddy, hlslOP, Builder);
  1688. return Builder.CreateFAdd(absDdx, absDdy);
  1689. }
  1690. Value *TranslateLerp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1691. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1692. // x + s(y-x)
  1693. Value *x = CI->getArgOperand(HLOperandIndex::kLerpOpXIdx);
  1694. Value *y = CI->getArgOperand(HLOperandIndex::kLerpOpYIdx);
  1695. IRBuilder<> Builder(CI);
  1696. Value *ySubx = Builder.CreateFSub(y, x);
  1697. Value *s = CI->getArgOperand(HLOperandIndex::kLerpOpSIdx);
  1698. Value *sMulSub = Builder.CreateFMul(s, ySubx);
  1699. return Builder.CreateFAdd(x, sMulSub);
  1700. }
  1701. Value *TrivialDotOperation(OP::OpCode opcode, Value *src0,
  1702. Value *src1, hlsl::OP *hlslOP,
  1703. IRBuilder<> &Builder) {
  1704. Type *Ty = src0->getType()->getScalarType();
  1705. Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
  1706. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  1707. SmallVector<Value *, 9> args;
  1708. args.emplace_back(opArg);
  1709. unsigned vecSize = src0->getType()->getVectorNumElements();
  1710. for (unsigned i = 0; i < vecSize; i++)
  1711. args.emplace_back(Builder.CreateExtractElement(src0, i));
  1712. for (unsigned i = 0; i < vecSize; i++)
  1713. args.emplace_back(Builder.CreateExtractElement(src1, i));
  1714. Value *dotOP = Builder.CreateCall(dxilFunc, args);
  1715. return dotOP;
  1716. }
// Lowers an integer dot product of two vectors of size 1-4. There is no
// integer dot DXIL op, so the product is built as a multiply of element 0
// followed by an IMad chain over the remaining elements. The switch cases
// deliberately fall through from 4 -> 3 -> 2 so each higher size adds one
// more IMad on top of the smaller size's chain.
Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0);
  Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0);
  Value *Result = Builder.CreateMul(Elt0, Elt1);
  switch (vecSize) {
  case 4:
    Elt0 = Builder.CreateExtractElement(arg0, 3);
    Elt1 = Builder.CreateExtractElement(arg1, 3);
    Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result, hlslOP, Builder);
    // Pass thru.
  case 3:
    Elt0 = Builder.CreateExtractElement(arg0, 2);
    Elt1 = Builder.CreateExtractElement(arg1, 2);
    Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result, hlslOP, Builder);
    // Pass thru.
  case 2:
    Elt0 = Builder.CreateExtractElement(arg0, 1);
    Elt1 = Builder.CreateExtractElement(arg1, 1);
    Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result, hlslOP, Builder);
    break;
  default:
  case 1:
    // Size 1: the element-0 multiply above is already the full result.
    DXASSERT(vecSize == 1, "invalid vector size.");
  }
  return Result;
}
  1743. Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize,
  1744. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  1745. switch (vecSize) {
  1746. case 2:
  1747. return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder);
  1748. break;
  1749. case 3:
  1750. return TrivialDotOperation(OP::OpCode::Dot3, arg0, arg1, hlslOP, Builder);
  1751. break;
  1752. case 4:
  1753. return TrivialDotOperation(OP::OpCode::Dot4, arg0, arg1, hlslOP, Builder);
  1754. break;
  1755. default:
  1756. DXASSERT(vecSize == 1, "wrong vector size");
  1757. {
  1758. Value *vecMul = Builder.CreateFMul(arg0, arg1);
  1759. return Builder.CreateExtractElement(vecMul, (uint64_t)0);
  1760. }
  1761. break;
  1762. }
  1763. }
  1764. Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1765. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1766. hlsl::OP *hlslOP = &helper.hlslOP;
  1767. Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1768. Type *Ty = arg0->getType();
  1769. unsigned vecSize = Ty->getVectorNumElements();
  1770. Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1771. IRBuilder<> Builder(CI);
  1772. if (Ty->getScalarType()->isFloatingPointTy()) {
  1773. return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
  1774. } else {
  1775. return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder);
  1776. }
  1777. }
  1778. Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1779. HLOperationLowerHelper &helper,
  1780. HLObjectOperationLowerHelper *pObjHelper,
  1781. bool &Translated) {
  1782. hlsl::OP *hlslOP = &helper.hlslOP;
  1783. Type *Ty = CI->getType();
  1784. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1785. VectorType *VT = cast<VectorType>(Ty);
  1786. unsigned vecSize = VT->getNumElements();
  1787. IRBuilder<> Builder(CI);
  1788. Value *dot = TranslateFDot(op, op, vecSize, hlslOP, Builder);
  1789. DXIL::OpCode rsqrtOp = DXIL::OpCode::Rsqrt;
  1790. Function *dxilRsqrt = hlslOP->GetOpFunc(rsqrtOp, VT->getElementType());
  1791. Value *rsqrt = Builder.CreateCall(
  1792. dxilRsqrt, {hlslOP->GetI32Const((unsigned)rsqrtOp), dot},
  1793. hlslOP->GetOpCodeName(rsqrtOp));
  1794. Value *vecRsqrt = UndefValue::get(VT);
  1795. for (unsigned i = 0; i < VT->getNumElements(); i++)
  1796. vecRsqrt = Builder.CreateInsertElement(vecRsqrt, rsqrt, i);
  1797. return Builder.CreateFMul(op, vecRsqrt);
  1798. }
  1799. Value *TranslateReflect(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1800. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1801. hlsl::OP *hlslOP = &helper.hlslOP;
  1802. // v = i - 2 * n * dot(i, n).
  1803. IRBuilder<> Builder(CI);
  1804. Value *i = CI->getArgOperand(HLOperandIndex::kReflectOpIIdx);
  1805. Value *n = CI->getArgOperand(HLOperandIndex::kReflectOpNIdx);
  1806. VectorType *VT = cast<VectorType>(i->getType());
  1807. unsigned vecSize = VT->getNumElements();
  1808. Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  1809. // 2 * dot (i, n).
  1810. dot = Builder.CreateFMul(hlslOP->GetFloatConst(2), dot);
  1811. // 2 * n * dot(i, n).
  1812. Value *vecDot = Builder.CreateVectorSplat(vecSize, dot);
  1813. Value *nMulDot = Builder.CreateFMul(vecDot, n);
  1814. // i - 2 * n * dot(i, n).
  1815. return Builder.CreateFSub(i, nMulDot);
  1816. }
  1817. Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1818. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1819. hlsl::OP *hlslOP = &helper.hlslOP;
  1820. // d = dot(i, n);
  1821. // t = 1 - eta * eta * ( 1 - d*d);
  1822. // cond = t >= 1;
  1823. // r = eta * i - (eta * d + sqrt(t)) * n;
  1824. // return cond ? r : 0;
  1825. IRBuilder<> Builder(CI);
  1826. Value *i = CI->getArgOperand(HLOperandIndex::kRefractOpIIdx);
  1827. Value *n = CI->getArgOperand(HLOperandIndex::kRefractOpNIdx);
  1828. Value *eta = CI->getArgOperand(HLOperandIndex::kRefractOpEtaIdx);
  1829. VectorType *VT = cast<VectorType>(i->getType());
  1830. unsigned vecSize = VT->getNumElements();
  1831. Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  1832. // eta * eta;
  1833. Value *eta2 = Builder.CreateFMul(eta, eta);
  1834. // d*d;
  1835. Value *dot2 = Builder.CreateFMul(dot, dot);
  1836. Constant *one = ConstantFP::get(eta->getType(), 1);
  1837. Constant *zero = ConstantFP::get(eta->getType(), 0);
  1838. // 1- d*d;
  1839. dot2 = Builder.CreateFSub(one, dot2);
  1840. // eta * eta * (1-d*d);
  1841. eta2 = Builder.CreateFMul(dot2, eta2);
  1842. // t = 1 - eta * eta * ( 1 - d*d);
  1843. Value *t = Builder.CreateFSub(one, eta2);
  1844. // cond = t >= 0;
  1845. Value *cond = Builder.CreateFCmpOGE(t, zero);
  1846. // eta * i;
  1847. Value *vecEta = UndefValue::get(VT);
  1848. for (unsigned i = 0; i < vecSize; i++)
  1849. vecEta = Builder.CreateInsertElement(vecEta, eta, i);
  1850. Value *etaMulI = Builder.CreateFMul(i, vecEta);
  1851. // sqrt(t);
  1852. Value *sqrt = TrivialDxilUnaryOperation(OP::OpCode::Sqrt, t, hlslOP, Builder);
  1853. // eta * d;
  1854. Value *etaMulD = Builder.CreateFMul(eta, dot);
  1855. // eta * d + sqrt(t);
  1856. Value *etaSqrt = Builder.CreateFAdd(etaMulD, sqrt);
  1857. // (eta * d + sqrt(t)) * n;
  1858. Value *vecEtaSqrt = Builder.CreateVectorSplat(vecSize, etaSqrt);
  1859. Value *r = Builder.CreateFMul(vecEtaSqrt, n);
  1860. // r = eta * i - (eta * d + sqrt(t)) * n;
  1861. r = Builder.CreateFSub(etaMulI, r);
  1862. Value *refract =
  1863. Builder.CreateSelect(cond, r, ConstantVector::getSplat(vecSize, zero));
  1864. return refract;
  1865. }
  1866. Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1867. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1868. hlsl::OP *hlslOP = &helper.hlslOP;
  1869. // s = saturate((x-min)/(max-min)).
  1870. IRBuilder<> Builder(CI);
  1871. Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx);
  1872. Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx);
  1873. Value *maxSubMin = Builder.CreateFSub(maxVal, minVal);
  1874. Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx);
  1875. Value *xSubMin = Builder.CreateFSub(x, minVal);
  1876. Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin);
  1877. Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP,
  1878. Builder);
  1879. // return s * s *(3-2*s).
  1880. Constant *c2 = ConstantFP::get(CI->getType(),2);
  1881. Constant *c3 = ConstantFP::get(CI->getType(),3);
  1882. Value *sMul2 = Builder.CreateFMul(s, c2);
  1883. Value *result = Builder.CreateFSub(c3, sMul2);
  1884. result = Builder.CreateFMul(s, result);
  1885. result = Builder.CreateFMul(s, result);
  1886. return result;
  1887. }
// Lowers msad4(ref, src, accum). The reference uint is splatted to all 4
// lanes; the 4 byte-shifted views of src (src.x, and three combinations of
// src.y low bytes with src.x high bytes built via Bfi) form the other
// operand; a vector Msad op accumulates into accum.
Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *ref = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *src = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *accum = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  Type *Ty = CI->getType();
  IRBuilder<> Builder(CI);
  // Splat the scalar reference to a 4-vector.
  Value *vecRef = UndefValue::get(Ty);
  for (unsigned i = 0; i < 4; i++)
    vecRef = Builder.CreateInsertElement(vecRef, ref, i);
  Value *srcX = Builder.CreateExtractElement(src, (uint64_t)0);
  Value *srcY = Builder.CreateExtractElement(src, 1);
  Value *byteSrc = UndefValue::get(Ty);
  byteSrc = Builder.CreateInsertElement(byteSrc, srcX, (uint64_t)0);
  // Equivalent DXBC sequence:
  // ushr r0.yzw, srcX, l(0, 8, 16, 24)
  // bfi r1.yzw, l(0, 8, 16, 24), l(0, 24, 16, 8), srcX, r0.yyzw
  Value *bfiOpArg =
      hlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::Bfi));
  Value *imm8 = hlslOP->GetU32Const(8);
  Value *imm16 = hlslOP->GetU32Const(16);
  Value *imm24 = hlslOP->GetU32Const(24);
  Ty = ref->getType();
  // Get x[31:8].
  Value *srcXShift = Builder.CreateLShr(srcX, imm8);
  // y[0~7] x[31:8].
  Value *byteSrcElt = TrivialDxilOperation(
      DXIL::OpCode::Bfi, {bfiOpArg, imm8, imm24, srcY, srcXShift}, Ty, Ty,
      hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 1);
  // Get x[31:16].
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
  // y[0~15] x[31:16].
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
                                    {bfiOpArg, imm16, imm16, srcY, srcXShift},
                                    Ty, Ty, hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 2);
  // Get x[31:24].
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
  // y[0~23] x[31:24].
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
                                    {bfiOpArg, imm24, imm8, srcY, srcXShift},
                                    Ty, Ty, hlslOP, Builder);
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3);
  // Msad on vecref and byteSrc.
  return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, vecRef, byteSrc, accum,
                                     hlslOP, Builder);
}
  1936. Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1937. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1938. Type *Ty = CI->getType();
  1939. Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1940. IRBuilder<> Builder(CI);
  1941. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  1942. if (Ty != Ty->getScalarType()) {
  1943. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  1944. }
  1945. return Builder.CreateFDiv(one, op);
  1946. }
  1947. Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1948. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1949. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1950. Type *Ty = val->getType();
  1951. bool IsInt = Ty->getScalarType()->isIntegerTy();
  1952. IRBuilder<> Builder(CI);
  1953. Constant *zero = Constant::getNullValue(Ty);
  1954. Value *zeroLtVal = IsInt ? Builder.CreateICmpSLT(zero, val) : Builder.CreateFCmpOLT(zero, val);
  1955. Value *valLtZero = IsInt ? Builder.CreateICmpSLT(val, zero) : Builder.CreateFCmpOLT(val, zero);
  1956. zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
  1957. valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
  1958. return Builder.CreateSub(zeroLtVal, valLtZero);
  1959. }
  1960. Value *TranslateUSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1961. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1962. Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  1963. Type *Ty = val->getType();
  1964. IRBuilder<> Builder(CI);
  1965. Constant *zero = Constant::getNullValue(Ty);
  1966. Value *nonZero = Builder.CreateICmpNE(val, zero);
  1967. return Builder.CreateZExt(nonZero, CI->getType());
  1968. }
  1969. Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1970. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1971. Value *edge = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1972. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1973. Type *Ty = CI->getType();
  1974. IRBuilder<> Builder(CI);
  1975. Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
  1976. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  1977. Value *cond = Builder.CreateFCmpOLT(x, edge);
  1978. if (Ty != Ty->getScalarType()) {
  1979. one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
  1980. zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
  1981. }
  1982. return Builder.CreateSelect(cond, zero, one);
  1983. }
  1984. Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  1985. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1986. hlsl::OP *hlslOP = &helper.hlslOP;
  1987. Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  1988. Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  1989. bool isFXCCompatMode = CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
  1990. IRBuilder<> Builder(CI);
  1991. return TranslatePowImpl(hlslOP,Builder,x,y,isFXCCompatMode);
  1992. }
  1993. Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  1994. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  1995. hlsl::OP *hlslOP = &helper.hlslOP;
  1996. Type *Ty = CI->getType();
  1997. Value *n = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  1998. Value *i = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  1999. Value *ng = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  2000. IRBuilder<> Builder(CI);
  2001. unsigned vecSize = Ty->getVectorNumElements();
  2002. // -n x sign(dot(i, ng)).
  2003. Value *dotOp = TranslateFDot(i, ng, vecSize, hlslOP, Builder);
  2004. Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
  2005. Value *dotLtZero = Builder.CreateFCmpOLT(dotOp, zero);
  2006. Value *negN = Builder.CreateFNeg(n);
  2007. Value *faceforward = Builder.CreateSelect(dotLtZero, n, negN);
  2008. return faceforward;
  2009. }
  2010. }
  2011. // MOP intrinsics
  2012. namespace {
  2013. Value *TranslateGetSamplePosition(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  2014. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2015. hlsl::OP *hlslOP = &helper.hlslOP;
  2016. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2017. IRBuilder<> Builder(CI);
  2018. Value *sampleIdx =
  2019. CI->getArgOperand(HLOperandIndex::kGetSamplePositionSampleIdxOpIndex);
  2020. OP::OpCode opcode = OP::OpCode::Texture2DMSGetSamplePosition;
  2021. llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  2022. Function *dxilFunc =
  2023. hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
  2024. Value *args[] = {opArg, handle, sampleIdx};
  2025. Value *samplePos = Builder.CreateCall(dxilFunc, args);
  2026. Value *result = UndefValue::get(CI->getType());
  2027. Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
  2028. Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
  2029. result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
  2030. result = Builder.CreateInsertElement(result, samplePosY, 1);
  2031. return result;
  2032. }
// Lowers an HL GetDimensions call to the DXIL GetDimensions op, then stores
// each returned dimension through the out-parameter pointers carried by the
// HL call (converting to float when the out-param is a float lvalue).
// Returns nullptr: the HL call only communicates through its out pointers.
Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                              HLOperationLowerHelper &helper,
                              HLObjectOperationLowerHelper *pObjHelper,
                              bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  DxilResource::Kind RK = pObjHelper->GetRK(handle);

  IRBuilder<> Builder(CI);
  OP::OpCode opcode = OP::OpCode::GetDimensions;
  llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Function *dxilFunc =
      hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));

  Type *i32Ty = Type::getInt32Ty(CI->getContext());
  // Mip level operand of the DXIL op; stays undef for non-texture kinds.
  Value *mipLevel = UndefValue::get(i32Ty);
  // Index of the width out-param in the HL call; shifts by one depending on
  // whether the overload takes an explicit mip-level in-param first.
  unsigned widthOpIdx = HLOperandIndex::kGetDimensionsMipWidthOpIndex;
  switch (RK) {
  case DxilResource::Kind::Texture1D:
  case DxilResource::Kind::Texture1DArray:
  case DxilResource::Kind::Texture2D:
  case DxilResource::Kind::Texture2DArray:
  case DxilResource::Kind::TextureCube:
  case DxilResource::Kind::TextureCubeArray:
  case DxilResource::Kind::Texture3D: {
    Value *opMipLevel =
        CI->getArgOperand(HLOperandIndex::kGetDimensionsMipLevelOpIndex);
    // mipLevel is in parameter, should not be pointer.
    if (!opMipLevel->getType()->isPointerTy())
      mipLevel = opMipLevel;
    else {
      // No mip level.
      widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
      mipLevel = ConstantInt::get(i32Ty, 0);
    }
  } break;
  default:
    // Buffers and multisampled textures never take a mip argument.
    widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
    break;
  }
  Value *args[] = {opArg, handle, mipLevel};
  Value *dims = Builder.CreateCall(dxilFunc, args);

  // Walk the returned dims struct component by component, storing each one
  // through the matching out-param pointer.
  unsigned dimensionIdx = 0;
  Value *width = Builder.CreateExtractValue(dims, dimensionIdx++);
  Value *widthPtr = CI->getArgOperand(widthOpIdx);
  if (widthPtr->getType()->getPointerElementType()->isFloatingPointTy())
    width = Builder.CreateSIToFP(width,
                                 widthPtr->getType()->getPointerElementType());

  Builder.CreateStore(width, widthPtr);

  if (RK == DxilResource::Kind::StructuredBuffer) {
    // Set stride.
    Value *stridePtr = CI->getArgOperand(widthOpIdx + 1);
    const DataLayout &DL = helper.dataLayout;
    Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
    Type *bufTy = pObjHelper->GetResourceType(handle);
    Type *bufRetTy = bufTy->getStructElementType(0);
    // Stride is the allocation size of one structured-buffer element.
    unsigned stride = DL.getTypeAllocSize(bufRetTy);
    Builder.CreateStore(hlslOP->GetU32Const(stride), stridePtr);
  } else {
    if (widthOpIdx == HLOperandIndex::kGetDimensionsMipWidthOpIndex ||
        // Samples is in w channel too.
        RK == DXIL::ResourceKind::Texture2DMS) {
      // Has mip.
      // Store all dims except the last out-param, which is the mip count
      // (or sample count) and comes from the w channel below.
      for (unsigned argIdx = widthOpIdx + 1;
           argIdx < CI->getNumArgOperands() - 1; argIdx++) {
        Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
        Value *ptr = CI->getArgOperand(argIdx);
        if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
          dim = Builder.CreateSIToFP(dim,
                                     ptr->getType()->getPointerElementType());
        Builder.CreateStore(dim, ptr);
      }
      // NumOfLevel is in w channel.
      dimensionIdx = 3;
      Value *dim = Builder.CreateExtractValue(dims, dimensionIdx);
      Value *ptr = CI->getArgOperand(CI->getNumArgOperands() - 1);
      if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
        dim =
            Builder.CreateSIToFP(dim, ptr->getType()->getPointerElementType());
      Builder.CreateStore(dim, ptr);
    } else {
      // No mip/sample count out-param: store the remaining dims in order.
      for (unsigned argIdx = widthOpIdx + 1; argIdx < CI->getNumArgOperands();
           argIdx++) {
        Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
        Value *ptr = CI->getArgOperand(argIdx);
        if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
          dim = Builder.CreateSIToFP(dim,
                                     ptr->getType()->getPointerElementType());
        Builder.CreateStore(dim, ptr);
      }
    }
  }
  return nullptr;
}
  2123. Value *GenerateUpdateCounter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2124. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2125. hlsl::OP *hlslOP = &helper.hlslOP;
  2126. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  2127. pObjHelper->MarkHasCounter(handle->getType(), handle);
  2128. bool bInc = IOP == IntrinsicOp::MOP_IncrementCounter;
  2129. IRBuilder<> Builder(CI);
  2130. OP::OpCode OpCode = OP::OpCode::BufferUpdateCounter;
  2131. Value *OpCodeArg = hlslOP->GetU32Const((unsigned)OpCode);
  2132. Value *IncVal = hlslOP->GetI8Const(bInc ? 1 : -1);
  2133. // Create BufferUpdateCounter call.
  2134. Value *Args[] = {OpCodeArg, handle, IncVal};
  2135. Function *F =
  2136. hlslOP->GetOpFunc(OpCode, Type::getVoidTy(handle->getContext()));
  2137. return Builder.CreateCall(F, Args);
  2138. }
  2139. Value *ScalarizeResRet(Type *RetTy, Value *ResRet, IRBuilder<> &Builder) {
  2140. // Extract value part.
  2141. Value *retVal = llvm::UndefValue::get(RetTy);
  2142. if (RetTy->isVectorTy()) {
  2143. for (unsigned i = 0; i < RetTy->getVectorNumElements(); i++) {
  2144. Value *retComp = Builder.CreateExtractValue(ResRet, i);
  2145. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2146. }
  2147. } else {
  2148. retVal = Builder.CreateExtractValue(ResRet, 0);
  2149. }
  2150. return retVal;
  2151. }
  2152. Value *ScalarizeElements(Type *RetTy, ArrayRef<Value*> Elts, IRBuilder<> &Builder) {
  2153. // Extract value part.
  2154. Value *retVal = llvm::UndefValue::get(RetTy);
  2155. if (RetTy->isVectorTy()) {
  2156. unsigned vecSize = RetTy->getVectorNumElements();
  2157. DXASSERT(vecSize <= Elts.size(), "vector size mismatch");
  2158. for (unsigned i = 0; i < vecSize; i++) {
  2159. Value *retComp = Elts[i];
  2160. retVal = Builder.CreateInsertElement(retVal, retComp, i);
  2161. }
  2162. } else {
  2163. retVal = Elts[0];
  2164. }
  2165. return retVal;
  2166. }
  2167. void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder,
  2168. hlsl::OP *hlslOp) {
  2169. if (status && !isa<UndefValue>(status)) {
  2170. Value *statusVal = Builder.CreateExtractValue(ResRet, DXIL::kResRetStatusIndex);
  2171. Value *checkAccessOp = hlslOp->GetI32Const(
  2172. static_cast<unsigned>(DXIL::OpCode::CheckAccessFullyMapped));
  2173. Function *checkAccessFn = hlslOp->GetOpFunc(
  2174. DXIL::OpCode::CheckAccessFullyMapped, statusVal->getType());
  2175. // CheckAccess on status.
  2176. Value *bStatus =
  2177. Builder.CreateCall(checkAccessFn, {checkAccessOp, statusVal});
  2178. Value *extStatus =
  2179. Builder.CreateZExt(bStatus, Type::getInt32Ty(status->getContext()));
  2180. Builder.CreateStore(extStatus, status);
  2181. }
  2182. }
  2183. Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) {
  2184. Value *Result = UndefValue::get(DstTy);
  2185. for (unsigned i = 0; i < DstTy->getVectorNumElements(); i++)
  2186. Result = Builder.CreateInsertElement(Result, Elt, i);
  2187. return Result;
  2188. }
  2189. Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2190. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2191. hlsl::OP *hlslOP = &helper.hlslOP;
  2192. Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  2193. Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  2194. Type *arg0Ty = arg0->getType();
  2195. Type *arg1Ty = arg1->getType();
  2196. IRBuilder<> Builder(CI);
  2197. if (arg0Ty->isVectorTy()) {
  2198. if (arg1Ty->isVectorTy()) {
  2199. // mul(vector, vector) == dot(vector, vector)
  2200. unsigned vecSize = arg0Ty->getVectorNumElements();
  2201. if (arg0Ty->getScalarType()->isFloatingPointTy()) {
  2202. return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
  2203. }
  2204. else {
  2205. return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder);
  2206. }
  2207. }
  2208. else {
  2209. // mul(vector, scalar) == vector * scalar-splat
  2210. arg1 = SplatToVector(arg1, arg0Ty, Builder);
  2211. }
  2212. }
  2213. else {
  2214. if (arg1Ty->isVectorTy()) {
  2215. // mul(scalar, vector) == scalar-splat * vector
  2216. arg0 = SplatToVector(arg0, arg1Ty, Builder);
  2217. }
  2218. // else mul(scalar, scalar) == scalar * scalar;
  2219. }
  2220. // create fmul/mul for the pair of vectors or scalars
  2221. if (arg0Ty->getScalarType()->isFloatingPointTy()) {
  2222. return Builder.CreateFMul(arg0, arg1);
  2223. }
  2224. else {
  2225. return Builder.CreateMul(arg0, arg1);
  2226. }
  2227. }
  2228. // Sample intrinsics.
// Gathers and normalizes the operands of an HL Sample* / CalculateLOD call so
// each per-opcode lowering only has to assemble the final DXIL argument list.
struct SampleHelper {
  SampleHelper(CallInst *CI, OP::OpCode op,
               HLObjectOperationLowerHelper *pObjHelper);

  // Resolved opcode; set to NumOpCodes by the constructor when the resource
  // kind is invalid so callers can bail out without translating.
  OP::OpCode opcode;
  Value *texHandle;
  Value *samplerHandle;
  static const unsigned kMaxCoordDimensions = 4;
  // Scalarized coordinate; unused trailing lanes are undef floats.
  Value *coord[kMaxCoordDimensions];
  Value *special; // For CompareValue, Bias, LOD.
  // SampleGrad only.
  static const unsigned kMaxDDXYDimensions = 3;
  Value *ddx[kMaxDDXYDimensions];
  Value *ddy[kMaxDDXYDimensions];
  // Optional.
  static const unsigned kMaxOffsetDimensions = 3;
  Value *offset[kMaxOffsetDimensions];
  Value *clamp;
  // Status out-param pointer, or nullptr when the overload has none.
  Value *status;

  // Scalarize the coordinate vector arg; pad to kMaxCoordDimensions with
  // undef floats.
  void TranslateCoord(CallInst *CI, unsigned coordIdx,
                      unsigned coordDimensions) {
    Value *coordArg = CI->getArgOperand(coordIdx);
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < coordDimensions; i++)
      coord[i] = Builder.CreateExtractElement(coordArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
      coord[i] = undefF;
  }
  // Scalarize the optional offset arg if the call carries one; otherwise
  // (and for padding lanes) every slot is an undef i32.
  void TranslateOffset(CallInst *CI, unsigned offsetIdx,
                       unsigned offsetDimensions) {
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
    if (CI->getNumArgOperands() > offsetIdx) {
      Value *offsetArg = CI->getArgOperand(offsetIdx);
      IRBuilder<> Builder(CI);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
      for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
        offset[i] = undefI;
    } else {
      for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
        offset[i] = undefI;
    }
  }
  // The clamp arg is optional; a vector clamp is reduced to its x lane.
  // Missing clamp becomes an undef float.
  void SetClamp(CallInst *CI, unsigned clampIdx) {
    if (CI->getNumArgOperands() > clampIdx) {
      clamp = CI->getArgOperand(clampIdx);
      if (clamp->getType()->isVectorTy()) {
        IRBuilder<> Builder(CI);
        clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
      }
    } else
      clamp = UndefValue::get(Type::getFloatTy(CI->getContext()));
  }
  // The status out-param, when present, is always the last argument.
  void SetStatus(CallInst *CI, unsigned statusIdx) {
    if (CI->getNumArgOperands() == (statusIdx + 1))
      status = CI->getArgOperand(statusIdx);
    else
      status = nullptr;
  }
  // Scalarize a ddx/ddy gradient vector into ddxy; pad to
  // kMaxDDXYDimensions with undef floats.
  void SetDDXY(CallInst *CI, MutableArrayRef<Value *> ddxy, Value *ddxyArg,
               unsigned ddxySize) {
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < ddxySize; i++)
      ddxy[i] = Builder.CreateExtractElement(ddxyArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = ddxySize; i < kMaxDDXYDimensions; i++)
      ddxy[i] = undefF;
  }
};
// Pulls handles, coordinate, offsets, clamp, status, and the per-opcode
// "special" operand (LOD / bias / compare value) out of the HL call.
// Sets opcode to NumOpCodes when the resource kind cannot be resolved.
SampleHelper::SampleHelper(CallInst *CI, OP::OpCode op,
                           HLObjectOperationLowerHelper *pObjHelper)
    : opcode(op) {
  const unsigned thisIdx =
      HLOperandIndex::kHandleOpIdx; // opcode takes arg0, this pointer is arg1.
  const unsigned kSamplerArgIndex = HLOperandIndex::kSampleSamplerArgIndex;
  IRBuilder<> Builder(CI);
  texHandle = CI->getArgOperand(thisIdx);
  samplerHandle = CI->getArgOperand(kSamplerArgIndex);

  DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
  // Unresolvable resource kind: flag with NumOpCodes so callers skip this
  // call instead of emitting a bad DXIL op.
  if (RK == DXIL::ResourceKind::Invalid) {
    opcode = DXIL::OpCode::NumOpCodes;
    return;
  }
  unsigned coordDimensions = DxilResource::GetNumCoords(RK);
  unsigned offsetDimensions = DxilResource::GetNumOffsets(RK);

  const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  TranslateCoord(CI, kCoordArgIdx, coordDimensions);

  special = nullptr;

  // Each opcode's trailing operands live at different HL argument indices.
  switch (op) {
  case OP::OpCode::Sample:
    TranslateOffset(CI, HLOperandIndex::kSampleOffsetArgIndex,
                    offsetDimensions);
    SetClamp(CI, HLOperandIndex::kSampleClampArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleStatusArgIndex);
    break;
  case OP::OpCode::SampleLevel:
    special = CI->getArgOperand(HLOperandIndex::kSampleLLevelArgIndex);
    TranslateOffset(CI, HLOperandIndex::kSampleLOffsetArgIndex,
                    offsetDimensions);
    SetStatus(CI, HLOperandIndex::kSampleLStatusArgIndex);
    break;
  case OP::OpCode::SampleBias:
    special = CI->getArgOperand(HLOperandIndex::kSampleBBiasArgIndex);
    TranslateOffset(CI, HLOperandIndex::kSampleBOffsetArgIndex,
                    offsetDimensions);
    SetClamp(CI, HLOperandIndex::kSampleBClampArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleBStatusArgIndex);
    break;
  case OP::OpCode::SampleCmp:
    special = CI->getArgOperand(HLOperandIndex::kSampleCmpCmpValArgIndex);
    TranslateOffset(CI, HLOperandIndex::kSampleCmpOffsetArgIndex,
                    offsetDimensions);
    SetClamp(CI, HLOperandIndex::kSampleCmpClampArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex);
    break;
  case OP::OpCode::SampleCmpLevelZero:
    special = CI->getArgOperand(HLOperandIndex::kSampleCmpLZCmpValArgIndex);
    TranslateOffset(CI, HLOperandIndex::kSampleCmpLZOffsetArgIndex,
                    offsetDimensions);
    SetStatus(CI, HLOperandIndex::kSampleCmpLZStatusArgIndex);
    break;
  case OP::OpCode::SampleGrad:
    // NOTE(review): ddx/ddy are sized with offsetDimensions — presumably the
    // gradient dimensionality matches the offset dimensionality for every
    // resource kind (both exclude array/cube axes); confirm against
    // DxilResource::GetNumOffsets.
    SetDDXY(CI, ddx, CI->getArgOperand(HLOperandIndex::kSampleGDDXArgIndex),
            offsetDimensions);
    SetDDXY(CI, ddy, CI->getArgOperand(HLOperandIndex::kSampleGDDYArgIndex),
            offsetDimensions);
    TranslateOffset(CI, HLOperandIndex::kSampleGOffsetArgIndex,
                    offsetDimensions);
    SetClamp(CI, HLOperandIndex::kSampleGClampArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleGStatusArgIndex);
    break;
  case OP::OpCode::CalculateLOD:
    // Only need coord for LOD calculation.
    break;
  default:
    DXASSERT(0, "invalid opcode for Sample");
    break;
  }
}
  2367. Value *TranslateCalculateLOD(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2368. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  2369. hlsl::OP *hlslOP = &helper.hlslOP;
  2370. SampleHelper sampleHelper(CI, OP::OpCode::CalculateLOD, pObjHelper);
  2371. if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
  2372. Translated = false;
  2373. return nullptr;
  2374. }
  2375. bool bClamped = IOP == IntrinsicOp::MOP_CalculateLevelOfDetail;
  2376. IRBuilder<> Builder(CI);
  2377. Value *opArg =
  2378. hlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::CalculateLOD));
  2379. Value *clamped = hlslOP->GetI1Const(bClamped);
  2380. Value *args[] = {opArg,
  2381. sampleHelper.texHandle,
  2382. sampleHelper.samplerHandle,
  2383. sampleHelper.coord[0],
  2384. sampleHelper.coord[1],
  2385. sampleHelper.coord[2],
  2386. clamped};
  2387. Function *dxilFunc = hlslOP->GetOpFunc(OP::OpCode::CalculateLOD,
  2388. Type::getFloatTy(opArg->getContext()));
  2389. Value *LOD = Builder.CreateCall(dxilFunc, args);
  2390. return LOD;
  2391. }
  2392. Value *TranslateCheckAccess(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  2393. HLOperationLowerHelper &helper,
  2394. HLObjectOperationLowerHelper *pObjHelper,
  2395. bool &Translated) {
  2396. // Translate CheckAccess into uint->bool, later optimization should remove it.
  2397. // Real checkaccess is generated in UpdateStatus.
  2398. IRBuilder<> Builder(CI);
  2399. Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  2400. return Builder.CreateTrunc(V, helper.i1Ty);
  2401. }
  2402. void GenerateDxilSample(CallInst *CI, Function *F, ArrayRef<Value *> sampleArgs,
  2403. Value *status, hlsl::OP *hlslOp) {
  2404. IRBuilder<> Builder(CI);
  2405. CallInst *call = Builder.CreateCall(F, sampleArgs);
  2406. // extract value part
  2407. Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);
  2408. // Replace ret val.
  2409. CI->replaceAllUsesWith(retVal);
  2410. // get status
  2411. if (status) {
  2412. UpdateStatus(call, status, Builder, hlslOp);
  2413. }
  2414. }
// Lowers an HL Sample* call to the matching DXIL sample op. All variants
// share opcode/handles/coord/offset operands and differ only in their
// trailing operands (LOD, bias, compare value, gradients, clamp).
// GenerateDxilSample emits the call and replaces CI, so this returns nullptr.
Value *TranslateSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  SampleHelper sampleHelper(CI, opcode, pObjHelper);
  // Unresolved resource kind: leave the HL call untranslated.
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
    Translated = false;
    return nullptr;
  }
  Type *Ty = CI->getType();
  // Overload the DXIL op on the scalar element type of the result.
  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());

  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);

  switch (opcode) {
  case OP::OpCode::Sample: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleLevel: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // LOD.
        sampleHelper.special};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleGrad: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Ddx.
        sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
        // Ddy.
        sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleBias: {
    // Clamp bias for immediate.
    // A constant bias is clamped at compile time to the legal
    // [kMinMipLodBias, kMaxMipLodBias] range.
    Value *bias = sampleHelper.special;
    if (ConstantFP *FP = dyn_cast<ConstantFP>(bias)) {
      float v = FP->getValueAPF().convertToFloat();
      if (v > DXIL::kMaxMipLodBias)
        bias = ConstantFP::get(FP->getType(), DXIL::kMaxMipLodBias);
      if (v < DXIL::kMinMipLodBias)
        bias = ConstantFP::get(FP->getType(), DXIL::kMinMipLodBias);
    }
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // Bias.
        bias,
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleCmp: {
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // CmpVal.
        sampleHelper.special,
        // Clamp.
        sampleHelper.clamp};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  case OP::OpCode::SampleCmpLevelZero:
  default: {
    DXASSERT(opcode == OP::OpCode::SampleCmpLevelZero, "invalid sample opcode");
    Value *sampleArgs[] = {
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
        // Coord.
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
        sampleHelper.coord[3],
        // Offset.
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
        // CmpVal.
        sampleHelper.special};
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  } break;
  }
  // CI is replaced in GenerateDxilSample.
  return nullptr;
}
  2522. // Gather intrinsics.
// Gathers and normalizes the operands of an HL Gather*/GatherCmp* call so
// the lowering code only has to assemble the final DXIL argument list.
struct GatherHelper {
  // Which component the gather reads (GatherAll is the plain Gather()).
  enum class GatherChannel {
    GatherAll,
    GatherRed,
    GatherGreen,
    GatherBlue,
    GatherAlpha,
  };

  GatherHelper(CallInst *CI, OP::OpCode op,
               HLObjectOperationLowerHelper *pObjHelper,
               GatherHelper::GatherChannel ch);

  // Resolved opcode; set to NumOpCodes when the resource kind is invalid.
  OP::OpCode opcode;
  Value *texHandle;
  Value *samplerHandle;
  static const unsigned kMaxCoordDimensions = 4;
  // Scalarized coordinate; unused trailing lanes are undef floats.
  Value *coord[kMaxCoordDimensions];
  // Channel index (0-3) passed to the DXIL gather op.
  unsigned channel;
  Value *special; // For CompareValue, Bias, LOD.
  // Optional.
  static const unsigned kMaxOffsetDimensions = 2;
  Value *offset[kMaxOffsetDimensions];
  // For the overload send different offset for each sample.
  // Only save 3 sampleOffsets because use offset for normal overload as first
  // sample offset.
  static const unsigned kSampleOffsetDimensions = 3;
  Value *sampleOffsets[kSampleOffsetDimensions][kMaxOffsetDimensions];
  // Status out-param pointer, or nullptr when the overload has none.
  Value *status;
  // True when the call used the one-offset-per-sample overload.
  bool hasSampleOffsets;

  // Scalarize the coordinate vector arg; pad to 4 with undef floats.
  void TranslateCoord(CallInst *CI, unsigned coordIdx,
                      unsigned coordDimensions) {
    Value *coordArg = CI->getArgOperand(coordIdx);
    IRBuilder<> Builder(CI);
    for (unsigned i = 0; i < coordDimensions; i++)
      coord[i] = Builder.CreateExtractElement(coordArg, i);
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++)
      coord[i] = undefF;
  }
  // The status out-param, when present, is always the last argument.
  void SetStatus(CallInst *CI, unsigned statusIdx) {
    if (CI->getNumArgOperands() == (statusIdx + 1))
      status = CI->getArgOperand(statusIdx);
    else
      status = nullptr;
  }
  // Scalarize the optional offset arg (offset for the first sample);
  // missing args and padding lanes become undef i32.
  void TranslateOffset(CallInst *CI, unsigned offsetIdx,
                       unsigned offsetDimensions) {
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
    if (CI->getNumArgOperands() > offsetIdx) {
      Value *offsetArg = CI->getArgOperand(offsetIdx);
      IRBuilder<> Builder(CI);
      for (unsigned i = 0; i < offsetDimensions; i++)
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
      for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
        offset[i] = undefI;
    } else {
      for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
        offset[i] = undefI;
    }
  }
  // Scalarize the per-sample offsets (samples 1-3) when the call has enough
  // trailing args for the per-sample-offset overload; sample 0's offset is
  // already in GatherHelper::offset.
  void TranslateSampleOffset(CallInst *CI, unsigned offsetIdx,
                             unsigned offsetDimensions) {
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
    if (CI->getNumArgOperands() >= (offsetIdx + kSampleOffsetDimensions)) {
      hasSampleOffsets = true;
      IRBuilder<> Builder(CI);
      for (unsigned ch = 0; ch < kSampleOffsetDimensions; ch++) {
        Value *offsetArg = CI->getArgOperand(offsetIdx + ch);
        for (unsigned i = 0; i < offsetDimensions; i++)
          sampleOffsets[ch][i] = Builder.CreateExtractElement(offsetArg, i);
        for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++)
          sampleOffsets[ch][i] = undefI;
      }
    }
  }
  // Update the offset args for gather with sample offset at sampleIdx.
  void UpdateOffsetInGatherArgs(MutableArrayRef<Value *> gatherArgs,
                                unsigned sampleIdx) {
    unsigned offsetBase = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
    for (unsigned i = 0; i < kMaxOffsetDimensions; i++)
      // -1 because offset for sample 0 is in GatherHelper::offset.
      gatherArgs[offsetBase + i] = sampleOffsets[sampleIdx - 1][i];
  }
};
// Pulls handles, coordinate, offsets (normal and per-sample), status, and
// the GatherCmp compare value out of the HL call, and maps the gather
// variant to its DXIL channel index.
GatherHelper::GatherHelper(CallInst *CI, OP::OpCode op,
                           HLObjectOperationLowerHelper *pObjHelper,
                           GatherHelper::GatherChannel ch)
    : opcode(op), special(nullptr), hasSampleOffsets(false) {
  const unsigned thisIdx =
      HLOperandIndex::kHandleOpIdx; // opcode takes arg0, this pointer is arg1.
  const unsigned kSamplerArgIndex = HLOperandIndex::kSampleSamplerArgIndex;
  switch (ch) {
  case GatherChannel::GatherAll:
    // Plain Gather() reads channel 0 (red).
    channel = 0;
    break;
  case GatherChannel::GatherRed:
    channel = 0;
    break;
  case GatherChannel::GatherGreen:
    channel = 1;
    break;
  case GatherChannel::GatherBlue:
    channel = 2;
    break;
  case GatherChannel::GatherAlpha:
    channel = 3;
    break;
  }

  IRBuilder<> Builder(CI);
  texHandle = CI->getArgOperand(thisIdx);
  samplerHandle = CI->getArgOperand(kSamplerArgIndex);

  DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
  // Unresolvable resource kind: flag with NumOpCodes so callers bail out.
  if (RK == DXIL::ResourceKind::Invalid) {
    opcode = DXIL::OpCode::NumOpCodes;
    return;
  }
  unsigned coordSize = DxilResource::GetNumCoords(RK);
  unsigned offsetSize = DxilResource::GetNumOffsets(RK);

  const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  TranslateCoord(CI, kCoordArgIdx, coordSize);

  switch (op) {
  case OP::OpCode::TextureGather: {
    TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize);
    // Gather all don't have sample offset version overload.
    if (ch != GatherChannel::GatherAll)
      TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
                            offsetSize);
    // The status arg shifts when per-sample offsets are present.
    unsigned statusIdx =
        hasSampleOffsets ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex
                         : HLOperandIndex::kGatherStatusArgIndex;
    SetStatus(CI, statusIdx);
  } break;
  case OP::OpCode::TextureGatherCmp: {
    special = CI->getArgOperand(HLOperandIndex::kGatherCmpCmpValArgIndex);
    TranslateOffset(CI, HLOperandIndex::kGatherCmpOffsetArgIndex, offsetSize);
    // Gather all don't have sample offset version overload.
    if (ch != GatherChannel::GatherAll)
      TranslateSampleOffset(CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex,
                            offsetSize);
    unsigned statusIdx =
        hasSampleOffsets
            ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex
            : HLOperandIndex::kGatherCmpStatusArgIndex;
    SetStatus(CI, statusIdx);
  } break;
  default:
    DXASSERT(0, "invalid opcode for Gather");
    break;
  }
}
// Emits the DXIL gather call(s) for CI and replaces its uses with the result.
// Without per-sample offsets a single call returns all four gathered texels.
// With per-sample offsets, one call per texel is issued: sample i reuses the
// arg list with its own offset patched in, and its result is taken from
// component i of that call.
void GenerateDxilGather(CallInst *CI, Function *F,
                        MutableArrayRef<Value *> gatherArgs,
                        GatherHelper &helper, hlsl::OP *hlslOp) {
  IRBuilder<> Builder(CI);

  CallInst *call = Builder.CreateCall(F, gatherArgs);

  if (!helper.hasSampleOffsets) {
    // extract value part
    Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);

    // Replace ret val.
    CI->replaceAllUsesWith(retVal);
  } else {
    // Sample 0 uses the offset already in gatherArgs.
    Value *retVal = UndefValue::get(CI->getType());
    Value *elt = Builder.CreateExtractValue(call, (uint64_t)0);
    retVal = Builder.CreateInsertElement(retVal, elt, (uint64_t)0);

    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 1);
    CallInst *callY = Builder.CreateCall(F, gatherArgs);
    elt = Builder.CreateExtractValue(callY, (uint64_t)1);
    retVal = Builder.CreateInsertElement(retVal, elt, 1);

    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 2);
    CallInst *callZ = Builder.CreateCall(F, gatherArgs);
    elt = Builder.CreateExtractValue(callZ, (uint64_t)2);
    retVal = Builder.CreateInsertElement(retVal, elt, 2);

    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 3);
    CallInst *callW = Builder.CreateCall(F, gatherArgs);
    elt = Builder.CreateExtractValue(callW, (uint64_t)3);
    retVal = Builder.CreateInsertElement(retVal, elt, 3);

    // Replace ret val.
    CI->replaceAllUsesWith(retVal);
    // TODO: UpdateStatus for each gather call.
  }

  // Get status
  // NOTE(review): with per-sample offsets only the first call's status is
  // checked here — see the TODO above.
  if (helper.status) {
    UpdateStatus(call, helper.status, Builder, hlslOp);
  }
}
// Lowers HL Gather*/GatherCmp* calls to DXIL TextureGather/TextureGatherCmp.
// The HL intrinsic op selects which channel is gathered; GenerateDxilGather
// emits the call(s) and replaces CI, so this returns nullptr.
Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Map the HL intrinsic variant to the gathered channel.
  GatherHelper::GatherChannel ch = GatherHelper::GatherChannel::GatherAll;
  switch (IOP) {
  case IntrinsicOp::MOP_Gather:
  case IntrinsicOp::MOP_GatherCmp:
    ch = GatherHelper::GatherChannel::GatherAll;
    break;
  case IntrinsicOp::MOP_GatherRed:
  case IntrinsicOp::MOP_GatherCmpRed:
    ch = GatherHelper::GatherChannel::GatherRed;
    break;
  case IntrinsicOp::MOP_GatherGreen:
  case IntrinsicOp::MOP_GatherCmpGreen:
    ch = GatherHelper::GatherChannel::GatherGreen;
    break;
  case IntrinsicOp::MOP_GatherBlue:
  case IntrinsicOp::MOP_GatherCmpBlue:
    ch = GatherHelper::GatherChannel::GatherBlue;
    break;
  case IntrinsicOp::MOP_GatherAlpha:
  case IntrinsicOp::MOP_GatherCmpAlpha:
    ch = GatherHelper::GatherChannel::GatherAlpha;
    break;
  default:
    DXASSERT(0, "invalid gather intrinsic");
    break;
  }

  GatherHelper gatherHelper(CI, opcode, pObjHelper, ch);
  // Unresolved resource kind: leave the HL call untranslated.
  if (gatherHelper.opcode == DXIL::OpCode::NumOpCodes) {
    Translated = false;
    return nullptr;
  }

  Type *Ty = CI->getType();
  // Overload the DXIL op on the scalar element type of the result.
  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());

  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel);

  switch (opcode) {
  case OP::OpCode::TextureGather: {
    Value *gatherArgs[] = {
        opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
        // Coord.
        gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
        gatherHelper.coord[3],
        // Offset.
        gatherHelper.offset[0], gatherHelper.offset[1],
        // Channel.
        channelArg};
    GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  } break;
  case OP::OpCode::TextureGatherCmp: {
    Value *gatherArgs[] = {
        opArg, gatherHelper.texHandle, gatherHelper.samplerHandle,
        // Coord.
        gatherHelper.coord[0], gatherHelper.coord[1], gatherHelper.coord[2],
        gatherHelper.coord[3],
        // Offset.
        gatherHelper.offset[0], gatherHelper.offset[1],
        // Channel.
        channelArg,
        // CmpVal.
        gatherHelper.special};
    GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  } break;
  default:
    DXASSERT(0, "invalid opcode for Gather");
    break;
  }
  // CI is replaced in GenerateDxilGather.
  return nullptr;
}
  2778. // Load/Store intrinsics.
// Collects the operands of an HL buffer/texture load (or subscript) so the
// lowering code can emit the right DXIL load op with the right address,
// offset, status, and mip-level operands.
struct ResLoadHelper {
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, IntrinsicOp IOP, bool bForSubscript = false);
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                Value *h, Value *mip);
  // For double subscript.
  ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip)
      : opcode(OP::OpCode::TextureLoad),
        intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst),
        addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {}
  // DXIL load opcode chosen from the resource kind
  // (RawBufferLoad / BufferLoad / TextureLoad).
  OP::OpCode opcode;
  IntrinsicOp intrinsicOpCode;
  unsigned dxilMajor;
  unsigned dxilMinor;
  Value *handle;
  // The HL instruction whose uses the load result will replace.
  Value *retVal;
  Value *addr;
  Value *offset;   // nullptr when no offset was captured.
  Value *status;   // nullptr when there is no status out-param.
  Value *mipLevel;
};
  2800. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  2801. DxilResourceBase::Class RC, Value *hdl, IntrinsicOp IOP, bool bForSubscript)
  2802. : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) {
  2803. switch (RK) {
  2804. case DxilResource::Kind::RawBuffer:
  2805. case DxilResource::Kind::StructuredBuffer:
  2806. opcode = OP::OpCode::RawBufferLoad;
  2807. break;
  2808. case DxilResource::Kind::TypedBuffer:
  2809. opcode = OP::OpCode::BufferLoad;
  2810. break;
  2811. case DxilResource::Kind::Invalid:
  2812. DXASSERT(0, "invalid resource kind");
  2813. break;
  2814. default:
  2815. opcode = OP::OpCode::TextureLoad;
  2816. break;
  2817. }
  2818. retVal = CI;
  2819. const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx;
  2820. addr = CI->getArgOperand(kAddrIdx);
  2821. unsigned argc = CI->getNumArgOperands();
  2822. if (opcode == OP::OpCode::TextureLoad) {
  2823. // mip at last channel
  2824. unsigned coordSize = DxilResource::GetNumCoords(RK);
  2825. if (RC == DxilResourceBase::Class::SRV) {
  2826. if (bForSubscript) {
  2827. // Use 0 when access by [].
  2828. mipLevel = IRBuilder<>(CI).getInt32(0);
  2829. } else {
  2830. if (coordSize == 1 && !addr->getType()->isVectorTy()) {
  2831. // Use addr when access by Load.
  2832. mipLevel = addr;
  2833. } else {
  2834. mipLevel = IRBuilder<>(CI).CreateExtractElement(addr, coordSize);
  2835. }
  2836. }
  2837. } else {
  2838. // Set mip level to undef for UAV.
  2839. mipLevel = UndefValue::get(Type::getInt32Ty(addr->getContext()));
  2840. }
  2841. if (RC == DxilResourceBase::Class::SRV) {
  2842. unsigned offsetIdx = HLOperandIndex::kTexLoadOffsetOpIdx;
  2843. unsigned statusIdx = HLOperandIndex::kTexLoadStatusOpIdx;
  2844. if (RK == DxilResource::Kind::Texture2DMS ||
  2845. RK == DxilResource::Kind::Texture2DMSArray) {
  2846. offsetIdx = HLOperandIndex::kTex2DMSLoadOffsetOpIdx;
  2847. statusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx;
  2848. mipLevel =
  2849. CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx);
  2850. }
  2851. if (argc > offsetIdx)
  2852. offset = CI->getArgOperand(offsetIdx);
  2853. if (argc > statusIdx)
  2854. status = CI->getArgOperand(statusIdx);
  2855. } else {
  2856. const unsigned kStatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx;
  2857. if (argc > kStatusIdx)
  2858. status = CI->getArgOperand(kStatusIdx);
  2859. }
  2860. } else {
  2861. const unsigned kStatusIdx = HLOperandIndex::kBufLoadStatusOpIdx;
  2862. if (argc > kStatusIdx)
  2863. status = CI->getArgOperand(kStatusIdx);
  2864. }
  2865. }
  2866. ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
  2867. DxilResourceBase::Class RC, Value *hdl, Value *mip)
  2868. : handle(hdl), offset(nullptr), status(nullptr) {
  2869. DXASSERT(RK != DxilResource::Kind::RawBuffer &&
  2870. RK != DxilResource::Kind::TypedBuffer &&
  2871. RK != DxilResource::Kind::Invalid,
  2872. "invalid resource kind");
  2873. opcode = OP::OpCode::TextureLoad;
  2874. retVal = CI;
  2875. mipLevel = mip;
  2876. const unsigned kAddrIdx = HLOperandIndex::kMipLoadAddrOpIdx;
  2877. addr = CI->getArgOperand(kAddrIdx);
  2878. unsigned argc = CI->getNumArgOperands();
  2879. const unsigned kOffsetIdx = HLOperandIndex::kMipLoadOffsetOpIdx;
  2880. const unsigned kStatusIdx = HLOperandIndex::kMipLoadStatusOpIdx;
  2881. if (argc > kOffsetIdx)
  2882. offset = CI->getArgOperand(kOffsetIdx);
  2883. if (argc > kStatusIdx)
  2884. status = CI->getArgOperand(kStatusIdx);
  2885. }
  2886. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  2887. hlsl::OP *OP, const DataLayout &DL);
  2888. // Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi }
  2889. void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
  2890. unsigned size, MutableArrayRef<Value *> resultElts,
  2891. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  2892. Type *i64Ty = Builder.getInt64Ty();
  2893. Type *doubleTy = Builder.getDoubleTy();
  2894. if (EltTy == doubleTy) {
  2895. Function *makeDouble =
  2896. hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy);
  2897. Value *makeDoubleOpArg =
  2898. Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
  2899. for (unsigned i = 0; i < size; i++) {
  2900. Value *lo = resultElts32[2 * i];
  2901. Value *hi = resultElts32[2 * i + 1];
  2902. Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
  2903. resultElts[i] = V;
  2904. }
  2905. } else {
  2906. for (unsigned i = 0; i < size; i++) {
  2907. Value *lo = resultElts32[2 * i];
  2908. Value *hi = resultElts32[2 * i + 1];
  2909. lo = Builder.CreateZExt(lo, i64Ty);
  2910. hi = Builder.CreateZExt(hi, i64Ty);
  2911. hi = Builder.CreateShl(hi, 32);
  2912. resultElts[i] = Builder.CreateOr(lo, hi);
  2913. }
  2914. }
  2915. }
  2916. static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) {
  2917. unsigned mask = 0;
  2918. switch (NumComponents) {
  2919. case 0:
  2920. break;
  2921. case 1:
  2922. mask = DXIL::kCompMask_X;
  2923. break;
  2924. case 2:
  2925. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y;
  2926. break;
  2927. case 3:
  2928. mask = DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
  2929. break;
  2930. case 4:
  2931. mask = DXIL::kCompMask_All;
  2932. break;
  2933. default:
  2934. DXASSERT(false, "Cannot load more than 2 components for 64bit types.");
  2935. }
  2936. return OP->GetI8Const(mask);
  2937. }
  2938. void GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
  2939. Value *status, Type *EltTy,
  2940. MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
  2941. IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment);
// Lower an HL resource load (described by `helper`) into the matching DXIL
// load op: textureLoad, bufferLoad (typed), or rawBufferLoad.  A pointer-typed
// result means this is a structured-buffer subscript, which is lowered per-use
// by TranslateStructBufSubscript instead.  On return, helper.retVal points at
// the replacement value and all uses of the HL call have been rewired.
void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                   IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {
  Type *Ty = helper.retVal->getType();
  if (Ty->isPointerTy()) {
    // Subscript on a structured buffer yields a pointer; defer to the
    // per-use subscript lowering.
    DXASSERT(!DxilResource::IsAnyTexture(RK),
             "Textures should not be treated as structured buffers.");
    TranslateStructBufSubscript(cast<CallInst>(helper.retVal), helper.handle,
                                helper.status, OP, DL);
    return;
  }
  OP::OpCode opcode = helper.opcode;
  Type *i32Ty = Builder.getInt32Ty();
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  Type *EltTy = Ty->getScalarType();
  Constant *Alignment = OP->GetI32Const(OP->GetAllocSizeForType(EltTy));
  unsigned numComponents = 1;
  if (Ty->isVectorTy()) {
    numComponents = Ty->getVectorNumElements();
  }
  if (RK == HLResource::Kind::StructuredBuffer) {
    // Basic type case for StructuredBuffer::Load()
    Value *ResultElts[4];
    GenerateStructBufLd(helper.handle, helper.addr, OP->GetU32Const(0),
                        helper.status, EltTy, ResultElts, OP, Builder,
                        numComponents, Alignment);
    Value *retValNew = ScalarizeElements(Ty, ResultElts, Builder);
    helper.retVal->replaceAllUsesWith(retValNew);
    helper.retVal = retValNew;
    return;
  }
  bool isTyped = opcode == OP::OpCode::TextureLoad ||
                 RK == DxilResource::Kind::TypedBuffer;
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  // Typed loads have no native 64-bit form: load pairs of i32 and reassemble
  // them below with Make64bitResultForLoad.
  if (is64 && isTyped) {
    EltTy = i32Ty;
  }
  Function *F = OP->GetOpFunc(opcode, EltTy);
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
  llvm::Value *undefI = llvm::UndefValue::get(i32Ty);
  SmallVector<Value *, 12> loadArgs;
  loadArgs.emplace_back(opArg);         // opcode
  loadArgs.emplace_back(helper.handle); // resource handle
  if (opcode == OP::OpCode::TextureLoad) {
    // set mip level
    loadArgs.emplace_back(helper.mipLevel);
  }
  if (opcode == OP::OpCode::TextureLoad) {
    // texture coord: always emit three operands, padding with undef past the
    // resource's coordinate count.
    unsigned coordSize = DxilResource::GetNumCoords(RK);
    bool isVectorAddr = helper.addr->getType()->isVectorTy();
    for (unsigned i = 0; i < 3; i++) {
      if (i < coordSize) {
        loadArgs.emplace_back(isVectorAddr
                                  ? Builder.CreateExtractElement(helper.addr, i)
                                  : helper.addr);
      } else
        loadArgs.emplace_back(undefI);
    }
  } else {
    // Buffer load takes a single scalar index/offset operand.
    if (helper.addr->getType()->isVectorTy()) {
      Value *scalarOffset =
          Builder.CreateExtractElement(helper.addr, (uint64_t)0);
      // TODO: calculate the real address based on opcode
      loadArgs.emplace_back(scalarOffset); // offset
    } else {
      // TODO: calculate the real address based on opcode
      loadArgs.emplace_back(helper.addr); // offset
    }
  }
  // offset 0
  if (opcode == OP::OpCode::TextureLoad) {
    if (helper.offset && !isa<llvm::UndefValue>(helper.offset)) {
      unsigned offsetSize = DxilResource::GetNumOffsets(RK);
      for (unsigned i = 0; i < 3; i++) {
        if (i < offsetSize)
          loadArgs.emplace_back(Builder.CreateExtractElement(helper.offset, i));
        else
          loadArgs.emplace_back(undefI);
      }
    } else {
      // No offset supplied: pad all three offset operands with undef.
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
      loadArgs.emplace_back(undefI);
    }
  }
  // Offset 1
  if (RK == DxilResource::Kind::RawBuffer) {
    // elementOffset, mask, alignment
    loadArgs.emplace_back(undefI);
    Type *rtnTy = helper.retVal->getType();
    loadArgs.emplace_back(GetRawBufferMaskForETy(rtnTy, numComponents, OP));
    loadArgs.emplace_back(Alignment);
  } else if (RK == DxilResource::Kind::TypedBuffer) {
    loadArgs.emplace_back(undefI);
  }
  Value *ResRet = Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode));
  Value *retValNew = nullptr;
  if (!is64 || !isTyped) {
    retValNew = ScalarizeResRet(Ty, ResRet, Builder);
  } else {
    // Reassemble 64-bit elements from the lo/hi i32 pairs loaded above.
    unsigned size = numComponents;
    DXASSERT(size <= 2, "typed buffer only allow 4 dwords");
    EltTy = Ty->getScalarType();
    Value *Elts[2];
    Make64bitResultForLoad(Ty->getScalarType(),
                           {
                               Builder.CreateExtractValue(ResRet, 0),
                               Builder.CreateExtractValue(ResRet, 1),
                               Builder.CreateExtractValue(ResRet, 2),
                               Builder.CreateExtractValue(ResRet, 3),
                           },
                           size, Elts, OP, Builder);
    retValNew = ScalarizeElements(Ty, Elts, Builder);
  }
  // replace
  helper.retVal->replaceAllUsesWith(retValNew);
  // Save new ret val.
  helper.retVal = retValNew;
  // get status
  UpdateStatus(ResRet, helper.status, Builder, OP);
}
  3064. Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3065. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3066. hlsl::OP *hlslOP = &helper.hlslOP;
  3067. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3068. IRBuilder<> Builder(CI);
  3069. DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  3070. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  3071. ResLoadHelper loadHelper(CI, RK, RC, handle, IOP);
  3072. TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout);
  3073. // CI is replaced in TranslateLoad.
  3074. return nullptr;
  3075. }
  3076. // Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
  3077. void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
  3078. MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
  3079. IRBuilder<> &Builder) {
  3080. Type *i32Ty = Builder.getInt32Ty();
  3081. Type *doubleTy = Builder.getDoubleTy();
  3082. Value *undefI32 = UndefValue::get(i32Ty);
  3083. if (EltTy == doubleTy) {
  3084. Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
  3085. Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
  3086. for (unsigned i = 0; i < size; i++) {
  3087. if (isa<UndefValue>(vals[i])) {
  3088. vals32[2 * i] = undefI32;
  3089. vals32[2 * i + 1] = undefI32;
  3090. } else {
  3091. Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
  3092. Value *lo = Builder.CreateExtractValue(retVal, 0);
  3093. Value *hi = Builder.CreateExtractValue(retVal, 1);
  3094. vals32[2 * i] = lo;
  3095. vals32[2 * i + 1] = hi;
  3096. }
  3097. }
  3098. } else {
  3099. for (unsigned i = 0; i < size; i++) {
  3100. if (isa<UndefValue>(vals[i])) {
  3101. vals32[2 * i] = undefI32;
  3102. vals32[2 * i + 1] = undefI32;
  3103. } else {
  3104. Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
  3105. Value *hi = Builder.CreateLShr(vals[i], 32);
  3106. hi = Builder.CreateTrunc(hi, i32Ty);
  3107. vals32[2 * i] = lo;
  3108. vals32[2 * i + 1] = hi;
  3109. }
  3110. }
  3111. }
  3112. }
// Lower an HL resource store into the matching DXIL store op:
// rawBufferStore, bufferStore (typed), or textureStore, depending on the
// resource kind.  `val` may be a scalar or a vector of up to 4 components;
// `offset` is the buffer index/offset or the texture coordinate.  64-bit
// elements on typed resources are split into pairs of i32 dwords before
// being stored.
void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
                    Value *offset, IRBuilder<> &Builder, hlsl::OP *OP) {
  Type *Ty = val->getType();
  OP::OpCode opcode = OP::OpCode::NumOpCodes;
  switch (RK) {
  case DxilResource::Kind::RawBuffer:
  case DxilResource::Kind::StructuredBuffer:
    opcode = OP::OpCode::RawBufferStore;
    break;
  case DxilResource::Kind::TypedBuffer:
    opcode = OP::OpCode::BufferStore;
    break;
  case DxilResource::Kind::Invalid:
    DXASSERT(0, "invalid resource kind");
    break;
  default:
    opcode = OP::OpCode::TextureStore;
    break;
  }
  bool isTyped = opcode == OP::OpCode::TextureStore ||
                 RK == DxilResource::Kind::TypedBuffer;
  Type *i32Ty = Builder.getInt32Ty();
  Type *i64Ty = Builder.getInt64Ty();
  Type *doubleTy = Builder.getDoubleTy();
  Type *EltTy = Ty->getScalarType();
  Constant *Alignment = OP->GetI32Const(OP->GetAllocSizeForType(EltTy));
  bool is64 = EltTy == i64Ty || EltTy == doubleTy;
  // Typed stores have no native 64-bit form: store pairs of i32 instead
  // (split below with Split64bitValForStore).
  if (is64 && isTyped) {
    EltTy = i32Ty;
  }
  Function *F = OP->GetOpFunc(opcode, EltTy);
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
  llvm::Value *undefI =
      llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext()));
  llvm::Value *undefVal = llvm::UndefValue::get(Ty->getScalarType());
  SmallVector<Value *, 13> storeArgs;
  storeArgs.emplace_back(opArg);  // opcode
  storeArgs.emplace_back(handle); // resource handle
  if (RK == DxilResource::Kind::RawBuffer ||
      RK == DxilResource::Kind::TypedBuffer) {
    // Offset 0
    if (offset->getType()->isVectorTy()) {
      Value *scalarOffset = Builder.CreateExtractElement(offset, (uint64_t)0);
      storeArgs.emplace_back(scalarOffset); // offset
    } else {
      storeArgs.emplace_back(offset); // offset
    }
    // Offset 1
    storeArgs.emplace_back(undefI);
  } else {
    // texture store: emit three coordinate operands, padding with undef past
    // the resource's coordinate count.
    unsigned coordSize = DxilResource::GetNumCoords(RK);
    // Set x first.
    if (offset->getType()->isVectorTy())
      storeArgs.emplace_back(Builder.CreateExtractElement(offset, (uint64_t)0));
    else
      storeArgs.emplace_back(offset);
    for (unsigned i = 1; i < 3; i++) {
      if (i < coordSize)
        storeArgs.emplace_back(Builder.CreateExtractElement(offset, i));
      else
        storeArgs.emplace_back(undefI);
    }
    // TODO: support mip for texture ST
  }
  // values: always emit four value operands plus a write mask.  Typed stores
  // must write all four components, so unused lanes are padded with a real
  // value (component 0); raw stores pad with undef and mask off unused lanes.
  uint8_t mask = 0;
  if (Ty->isVectorTy()) {
    unsigned vecSize = Ty->getVectorNumElements();
    Value *emptyVal = undefVal;
    if (isTyped) {
      mask = DXIL::kCompMask_All;
      emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
    }
    for (unsigned i = 0; i < 4; i++) {
      if (i < vecSize) {
        storeArgs.emplace_back(Builder.CreateExtractElement(val, i));
        mask |= (1 << i);
      } else {
        storeArgs.emplace_back(emptyVal);
      }
    }
  } else {
    if (isTyped) {
      mask = DXIL::kCompMask_All;
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(val);
    } else {
      storeArgs.emplace_back(val);
      storeArgs.emplace_back(undefVal);
      storeArgs.emplace_back(undefVal);
      storeArgs.emplace_back(undefVal);
      mask = DXIL::kCompMask_X;
    }
  }
  if (is64 && isTyped) {
    // Rewrite the value operands in place: split each 64-bit element into a
    // lo/hi pair of i32 dwords.
    unsigned size = 1;
    if (Ty->isVectorTy()) {
      size = Ty->getVectorNumElements();
    }
    DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords");
    unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore
                             ? DXIL::OperandIndex::kTextureStoreVal0OpIdx
                             : DXIL::OperandIndex::kBufferStoreVal0OpIdx;
    Value *V0 = storeArgs[val0OpIdx];
    Value *V1 = storeArgs[val0OpIdx + 1];
    Value *vals32[4];
    EltTy = Ty->getScalarType();
    Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder);
    // Fill the uninit vals.
    if (size == 1) {
      vals32[2] = vals32[0];
      vals32[3] = vals32[1];
    }
    // Change valOp to 32 version.
    for (unsigned i = 0; i < 4; i++) {
      storeArgs[val0OpIdx + i] = vals32[i];
    }
    // change mask for double: each 64-bit element now occupies two dword
    // lanes.
    if (opcode == DXIL::OpCode::RawBufferStore) {
      mask = size == 1 ? DXIL::kCompMask_X | DXIL::kCompMask_Y
                       : DXIL::kCompMask_All;
    }
  }
  storeArgs.emplace_back(OP->GetU8Const(mask)); // mask
  if (opcode == DXIL::OpCode::RawBufferStore)
    storeArgs.emplace_back(Alignment); // alignment only for raw buffer
  Builder.CreateCall(F, storeArgs);
}
  3244. Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3245. HLOperationLowerHelper &helper,
  3246. HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3247. hlsl::OP *hlslOP = &helper.hlslOP;
  3248. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3249. IRBuilder<> Builder(CI);
  3250. DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  3251. Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx);
  3252. Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx);
  3253. TranslateStore(RK, handle, val, offset, Builder, hlslOP);
  3254. return nullptr;
  3255. }
  3256. }
  3257. // Atomic intrinsics.
  3258. namespace {
  3259. // Atomic intrinsics.
  3260. struct AtomicHelper {
  3261. AtomicHelper(CallInst *CI, OP::OpCode op, Value *h);
  3262. AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
  3263. Value *baseOffset);
  3264. OP::OpCode opcode;
  3265. Value *handle;
  3266. Value *addr;
  3267. Value *offset; // Offset for structrued buffer.
  3268. Value *value;
  3269. Value *originalValue;
  3270. Value *compareValue;
  3271. };
  3272. // For MOP version of Interlocked*.
  3273. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h)
  3274. : opcode(op), handle(h), offset(nullptr), originalValue(nullptr) {
  3275. addr = CI->getArgOperand(HLOperandIndex::kObjectInterlockedDestOpIndex);
  3276. if (op == OP::OpCode::AtomicCompareExchange) {
  3277. compareValue = CI->getArgOperand(
  3278. HLOperandIndex::kObjectInterlockedCmpCompareValueOpIndex);
  3279. value =
  3280. CI->getArgOperand(HLOperandIndex::kObjectInterlockedCmpValueOpIndex);
  3281. if (CI->getNumArgOperands() ==
  3282. (HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex + 1))
  3283. originalValue = CI->getArgOperand(
  3284. HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex);
  3285. } else {
  3286. value = CI->getArgOperand(HLOperandIndex::kObjectInterlockedValueOpIndex);
  3287. if (CI->getNumArgOperands() ==
  3288. (HLOperandIndex::kObjectInterlockedOriginalValueOpIndex + 1))
  3289. originalValue = CI->getArgOperand(
  3290. HLOperandIndex::kObjectInterlockedOriginalValueOpIndex);
  3291. }
  3292. }
  3293. // For IOP version of Interlocked*.
  3294. AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
  3295. Value *baseOffset)
  3296. : opcode(op), handle(h), addr(bufIdx),
  3297. offset(baseOffset), originalValue(nullptr) {
  3298. if (op == OP::OpCode::AtomicCompareExchange) {
  3299. compareValue =
  3300. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3301. value = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3302. if (CI->getNumArgOperands() ==
  3303. (HLOperandIndex::kInterlockedCmpOriginalValueOpIndex + 1))
  3304. originalValue = CI->getArgOperand(
  3305. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex);
  3306. } else {
  3307. value = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3308. if (CI->getNumArgOperands() ==
  3309. (HLOperandIndex::kInterlockedOriginalValueOpIndex + 1))
  3310. originalValue =
  3311. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex);
  3312. }
  3313. }
// Emit a dxil.atomicBinOp call for the operation described by `helper`.
// The args array is laid out per the DXIL operand indices: coordinates are
// filled from helper.addr (a scalar or up-to-3-component vector), and for
// structured buffers helper.offset overwrites the second coordinate slot.
// If helper.originalValue is non-null, the returned pre-op value is stored
// through it.
void TranslateAtomicBinaryOperation(AtomicHelper &helper,
                                    DXIL::AtomicBinOpCode atomicOp,
                                    IRBuilder<> &Builder, hlsl::OP *hlslOP) {
  Value *handle = helper.handle;
  Value *addr = helper.addr;
  Value *val = helper.value;
  Type *Ty = val->getType();
  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  Value *atomicOpArg = hlslOP->GetU32Const(static_cast<unsigned>(atomicOp));
  // Unused coordinate slots stay undef.
  Value *args[] = {opArg,  handle, atomicOpArg,
                   undefI, undefI, undefI, // coordinates
                   val};
  // Setup coordinates.
  if (addr->getType()->isVectorTy()) {
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
    DXASSERT(vectorNumElements <= 3, "up to 3 elements for atomic binary op");
    _Analysis_assume_(vectorNumElements <= 3);
    for (unsigned i = 0; i < vectorNumElements; i++) {
      Value *Elt = Builder.CreateExtractElement(addr, i);
      args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx + i] = Elt;
    }
  } else
    args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx] = addr;
  // Set offset for structured buffer.
  if (helper.offset)
    args[DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx] = helper.offset;
  Value *origVal =
      Builder.CreateCall(dxilAtomic, args, hlslOP->GetAtomicOpName(atomicOp));
  if (helper.originalValue) {
    // Forward the pre-op value to the user's out parameter.
    Builder.CreateStore(origVal, helper.originalValue);
  }
}
  3348. Value *TranslateMopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3349. OP::OpCode opcode,
  3350. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3351. hlsl::OP *hlslOP = &helper.hlslOP;
  3352. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3353. IRBuilder<> Builder(CI);
  3354. switch (IOP) {
  3355. case IntrinsicOp::MOP_InterlockedAdd: {
  3356. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3357. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add, Builder,
  3358. hlslOP);
  3359. } break;
  3360. case IntrinsicOp::MOP_InterlockedAnd: {
  3361. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3362. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And, Builder,
  3363. hlslOP);
  3364. } break;
  3365. case IntrinsicOp::MOP_InterlockedExchange: {
  3366. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3367. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
  3368. Builder, hlslOP);
  3369. } break;
  3370. case IntrinsicOp::MOP_InterlockedMax: {
  3371. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3372. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax, Builder,
  3373. hlslOP);
  3374. } break;
  3375. case IntrinsicOp::MOP_InterlockedMin: {
  3376. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3377. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin, Builder,
  3378. hlslOP);
  3379. } break;
  3380. case IntrinsicOp::MOP_InterlockedUMax: {
  3381. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3382. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax, Builder,
  3383. hlslOP);
  3384. } break;
  3385. case IntrinsicOp::MOP_InterlockedUMin: {
  3386. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3387. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin, Builder,
  3388. hlslOP);
  3389. } break;
  3390. case IntrinsicOp::MOP_InterlockedOr: {
  3391. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3392. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or, Builder,
  3393. hlslOP);
  3394. } break;
  3395. case IntrinsicOp::MOP_InterlockedXor: {
  3396. default:
  3397. DXASSERT(IOP == IntrinsicOp::MOP_InterlockedXor,
  3398. "invalid MOP atomic intrinsic");
  3399. AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
  3400. TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor, Builder,
  3401. hlslOP);
  3402. } break;
  3403. }
  3404. return nullptr;
  3405. }
// Emit a dxil.atomicCompareExchange call for the operation described by
// `helper`.  The args array is laid out per the DXIL operand indices:
// coordinates are filled from helper.addr (scalar or up-to-3-component
// vector), and for structured buffers helper.offset overwrites the second
// coordinate slot.  If helper.originalValue is non-null, the returned
// pre-op value is stored through it.
void TranslateAtomicCmpXChg(AtomicHelper &helper, IRBuilder<> &Builder,
                            hlsl::OP *hlslOP) {
  Value *handle = helper.handle;
  Value *addr = helper.addr;
  Value *val = helper.value;
  Value *cmpVal = helper.compareValue;
  Type *Ty = val->getType();
  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
  // Unused coordinate slots stay undef.
  Value *args[] = {opArg,  handle, undefI, undefI, undefI, // coordinates
                   cmpVal, val};
  // Setup coordinates.
  if (addr->getType()->isVectorTy()) {
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
    DXASSERT(vectorNumElements <= 3, "up to 3 elements in atomic op");
    _Analysis_assume_(vectorNumElements <= 3);
    for (unsigned i = 0; i < vectorNumElements; i++) {
      Value *Elt = Builder.CreateExtractElement(addr, i);
      args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx + i] = Elt;
    }
  } else
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx] = addr;
  // Set offset for structured buffer.
  if (helper.offset)
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx] = helper.offset;
  Value *origVal = Builder.CreateCall(dxilAtomic, args);
  if (helper.originalValue) {
    // Forward the pre-op value to the user's out parameter.
    Builder.CreateStore(origVal, helper.originalValue);
  }
}
  3437. Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3438. OP::OpCode opcode,
  3439. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3440. hlsl::OP *hlslOP = &helper.hlslOP;
  3441. Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  3442. IRBuilder<> Builder(CI);
  3443. AtomicHelper atomicHelper(CI, OP::OpCode::AtomicCompareExchange, handle);
  3444. TranslateAtomicCmpXChg(atomicHelper, Builder, hlslOP);
  3445. return nullptr;
  3446. }
  3447. void TranslateSharedMemAtomicBinOp(CallInst *CI, IntrinsicOp IOP, Value *addr) {
  3448. AtomicRMWInst::BinOp Op;
  3449. switch (IOP) {
  3450. case IntrinsicOp::IOP_InterlockedAdd:
  3451. Op = AtomicRMWInst::BinOp::Add;
  3452. break;
  3453. case IntrinsicOp::IOP_InterlockedAnd:
  3454. Op = AtomicRMWInst::BinOp::And;
  3455. break;
  3456. case IntrinsicOp::IOP_InterlockedExchange:
  3457. Op = AtomicRMWInst::BinOp::Xchg;
  3458. break;
  3459. case IntrinsicOp::IOP_InterlockedMax:
  3460. Op = AtomicRMWInst::BinOp::Max;
  3461. break;
  3462. case IntrinsicOp::IOP_InterlockedUMax:
  3463. Op = AtomicRMWInst::BinOp::UMax;
  3464. break;
  3465. case IntrinsicOp::IOP_InterlockedMin:
  3466. Op = AtomicRMWInst::BinOp::Min;
  3467. break;
  3468. case IntrinsicOp::IOP_InterlockedUMin:
  3469. Op = AtomicRMWInst::BinOp::UMin;
  3470. break;
  3471. case IntrinsicOp::IOP_InterlockedOr:
  3472. Op = AtomicRMWInst::BinOp::Or;
  3473. break;
  3474. case IntrinsicOp::IOP_InterlockedXor:
  3475. default:
  3476. DXASSERT(IOP == IntrinsicOp::IOP_InterlockedXor, "Invalid Intrinsic");
  3477. Op = AtomicRMWInst::BinOp::Xor;
  3478. break;
  3479. }
  3480. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
  3481. IRBuilder<> Builder(CI);
  3482. Value *Result = Builder.CreateAtomicRMW(
  3483. Op, addr, val, AtomicOrdering::SequentiallyConsistent);
  3484. if (CI->getNumArgOperands() >
  3485. HLOperandIndex::kInterlockedOriginalValueOpIndex)
  3486. Builder.CreateStore(
  3487. Result,
  3488. CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex));
  3489. }
  3490. Value *TranslateIopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP,
  3491. DXIL::OpCode opcode,
  3492. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3493. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3494. // Get the original addr from cast.
  3495. if (CastInst *castInst = dyn_cast<CastInst>(addr))
  3496. addr = castInst->getOperand(0);
  3497. else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(addr)) {
  3498. if (CE->getOpcode() == Instruction::AddrSpaceCast) {
  3499. addr = CE->getOperand(0);
  3500. }
  3501. }
  3502. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3503. if (addressSpace == DXIL::kTGSMAddrSpace)
  3504. TranslateSharedMemAtomicBinOp(CI, IOP, addr);
  3505. else {
  3506. // buffer atomic translated in TranslateSubscript.
  3507. // Do nothing here.
  3508. // Mark not translated.
  3509. Translated = false;
  3510. }
  3511. return nullptr;
  3512. }
  3513. void TranslateSharedMemAtomicCmpXChg(CallInst *CI, Value *addr) {
  3514. Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  3515. Value *cmpVal =
  3516. CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  3517. IRBuilder<> Builder(CI);
  3518. Value *Result = Builder.CreateAtomicCmpXchg(
  3519. addr, cmpVal, val, AtomicOrdering::SequentiallyConsistent,
  3520. AtomicOrdering::SequentiallyConsistent);
  3521. if (CI->getNumArgOperands() >
  3522. HLOperandIndex::kInterlockedCmpOriginalValueOpIndex) {
  3523. Value *originVal = Builder.CreateExtractValue(Result, 0);
  3524. Builder.CreateStore(
  3525. originVal,
  3526. CI->getArgOperand(HLOperandIndex::kInterlockedCmpOriginalValueOpIndex));
  3527. }
  3528. }
  3529. Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
  3530. DXIL::OpCode opcode,
  3531. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3532. Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
  3533. // Get the original addr from cast.
  3534. if (CastInst *castInst = dyn_cast<CastInst>(addr))
  3535. addr = castInst->getOperand(0);
  3536. unsigned addressSpace = addr->getType()->getPointerAddressSpace();
  3537. if (addressSpace == DXIL::kTGSMAddrSpace)
  3538. TranslateSharedMemAtomicCmpXChg(CI, addr);
  3539. else {
  3540. // buffer atomic translated in TranslateSubscript.
  3541. // Do nothing here.
  3542. // Mark not translated.
  3543. Translated = false;
  3544. }
  3545. return nullptr;
  3546. }
  3547. }
  3548. // Process Tess Factor.
  3549. namespace {
  3550. // Clamp to [0.0f..1.0f], NaN->0.0f.
  3551. Value *CleanupTessFactorScale(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3552. float fMin = 0;
  3553. float fMax = 1;
  3554. Type *f32Ty = input->getType()->getScalarType();
  3555. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3556. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3557. Type *Ty = input->getType();
  3558. if (Ty->isVectorTy())
  3559. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3560. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3561. if (Ty->isVectorTy())
  3562. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3563. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3564. }
  3565. // Clamp to [1.0f..Inf], NaN->1.0f.
  3566. Value *CleanupTessFactor(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder)
  3567. {
  3568. float fMin = 1.0;
  3569. Type *f32Ty = input->getType()->getScalarType();
  3570. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3571. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3572. return TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3573. }
  3574. // Do partitioning-specific clamping.
  3575. Value *ClampTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3576. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3577. const unsigned kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64;
  3578. const unsigned kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63;
  3579. const unsigned kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2;
  3580. const unsigned kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1;
  3581. const unsigned kTESSELLATOR_MAX_TESSELLATION_FACTOR = 64;
  3582. float fMin;
  3583. float fMax;
  3584. switch (partitionMode) {
  3585. case DXIL::TessellatorPartitioning::Integer:
  3586. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3587. fMax = kTESSELLATOR_MAX_TESSELLATION_FACTOR;
  3588. break;
  3589. case DXIL::TessellatorPartitioning::Pow2:
  3590. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3591. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3592. break;
  3593. case DXIL::TessellatorPartitioning::FractionalOdd:
  3594. fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
  3595. fMax = kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
  3596. break;
  3597. case DXIL::TessellatorPartitioning::FractionalEven:
  3598. default:
  3599. DXASSERT(partitionMode == DXIL::TessellatorPartitioning::FractionalEven,
  3600. "invalid partition mode");
  3601. fMin = kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
  3602. fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
  3603. break;
  3604. }
  3605. Type *f32Ty = input->getType()->getScalarType();
  3606. Value *minFactor = ConstantFP::get(f32Ty, fMin);
  3607. Value *maxFactor = ConstantFP::get(f32Ty, fMax);
  3608. Type *Ty = input->getType();
  3609. if (Ty->isVectorTy())
  3610. minFactor = SplatToVector(minFactor, input->getType(), Builder);
  3611. Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, hlslOP, Builder);
  3612. if (Ty->isVectorTy())
  3613. maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
  3614. return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, Builder);
  3615. }
  3616. // round up for integer/pow2 partitioning
  3617. // note that this code assumes the inputs should be in the range [1, inf),
  3618. // which should be enforced by the clamp above.
  3619. Value *RoundUpTessFactor(Value *input, DXIL::TessellatorPartitioning partitionMode,
  3620. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3621. switch (partitionMode) {
  3622. case DXIL::TessellatorPartitioning::Integer:
  3623. return TrivialDxilUnaryOperation(DXIL::OpCode::Round_pi, input, hlslOP, Builder);
  3624. case DXIL::TessellatorPartitioning::Pow2: {
  3625. const unsigned kExponentMask = 0x7f800000;
  3626. const unsigned kExponentLSB = 0x00800000;
  3627. const unsigned kMantissaMask = 0x007fffff;
  3628. Type *Ty = input->getType();
  3629. // (val = (asuint(val) & mantissamask) ?
  3630. // (asuint(val) & exponentmask) + exponentbump :
  3631. // asuint(val) & exponentmask;
  3632. Type *uintTy = Type::getInt32Ty(Ty->getContext());
  3633. if (Ty->isVectorTy())
  3634. uintTy = VectorType::get(uintTy, Ty->getVectorNumElements());
  3635. Value *uintVal = Builder.CreateCast(Instruction::CastOps::FPToUI, input, uintTy);
  3636. Value *mantMask = ConstantInt::get(uintTy->getScalarType(), kMantissaMask);
  3637. mantMask = SplatToVector(mantMask, uintTy, Builder);
  3638. Value *manVal = Builder.CreateAnd(uintVal, mantMask);
  3639. Value *expMask = ConstantInt::get(uintTy->getScalarType(), kExponentMask);
  3640. expMask = SplatToVector(expMask, uintTy, Builder);
  3641. Value *expVal = Builder.CreateAnd(uintVal, expMask);
  3642. Value *expLSB = ConstantInt::get(uintTy->getScalarType(), kExponentLSB);
  3643. expLSB = SplatToVector(expLSB, uintTy, Builder);
  3644. Value *newExpVal = Builder.CreateAdd(expVal, expLSB);
  3645. Value *manValNotZero = Builder.CreateICmpEQ(manVal, ConstantAggregateZero::get(uintTy));
  3646. Value *factors = Builder.CreateSelect(manValNotZero, newExpVal, expVal);
  3647. return Builder.CreateUIToFP(factors, Ty);
  3648. } break;
  3649. case DXIL::TessellatorPartitioning::FractionalEven:
  3650. case DXIL::TessellatorPartitioning::FractionalOdd:
  3651. return input;
  3652. default:
  3653. DXASSERT(0, "invalid partition mode");
  3654. return nullptr;
  3655. }
  3656. }
  3657. Value *TranslateProcessIsolineTessFactors(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  3658. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  3659. hlsl::OP *hlslOP = &helper.hlslOP;
  3660. // Get partition mode
  3661. DXASSERT_NOMSG(helper.functionProps);
  3662. DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, "must be hull shader");
  3663. DXIL::TessellatorPartitioning partition = helper.functionProps->ShaderProps.HS.partition;
  3664. IRBuilder<> Builder(CI);
  3665. Value *rawDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDetailFactor);
  3666. rawDetailFactor = Builder.CreateExtractElement(rawDetailFactor, (uint64_t)0);
  3667. Value *rawDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDensityFactor);
  3668. rawDensityFactor = Builder.CreateExtractElement(rawDensityFactor, (uint64_t)0);
  3669. Value *init = UndefValue::get(VectorType::get(helper.f32Ty, 2));
  3670. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)0);
  3671. init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)1);
  3672. Value *clamped = ClampTessFactor(init, partition, hlslOP, Builder);
  3673. Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
  3674. Value *roundedDetailFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDetailFactor);
  3675. Value *temp = UndefValue::get(VectorType::get(helper.f32Ty, 1));
  3676. Value *roundedX = Builder.CreateExtractElement(rounded, (uint64_t)0);
  3677. temp = Builder.CreateInsertElement(temp, roundedX, (uint64_t)0);
  3678. Builder.CreateStore(temp, roundedDetailFactor);
  3679. Value *roundedDensityFactor = CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDensityFactor);
  3680. Value *roundedY = Builder.CreateExtractElement(rounded, 1);
  3681. temp = Builder.CreateInsertElement(temp, roundedY, (uint64_t)0);
  3682. Builder.CreateStore(temp, roundedDensityFactor);
  3683. return nullptr;
  3684. }
  3685. // 3 inputs, 1 result
  3686. Value *ApplyTriTessFactorOp(Value *input, DXIL::OpCode opcode, hlsl::OP *hlslOP,
  3687. IRBuilder<> &Builder) {
  3688. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3689. Value *input1 = Builder.CreateExtractElement(input, 1);
  3690. Value *input2 = Builder.CreateExtractElement(input, 2);
  3691. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3692. Value *temp =
  3693. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3694. Value *combined =
  3695. TrivialDxilBinaryOperation(opcode, temp, input2, hlslOP, Builder);
  3696. return combined;
  3697. } else {
  3698. // Avg.
  3699. Value *temp = Builder.CreateFAdd(input0, input1);
  3700. Value *combined = Builder.CreateFAdd(temp, input2);
  3701. Value *rcp = ConstantFP::get(input0->getType(), 1.0 / 3.0);
  3702. combined = Builder.CreateFMul(combined, rcp);
  3703. return combined;
  3704. }
  3705. }
  3706. // 4 inputs, 1 result
  3707. Value *ApplyQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  3708. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3709. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3710. Value *input1 = Builder.CreateExtractElement(input, 1);
  3711. Value *input2 = Builder.CreateExtractElement(input, 2);
  3712. Value *input3 = Builder.CreateExtractElement(input, 3);
  3713. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3714. Value *temp0 =
  3715. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3716. Value *temp1 =
  3717. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  3718. Value *combined =
  3719. TrivialDxilBinaryOperation(opcode, temp0, temp1, hlslOP, Builder);
  3720. return combined;
  3721. } else {
  3722. // Avg.
  3723. Value *temp0 = Builder.CreateFAdd(input0, input1);
  3724. Value *temp1 = Builder.CreateFAdd(input2, input3);
  3725. Value *combined = Builder.CreateFAdd(temp0, temp1);
  3726. Value *rcp = ConstantFP::get(input0->getType(), 0.25);
  3727. combined = Builder.CreateFMul(combined, rcp);
  3728. return combined;
  3729. }
  3730. }
  3731. // 4 inputs, 2 result
  3732. Value *Apply2DQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
  3733. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  3734. Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
  3735. Value *input1 = Builder.CreateExtractElement(input, 1);
  3736. Value *input2 = Builder.CreateExtractElement(input, 2);
  3737. Value *input3 = Builder.CreateExtractElement(input, 3);
  3738. if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin) {
  3739. Value *temp0 =
  3740. TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
  3741. Value *temp1 =
  3742. TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
  3743. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  3744. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  3745. combined = Builder.CreateInsertElement(combined, temp1, 1);
  3746. return combined;
  3747. } else {
  3748. // Avg.
  3749. Value *temp0 = Builder.CreateFAdd(input0, input1);
  3750. Value *temp1 = Builder.CreateFAdd(input2, input3);
  3751. Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
  3752. combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
  3753. combined = Builder.CreateInsertElement(combined, temp1, 1);
  3754. Constant *rcp = ConstantFP::get(input0->getType(), 0.5);
  3755. rcp = ConstantVector::getSplat(2, rcp);
  3756. combined = Builder.CreateFMul(combined, rcp);
  3757. return combined;
  3758. }
  3759. }
// Where the rounded scaled factor falls below |cutoffVal|, substitute the
// (clamped, capped, rounded) unscaled average instead. *pClampedResult is
// updated in place with the matching unrounded selection; the rounded
// selection is returned.
Value *ResolveSmallValue(Value **pClampedResult, Value *rounded,
                         Value *averageUnscaled, float cutoffVal,
                         DXIL::TessellatorPartitioning partitionMode,
                         hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *clampedResult = *pClampedResult;
  Value *clampedVal = clampedResult;
  Value *roundedVal = rounded;
  // Do partitioning-specific clamping.
  Value *clampedAvg =
      ClampTessFactor(averageUnscaled, partitionMode, hlslOP, Builder);
  Constant *cutoffVals =
      ConstantFP::get(Type::getFloatTy(rounded->getContext()), cutoffVal);
  // Splat the scalar cutoff to the average's width if it is a vector.
  if (clampedAvg->getType()->isVectorTy())
    cutoffVals = ConstantVector::getSplat(
        clampedAvg->getType()->getVectorNumElements(), cutoffVals);
  // Limit the value.
  clampedAvg = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, clampedAvg,
                                          cutoffVals, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *roundedAvg =
      RoundUpTessFactor(clampedAvg, partitionMode, hlslOP, Builder);
  // Re-splat the cutoff to match |rounded| for the comparison below when the
  // widths differ (scalar average vs. vector rounded result).
  if (rounded->getType() != cutoffVals->getType())
    cutoffVals = ConstantVector::getSplat(
        rounded->getType()->getVectorNumElements(), cutoffVals);
  // If the scaled value is less than three, then take the unscaled average.
  Value *lt = Builder.CreateFCmpOLT(rounded, cutoffVals);
  // Widen the scalar averages to the result width before selecting.
  if (clampedAvg->getType() != clampedVal->getType())
    clampedAvg = SplatToVector(clampedAvg, clampedVal->getType(), Builder);
  *pClampedResult = Builder.CreateSelect(lt, clampedAvg, clampedVal);
  if (roundedAvg->getType() != roundedVal->getType())
    roundedAvg = SplatToVector(roundedAvg, roundedVal->getType(), Builder);
  Value *result = Builder.CreateSelect(lt, roundedAvg, roundedVal);
  return result;
}
// For 2D quad inside factors: per axis, wherever the clamped result is below
// |cutoffVal|, replace both results with the max over the two axes, capped at
// the cutoff (rounded cutoff for the final/rounded path). Both
// *pFinalResult (rounded) and *pClampedResult (unrounded) are two-component
// vectors updated in place.
void ResolveQuadAxes(Value **pFinalResult, Value **pClampedResult,
                     float cutoffVal,
                     DXIL::TessellatorPartitioning partitionMode,
                     hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *finalResult = *pFinalResult;
  Value *clampedResult = *pClampedResult;
  Value *clampR = clampedResult;
  Value *finalR = finalResult;
  Type *f32Ty = Type::getFloatTy(finalR->getContext());
  Constant *cutoffVals = ConstantFP::get(f32Ty, cutoffVal);
  // Caps for the replacement value: raw cutoff for the clamped path,
  // rounded-up cutoff for the final (rounded) path.
  Value *minValsX = cutoffVals;
  Value *minValsY =
      RoundUpTessFactor(cutoffVals, partitionMode, hlslOP, Builder);
  // Max over the two axes of the clamped result...
  Value *clampRX = Builder.CreateExtractElement(clampR, (uint64_t)0);
  Value *clampRY = Builder.CreateExtractElement(clampR, 1);
  Value *maxValsX = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, clampRX,
                                               clampRY, hlslOP, Builder);
  // ...and of the final result.
  Value *finalRX = Builder.CreateExtractElement(finalR, (uint64_t)0);
  Value *finalRY = Builder.CreateExtractElement(finalR, 1);
  Value *maxValsY = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, finalRX,
                                               finalRY, hlslOP, Builder);
  // Don't go over our threshold ("final" one is rounded).
  Value *optionX = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsX,
                                              minValsX, hlslOP, Builder);
  Value *optionY = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsY,
                                              minValsY, hlslOP, Builder);
  Value *clampL = SplatToVector(optionX, clampR->getType(), Builder);
  Value *finalL = SplatToVector(optionY, finalR->getType(), Builder);
  // Per axis: keep the original where it meets the cutoff, otherwise take
  // the capped replacement.
  cutoffVals = ConstantVector::getSplat(2, cutoffVals);
  Value *lt = Builder.CreateFCmpOLT(clampedResult, cutoffVals);
  *pClampedResult = Builder.CreateSelect(lt, clampL, clampR);
  *pFinalResult = Builder.CreateSelect(lt, finalL, finalR);
}
// Lowers the ProcessTessFactors family (Tri/Quad/2DQuad x Avg/Max/Min):
// clamps and rounds the edge factors, reduces them to the inside factor(s)
// with the requested op, applies the inside scale, and stores the rounded
// edge, unrounded inside, and rounded inside outputs through the pointer
// arguments. Always returns nullptr (all results go through out-params).
Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP,
                                   OP::OpCode opcode,
                                   HLOperationLowerHelper &helper,
                                   HLObjectOperationLowerHelper *pObjHelper,
                                   bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Get partition mode
  DXASSERT_NOMSG(helper.functionProps);
  DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull,
           "must be hull shader");
  DXIL::TessellatorPartitioning partition =
      helper.functionProps->ShaderProps.HS.partition;
  IRBuilder<> Builder(CI);
  // Pick the reduction op; NumOpCodes acts as the "Avg" sentinel understood
  // by the Apply*TessFactorOp helpers.
  DXIL::OpCode tessFactorOp = DXIL::OpCode::NumOpCodes;
  switch (IOP) {
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
    tessFactorOp = DXIL::OpCode::FMax;
    break;
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
    tessFactorOp = DXIL::OpCode::FMin;
    break;
  default:
    // Default is Avg.
    break;
  }
  Value *rawEdgeFactor =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawEdgeFactor);
  Value *insideScale =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorInsideScale);
  // Clamp to [0.0f..1.0f], NaN->0.0f.
  Value *scales = CleanupTessFactorScale(insideScale, hlslOP, Builder);
  // Do partitioning-specific clamping.
  Value *clamped = ClampTessFactor(rawEdgeFactor, partition, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
  // Store the output.
  Value *roundedEdgeFactor =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedEdgeFactor);
  Builder.CreateStore(rounded, roundedEdgeFactor);
  // Clamp to [1.0f..Inf], NaN->1.0f.
  bool isQuad = false;
  Value *clean = CleanupTessFactor(rawEdgeFactor, hlslOP, Builder);
  // Reduce the cleaned edge factors to the inside factor(s): one scalar for
  // tri/quad, a 2-vector for 2D quads.
  Value *factors = nullptr;
  switch (IOP) {
  case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
    factors = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
    break;
  case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
    factors = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
    isQuad = true;
    break;
  case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
  case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
    factors = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
    break;
  default:
    DXASSERT(0, "invalid opcode for ProcessTessFactor");
    break;
  }
  // Scale the reduced factor(s) by the cleaned inside scale, splatting the
  // scalar reduction to a vector when the scale is a vector.
  Value *scaledI = nullptr;
  if (scales->getType() == factors->getType())
    scaledI = Builder.CreateFMul(factors, scales);
  else {
    Value *vecFactors = SplatToVector(factors, scales->getType(), Builder);
    scaledI = Builder.CreateFMul(vecFactors, scales);
  }
  // Do partitioning-specific clamping.
  Value *clampedI = ClampTessFactor(scaledI, partition, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *roundedI = RoundUpTessFactor(clampedI, partition, hlslOP, Builder);
  Value *finalI = roundedI;
  if (partition == DXIL::TessellatorPartitioning::FractionalOdd) {
    // FractionalOdd: small scaled inside factors fall back to the unscaled
    // reduction of the cleaned edge factors.
    // If not max, set to AVG.
    if (tessFactorOp != DXIL::OpCode::FMax)
      tessFactorOp = DXIL::OpCode::NumOpCodes;
    bool b2D = false;
    // Recompute the reduction without the inside scale applied.
    Value *avgFactorsI = nullptr;
    switch (IOP) {
    case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
    case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
    case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
      avgFactorsI = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
      b2D = true;
      break;
    case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
    case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
    case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
      avgFactorsI = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
      break;
    case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
    case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
    case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
      avgFactorsI = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
      break;
    default:
      DXASSERT(0, "invalid opcode for ProcessTessFactor");
      break;
    }
    finalI =
        ResolveSmallValue(/*inout*/ &clampedI, roundedI, avgFactorsI,
                          /*cutoff*/ 3.0, partition, hlslOP, Builder);
    if (b2D)
      ResolveQuadAxes(/*inout*/ &finalI, /*inout*/ &clampedI, /*cutoff*/ 3.0,
                      partition, hlslOP, Builder);
  }
  // Store the unrounded inside factor. When the output width differs from
  // the computed width, only the quad path computed one channel: extract it
  // and splat to the output type.
  Value *unroundedInsideFactor = CI->getArgOperand(
      HLOperandIndex::kProcessTessFactorUnRoundedInsideFactor);
  Type *outFactorTy = unroundedInsideFactor->getType()->getPointerElementType();
  if (outFactorTy != clampedI->getType()) {
    DXASSERT(isQuad, "quad only write one channel of out factor");
    (void)isQuad;
    clampedI = Builder.CreateExtractElement(clampedI, (uint64_t)0);
    // Splat clampedI to float2.
    clampedI = SplatToVector(clampedI, outFactorTy, Builder);
  }
  Builder.CreateStore(clampedI, unroundedInsideFactor);
  // Store the rounded inside factor with the same splat rule.
  Value *roundedInsideFactor = CI->getArgOperand(
      HLOperandIndex::kProcessTessFactorRoundedInsideFactor);
  if (outFactorTy != finalI->getType()) {
    DXASSERT(isQuad, "quad only write one channel of out factor");
    finalI = Builder.CreateExtractElement(finalI, (uint64_t)0);
    // Splat finalI to float2.
    finalI = SplatToVector(finalI, outFactorTy, Builder);
  }
  Builder.CreateStore(finalI, roundedInsideFactor);
  return nullptr;
}
  3937. }
  3938. // Ray Tracing.
  3939. namespace {
  3940. Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP,
  3941. OP::OpCode opcode,
  3942. HLOperationLowerHelper &helper,
  3943. HLObjectOperationLowerHelper *pObjHelper,
  3944. bool &Translated) {
  3945. hlsl::OP *hlslOP = &helper.hlslOP;
  3946. Value *THit = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  3947. Value *HitKind = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  3948. Value *Attr = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  3949. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  3950. Type *Ty = Attr->getType();
  3951. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  3952. IRBuilder<> Builder(CI);
  3953. return Builder.CreateCall(F, {opArg, THit, HitKind, Attr});
  3954. }
  3955. Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP,
  3956. OP::OpCode opcode,
  3957. HLOperationLowerHelper &helper,
  3958. HLObjectOperationLowerHelper *pObjHelper,
  3959. bool &Translated) {
  3960. hlsl::OP *hlslOP = &helper.hlslOP;
  3961. Value *ShaderIndex = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  3962. Value *Parameter = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  3963. Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  3964. Type *Ty = Parameter->getType();
  3965. Function *F = hlslOP->GetOpFunc(opcode, Ty);
  3966. IRBuilder<> Builder(CI);
  3967. return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter});
  3968. }
// Lowers TraceRay to the DXIL TraceRay op: the scalar HL arguments are
// forwarded into their operand slots, the RayDesc struct argument is
// exploded into eight scalar operands, and the payload pointer type selects
// the op overload.
Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                         HLOperationLowerHelper &helper,
                         HLObjectOperationLowerHelper *pObjHelper,
                         bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx);
  Value *payLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx);
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  Value *Args[DXIL::OperandIndex::kTraceRayNumOp];
  Args[0] = opArg;
  // HL operands before the RayDesc map 1:1 onto the DXIL operand slots
  // (assumes HLOperandIndex::kTraceRayRayDescOpIdx equals
  // DXIL::OperandIndex::kTraceRayRayDescOpIdx -- TODO confirm).
  for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) {
    Args[i] = CI->getArgOperand(i);
  }
  IRBuilder<> Builder(CI);
  // struct RayDesc
  //{
  // float3 Origin;
  // float TMin;
  // float3 Direction;
  // float TMax;
  //};
  Value *zeroIdx = hlslOP->GetU32Const(0);
  // Field 0: Origin, emitted as three scalar operands.
  Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx});
  origin = Builder.CreateLoad(origin);
  unsigned index = DXIL::OperandIndex::kTraceRayRayDescOpIdx;
  Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
  Args[index++] = Builder.CreateExtractElement(origin, 1);
  Args[index++] = Builder.CreateExtractElement(origin, 2);
  // Field 1: TMin.
  Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)});
  tmin = Builder.CreateLoad(tmin);
  Args[index++] = tmin;
  // Field 2: Direction, three scalar operands.
  Value *direction = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)});
  direction = Builder.CreateLoad(direction);
  Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
  Args[index++] = Builder.CreateExtractElement(direction, 1);
  Args[index++] = Builder.CreateExtractElement(direction, 2);
  // Field 3: TMax.
  Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)});
  tmax = Builder.CreateLoad(tmax);
  Args[index++] = tmax;
  // The payload pointer goes in the final slot and picks the overload.
  Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = payLoad;
  Type *Ty = payLoad->getType();
  Function *F = hlslOP->GetOpFunc(opcode, Ty);
  return Builder.CreateCall(F, Args);
}
  4013. Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4014. HLOperationLowerHelper &helper,
  4015. HLObjectOperationLowerHelper *pObjHelper,
  4016. bool &Translated) {
  4017. hlsl::OP *hlslOP = &helper.hlslOP;
  4018. VectorType *Ty = cast<VectorType>(CI->getType());
  4019. uint8_t vals[] = {0,1,2,3};
  4020. Constant *src = ConstantDataVector::get(CI->getContext(), vals);
  4021. Value *retVal = TrivialDxilOperation(opcode, {nullptr, src}, Ty, CI, hlslOP);
  4022. return retVal;
  4023. }
  4024. Value *TranslateNoArgMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4025. HLOperationLowerHelper &helper,
  4026. HLObjectOperationLowerHelper *pObjHelper,
  4027. bool &Translated) {
  4028. hlsl::OP *hlslOP = &helper.hlslOP;
  4029. VectorType *Ty = cast<VectorType>(CI->getType());
  4030. uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
  4031. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4032. uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
  4033. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4034. Value *retVal =
  4035. TrivialDxilOperation(opcode, {nullptr, rows, cols}, Ty, CI, hlslOP);
  4036. return retVal;
  4037. }
  4038. Value *TranslateNoArgTransposedMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4039. HLOperationLowerHelper &helper,
  4040. HLObjectOperationLowerHelper *pObjHelper,
  4041. bool &Translated) {
  4042. hlsl::OP *hlslOP = &helper.hlslOP;
  4043. VectorType *Ty = cast<VectorType>(CI->getType());
  4044. uint32_t rVals[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 };
  4045. Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
  4046. uint8_t cVals[] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
  4047. Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
  4048. Value *retVal =
  4049. TrivialDxilOperation(opcode, { nullptr, rows, cols }, Ty, CI, hlslOP);
  4050. return retVal;
  4051. }
  4052. Value *TranslateNoArgNoReturnPreserveOutput(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4053. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4054. Instruction *pResult = cast<Instruction>(
  4055. TrivialNoArgOperation(CI, IOP, opcode, helper, pObjHelper, Translated));
  4056. // HL intrinsic must have had a return injected just after the call.
  4057. // SROA_Parameter_HLSL will copy from alloca to output just before each return.
  4058. // Now move call after the copy and just before the return.
  4059. if (isa<ReturnInst>(pResult->getNextNode()))
  4060. return pResult;
  4061. ReturnInst *RetI = cast<ReturnInst>(pResult->getParent()->getTerminator());
  4062. pResult->removeFromParent();
  4063. pResult->insertBefore(RetI);
  4064. return pResult;
  4065. }
  4066. // Special half dot2 with accumulate to float
  4067. Value *TranslateDot2Add(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4068. HLOperationLowerHelper &helper,
  4069. HLObjectOperationLowerHelper *pObjHelper,
  4070. bool &Translated) {
  4071. hlsl::OP *hlslOP = &helper.hlslOP;
  4072. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4073. const unsigned vecSize = 2;
  4074. DXASSERT(src0->getType()->isVectorTy() &&
  4075. vecSize == src0->getType()->getVectorNumElements() &&
  4076. src0->getType()->getScalarType()->isHalfTy(),
  4077. "otherwise, unexpected input dimension or component type");
  4078. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4079. DXASSERT(src0->getType() == src1->getType(),
  4080. "otherwise, mismatched argument types");
  4081. Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4082. Type *accTy = accArg->getType();
  4083. DXASSERT(!accTy->isVectorTy() && accTy->isFloatTy(),
  4084. "otherwise, unexpected accumulator type");
  4085. IRBuilder<> Builder(CI);
  4086. Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
  4087. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  4088. SmallVector<Value *, 6> args;
  4089. args.emplace_back(opArg);
  4090. args.emplace_back(accArg);
  4091. for (unsigned i = 0; i < vecSize; i++)
  4092. args.emplace_back(Builder.CreateExtractElement(src0, i));
  4093. for (unsigned i = 0; i < vecSize; i++)
  4094. args.emplace_back(Builder.CreateExtractElement(src1, i));
  4095. return Builder.CreateCall(dxilFunc, args);
  4096. }
  4097. Value *TranslateDot4AddPacked(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
  4098. HLOperationLowerHelper &helper,
  4099. HLObjectOperationLowerHelper *pObjHelper,
  4100. bool &Translated) {
  4101. hlsl::OP *hlslOP = &helper.hlslOP;
  4102. Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  4103. DXASSERT(
  4104. !src0->getType()->isVectorTy() && src0->getType()->isIntegerTy(32),
  4105. "otherwise, unexpected vector support in high level intrinsic tempalte");
  4106. Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  4107. DXASSERT(src0->getType() == src1->getType(), "otherwise, mismatched argument types");
  4108. Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  4109. Type *accTy = accArg->getType();
  4110. DXASSERT(!accTy->isVectorTy() && accTy->isIntegerTy(32),
  4111. "otherwise, unexpected vector support in high level intrinsic tempalte");
  4112. IRBuilder<> Builder(CI);
  4113. Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
  4114. Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
  4115. return Builder.CreateCall(dxilFunc, { opArg, accArg, src0, src1 });
  4116. }
  4117. } // namespace
  4118. // Lower table.
  4119. namespace {
  4120. Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
  4121. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4122. DXASSERT(0, "unsupported intrinsic");
  4123. return nullptr;
  4124. }
  4125. // SPIRV change starts
  4126. #ifdef ENABLE_SPIRV_CODEGEN
  4127. Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP,
  4128. DXIL::OpCode opcode,
  4129. HLOperationLowerHelper &helper,
  4130. HLObjectOperationLowerHelper *pObjHelper,
  4131. bool &Translated) {
  4132. DXASSERT(0, "unsupported Vulkan intrinsic");
  4133. return nullptr;
  4134. }
  4135. #endif // ENABLE_SPIRV_CODEGEN
  4136. // SPIRV change ends
  4137. Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
  4138. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4139. // Translated in DxilGenerationPass::GenerateStreamOutputOperation.
  4140. // Do nothing here.
  4141. // Mark not translated.
  4142. Translated = false;
  4143. return nullptr;
  4144. }
// This table must stay in exactly the same order as the IntrinsicOp enum:
// TranslateBuiltinIntrinsic indexes it directly with the HL opcode, so a
// misordered or missing entry silently dispatches to the wrong lowering.
// Entries whose lowering derives the DXIL opcode itself use NumOpCodes as a
// "no fixed opcode" placeholder.
IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] = {
    {IntrinsicOp::IOP_AcceptHitAndEndSearch, TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::AcceptHitAndEndSearch},
    {IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc},
    {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_CallShader, TranslateCallShader, DXIL::OpCode::CallShader},
    {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped},
    {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_DispatchRaysDimensions, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysDimensions},
    {IntrinsicOp::IOP_DispatchRaysIndex, TranslateNoArgVectorOperation, DXIL::OpCode::DispatchRaysIndex},
    {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, DXIL::OpCode::EvalCentroid},
    {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex, DXIL::OpCode::AttributeAtVertex},
    {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, DXIL::OpCode::RenderTargetGetSampleCount},
    {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
    {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation, DXIL::OpCode::HitKind},
    {IntrinsicOp::IOP_IgnoreHit, TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::IgnoreHit},
    {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceID},
    {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceIndex},
    {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedCompareStore, TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedExchange, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayDirection},
    {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::ObjectRayOrigin},
    {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
    {IntrinsicOp::IOP_ObjectToWorld3x4, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
    {IntrinsicOp::IOP_ObjectToWorld4x3, TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
    {IntrinsicOp::IOP_PrimitiveIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::PrimitiveIndex},
    {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessIsolineTessFactors, TranslateProcessIsolineTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessQuadTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessQuadTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessQuadTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessTriTessFactorsAvg, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessTriTessFactorsMax, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ProcessTriTessFactorsMin, TranslateProcessTessFactors, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
    {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
    {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross, DXIL::OpCode::QuadOp},
    {IntrinsicOp::IOP_QuadReadLaneAt, TranslateQuadReadLaneAt, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_RayFlags, TrivialNoArgWithRetOperation, DXIL::OpCode::RayFlags},
    {IntrinsicOp::IOP_RayTCurrent, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTCurrent},
    {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation, DXIL::OpCode::RayTMin},
    {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection, DXIL::OpCode::ReportHit},
    {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay},
    {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual, DXIL::OpCode::WaveActiveAllEqual},
    {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B, DXIL::OpCode::WaveAllTrue},
    {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B, DXIL::OpCode::WaveAnyTrue},
    {IntrinsicOp::IOP_WaveActiveBallot, TranslateWaveBallot, DXIL::OpCode::WaveActiveBallot},
    {IntrinsicOp::IOP_WaveActiveBitAnd, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
    {IntrinsicOp::IOP_WaveActiveBitOr, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
    {IntrinsicOp::IOP_WaveActiveBitXor, TranslateWaveA2A, DXIL::OpCode::WaveActiveBit},
    {IntrinsicOp::IOP_WaveActiveCountBits, TranslateWaveA2B, DXIL::OpCode::WaveAllBitCount},
    {IntrinsicOp::IOP_WaveActiveMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
    {IntrinsicOp::IOP_WaveActiveMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
    {IntrinsicOp::IOP_WaveActiveProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
    {IntrinsicOp::IOP_WaveActiveSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp},
    {IntrinsicOp::IOP_WaveGetLaneCount, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneCount},
    {IntrinsicOp::IOP_WaveGetLaneIndex, TranslateWaveToVal, DXIL::OpCode::WaveGetLaneIndex},
    {IntrinsicOp::IOP_WaveIsFirstLane, TranslateWaveToVal, DXIL::OpCode::WaveIsFirstLane},
    {IntrinsicOp::IOP_WavePrefixCountBits, TranslateWaveA2B, DXIL::OpCode::WavePrefixBitCount},
    {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
    {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp},
    {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt, DXIL::OpCode::WaveReadLaneAt},
    {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, DXIL::OpCode::WaveReadLaneFirst},
    {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayDirection},
    {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation, DXIL::OpCode::WorldRayOrigin},
    {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::WorldToObject},
    {IntrinsicOp::IOP_WorldToObject3x4, TranslateNoArgMatrix3x4Operation, DXIL::OpCode::WorldToObject},
    {IntrinsicOp::IOP_WorldToObject4x3, TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::WorldToObject},
    {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_abs, TranslateAbs, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
    {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_asdouble, TranslateAsDouble, DXIL::OpCode::MakeDouble},
    {IntrinsicOp::IOP_asfloat, TranslateBitcast, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_asfloat16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_asin, TrivialUnaryOperation, DXIL::OpCode::Asin},
    {IntrinsicOp::IOP_asint, TranslateBitcast, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble},
    {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan},
    {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi},
    {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos},
    {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos},
    {IntrinsicOp::IOP_countbits, TrivialUnaryOperation, DXIL::OpCode::Countbits},
    {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
    {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
    {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineX},
    {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
    {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
    {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, DXIL::OpCode::DerivFineY},
    {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_dot2add, TranslateDot2Add, DXIL::OpCode::Dot2AddHalf},
    {IntrinsicOp::IOP_dot4add_i8packed, TranslateDot4AddPacked, DXIL::OpCode::Dot4AddI8Packed},
    {IntrinsicOp::IOP_dot4add_u8packed, TranslateDot4AddPacked, DXIL::OpCode::Dot4AddU8Packed},
    {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp},
    {IntrinsicOp::IOP_f16tof32, TranslateF16ToF32, DXIL::OpCode::LegacyF16ToF32},
    {IntrinsicOp::IOP_f32tof16, TranslateF32ToF16, DXIL::OpCode::LegacyF32ToF16},
    {IntrinsicOp::IOP_faceforward, TranslateFaceforward, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_firstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitSHi},
    {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo, DXIL::OpCode::FirstbitLo},
    {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni},
    {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma},
    {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc},
    {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_fwidth, TranslateFWidth, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_isfinite, TrivialIsSpecialFloat, DXIL::OpCode::IsFinite},
    {IntrinsicOp::IOP_isinf, TrivialIsSpecialFloat, DXIL::OpCode::IsInf},
    {IntrinsicOp::IOP_isnan, TrivialIsSpecialFloat, DXIL::OpCode::IsNaN},
    {IntrinsicOp::IOP_ldexp, TranslateLdExp, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_length, TranslateLength, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_lerp, TranslateLerp, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_lit, TranslateLit, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_log, TranslateLog, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_log10, TranslateLog10, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_log2, TrivialUnaryOperation, DXIL::OpCode::Log},
    {IntrinsicOp::IOP_mad, TranslateFUITrinary, DXIL::OpCode::IMad},
    {IntrinsicOp::IOP_max, TranslateFUIBinary, DXIL::OpCode::IMax},
    {IntrinsicOp::IOP_min, TranslateFUIBinary, DXIL::OpCode::IMin},
    {IntrinsicOp::IOP_modf, TranslateModF, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_msad4, TranslateMSad4, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_mul, TranslateMul, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_normalize, TranslateNormalize, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_pow, TranslatePow, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_radians, TranslateRadians, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_rcp, TranslateRCP, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_reflect, TranslateReflect, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_refract, TranslateRefract, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_reversebits, TrivialUnaryOperation, DXIL::OpCode::Bfrev},
    {IntrinsicOp::IOP_round, TrivialUnaryOperation, DXIL::OpCode::Round_ne},
    {IntrinsicOp::IOP_rsqrt, TrivialUnaryOperation, DXIL::OpCode::Rsqrt},
    {IntrinsicOp::IOP_saturate, TrivialUnaryOperation, DXIL::OpCode::Saturate},
    {IntrinsicOp::IOP_sign, TranslateSign, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_sin, TrivialUnaryOperation, DXIL::OpCode::Sin},
    {IntrinsicOp::IOP_sincos, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_sinh, TrivialUnaryOperation, DXIL::OpCode::Hsin},
    {IntrinsicOp::IOP_smoothstep, TranslateSmoothStep, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_source_mark, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt},
    {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan},
    {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan},
    // Legacy DX9-style tex* intrinsics are not supported in DXIL.
    {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex1Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex1Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex2D, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex2Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex2Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex2Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex2Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex3D, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex3Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex3Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex3Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_tex3Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_texCUBE, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_texCUBEbias, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_texCUBEgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_texCUBElod, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_texCUBEproj, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_transpose, EmptyLower, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::IOP_trunc, TrivialUnaryOperation, DXIL::OpCode::Round_z},
    // Object (method) intrinsics start here.
    {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream},
    {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream},
    {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped, TranslateCalculateLOD, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_GetDimensions, TranslateGetDimensions, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Load, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Sample, TranslateSample, DXIL::OpCode::Sample},
    {IntrinsicOp::MOP_SampleBias, TranslateSample, DXIL::OpCode::SampleBias},
    {IntrinsicOp::MOP_SampleCmp, TranslateSample, DXIL::OpCode::SampleCmp},
    {IntrinsicOp::MOP_SampleCmpLevelZero, TranslateSample, DXIL::OpCode::SampleCmpLevelZero},
    {IntrinsicOp::MOP_SampleGrad, TranslateSample, DXIL::OpCode::SampleGrad},
    {IntrinsicOp::MOP_SampleLevel, TranslateSample, DXIL::OpCode::SampleLevel},
    {IntrinsicOp::MOP_Gather, TranslateGather, DXIL::OpCode::TextureGather},
    {IntrinsicOp::MOP_GatherAlpha, TranslateGather, DXIL::OpCode::TextureGather},
    {IntrinsicOp::MOP_GatherBlue, TranslateGather, DXIL::OpCode::TextureGather},
    {IntrinsicOp::MOP_GatherCmp, TranslateGather, DXIL::OpCode::TextureGatherCmp},
    {IntrinsicOp::MOP_GatherCmpAlpha, TranslateGather, DXIL::OpCode::TextureGatherCmp},
    {IntrinsicOp::MOP_GatherCmpBlue, TranslateGather, DXIL::OpCode::TextureGatherCmp},
    {IntrinsicOp::MOP_GatherCmpGreen, TranslateGather, DXIL::OpCode::TextureGatherCmp},
    {IntrinsicOp::MOP_GatherCmpRed, TranslateGather, DXIL::OpCode::TextureGatherCmp},
    {IntrinsicOp::MOP_GatherGreen, TranslateGather, DXIL::OpCode::TextureGather},
    {IntrinsicOp::MOP_GatherRed, TranslateGather, DXIL::OpCode::TextureGather},
    {IntrinsicOp::MOP_GetSamplePosition, TranslateGetSamplePosition, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedCompareStore, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedExchange, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedOr, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_InterlockedXor, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Store, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
    {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes},
    // SPIRV change starts
#ifdef ENABLE_SPIRV_CODEGEN
    {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, DXIL::OpCode::NumOpCodes},
#endif // ENABLE_SPIRV_CODEGEN
    // SPIRV change ends
    // Manually added part: unsigned variants produced by codegen rather than
    // spelled directly in HLSL source.
    { IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
    { IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
    { IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
    { IntrinsicOp::IOP_WaveActiveUMin, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
    { IntrinsicOp::IOP_WaveActiveUProduct, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
    { IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp },
    { IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
    { IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A, DXIL::OpCode::WavePrefixOp },
    { IntrinsicOp::IOP_uabs, TranslateUAbs, DXIL::OpCode::NumOpCodes },
    { IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes },
    { IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi, DXIL::OpCode::FirstbitHi },
    { IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},
    { IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax},
    { IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin },
    { IntrinsicOp::IOP_umul, TranslateFUIBinary, DXIL::OpCode::UMul },
    { IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax },
    { IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
    { IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
};
  4404. }
  4405. static void TranslateBuiltinIntrinsic(CallInst *CI,
  4406. HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  4407. unsigned opcode = hlsl::GetHLOpcode(CI);
  4408. const IntrinsicLower &lower = gLowerTable[opcode];
  4409. Value *Result =
  4410. lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode, helper, pObjHelper, Translated);
  4411. if (Result)
  4412. CI->replaceAllUsesWith(Result);
  4413. }
  4414. // SharedMem.
  4415. namespace {
  4416. bool IsSharedMemPtr(Value *Ptr) {
  4417. return Ptr->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace;
  4418. }
  4419. bool IsLocalVariablePtr(Value *Ptr) {
  4420. while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
  4421. Ptr = GEP->getPointerOperand();
  4422. }
  4423. bool isAlloca = isa<AllocaInst>(Ptr);
  4424. if (isAlloca) return true;
  4425. GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
  4426. if (!GV) return false;
  4427. return GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage;
  4428. }
  4429. }
  4430. // Constant buffer.
  4431. namespace {
  4432. unsigned GetEltTypeByteSizeForConstBuf(Type *EltType, const DataLayout &DL) {
  4433. DXASSERT(EltType->isIntegerTy() || EltType->isFloatingPointTy(),
  4434. "not an element type");
  4435. // TODO: Use real size after change constant buffer into linear layout.
  4436. if (DL.getTypeSizeInBits(EltType) <= 32) {
  4437. // Constant buffer is 4 bytes align.
  4438. return 4;
  4439. } else
  4440. return 8;
  4441. }
  4442. Value *GenerateCBLoad(Value *handle, Value *offset, Type *EltTy, OP *hlslOP,
  4443. IRBuilder<> &Builder) {
  4444. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoad);
  4445. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  4446. // Align to 8 bytes for now.
  4447. Constant *align = hlslOP->GetU32Const(8);
  4448. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, EltTy);
  4449. return Builder.CreateCall(CBLoad, {OpArg, handle, offset, align});
  4450. }
  4451. Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset,
  4452. bool colMajor, OP *OP, const DataLayout &DL,
  4453. IRBuilder<> &Builder) {
  4454. HLMatrixType MatTy = HLMatrixType::cast(matType);
  4455. Type *EltTy = MatTy.getElementTypeForMem();
  4456. unsigned matSize = MatTy.getNumElements();
  4457. std::vector<Value *> elts(matSize);
  4458. Value *EltByteSize = ConstantInt::get(
  4459. offset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  4460. // TODO: use real size after change constant buffer into linear layout.
  4461. Value *baseOffset = offset;
  4462. for (unsigned i = 0; i < matSize; i++) {
  4463. elts[i] = GenerateCBLoad(handle, baseOffset, EltTy, OP, Builder);
  4464. baseOffset = Builder.CreateAdd(baseOffset, EltByteSize);
  4465. }
  4466. Value* Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  4467. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  4468. return Vec;
  4469. }
// Forward declaration: TranslateCBGep and TranslateCBAddressUser (below) are
// mutually recursive — a GEP user of a cbuffer address is lowered by walking
// its own users with an updated byte offset.
void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
                    hlsl::OP *hlslOP, IRBuilder<> &Builder,
                    DxilFieldAnnotation *prevFieldAnnotation,
                    const DataLayout &DL, DxilTypeSystem &dxilTypeSys);
  4474. Value *GenerateVecEltFromGEP(Value *ldData, GetElementPtrInst *GEP,
  4475. IRBuilder<> &Builder) {
  4476. DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
  4477. Value *baseIdx = (GEP->idx_begin())->get();
  4478. Value *zeroIdx = Builder.getInt32(0);
  4479. DXASSERT_LOCALVAR(baseIdx && zeroIdx, baseIdx == zeroIdx,
  4480. "base index must be 0");
  4481. Value *idx = (GEP->idx_begin() + 1)->get();
  4482. if (dyn_cast<ConstantInt>(idx)) {
  4483. return Builder.CreateExtractElement(ldData, idx);
  4484. } else {
  4485. // Dynamic indexing.
  4486. // Copy vec to array.
  4487. Type *Ty = ldData->getType();
  4488. Type *EltTy = Ty->getVectorElementType();
  4489. unsigned vecSize = Ty->getVectorNumElements();
  4490. ArrayType *AT = ArrayType::get(EltTy, vecSize);
  4491. IRBuilder<> AllocaBuilder(
  4492. GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
  4493. Value *tempArray = AllocaBuilder.CreateAlloca(AT);
  4494. Value *zero = Builder.getInt32(0);
  4495. for (unsigned int i = 0; i < vecSize; i++) {
  4496. Value *Elt = Builder.CreateExtractElement(ldData, Builder.getInt32(i));
  4497. Value *Ptr =
  4498. Builder.CreateInBoundsGEP(tempArray, {zero, Builder.getInt32(i)});
  4499. Builder.CreateStore(Elt, Ptr);
  4500. }
  4501. // Load from temp array.
  4502. // Insert the new GEP just before the old and to-be-deleted GEP
  4503. Builder.SetInsertPoint(GEP);
  4504. Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
  4505. return Builder.CreateLoad(EltGEP);
  4506. }
  4507. }
// Lowers one user of a constant-buffer address (resource `handle` plus byte
// `baseOffset`) into explicit CBufferLoad operations, then erases the user.
// Three user shapes are handled:
//   * HL call instructions: matrix loads (HLMatLoadStore) and matrix
//     subscripts/element accesses (HLSubscript);
//   * plain LoadInst of a scalar or vector;
//   * GEP, recursed into via TranslateCBGep (which in turn calls back here
//     for the GEP's own users).
void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset,
                            hlsl::OP *hlslOP,
                            DxilFieldAnnotation *prevFieldAnnotation,
                            DxilTypeSystem &dxilTypeSys, const DataLayout &DL) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Whole-matrix load from the cbuffer.
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      // cbuffers are read-only, so only the two load opcodes can appear.
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
               matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
                          ->getType()
                          ->getPointerElementType();
      Value *newLd = TranslateConstBufMatLd(matType, handle, baseOffset,
                                            colMajor, hlslOP, DL, Builder);
      CI->replaceAllUsesWith(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      // Matrix subscript (m[i]) or element access (m._11 / m[i][j]):
      // compute one byte offset per selected element, load each element,
      // then rewrite all users of the subscript pointer.
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      HLMatrixType MatTy = HLMatrixType::cast(basePtr->getType()->getPointerElementType());
      Type *EltTy = MatTy.getElementTypeForReg();
      Value *EltByteSize = ConstantInt::get(
          baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      // Per-element byte offsets into the cbuffer.
      Value *idxList[16];
      switch (subOp) {
      case HLSubscriptOpcode::ColMatSubscript:
      case HLSubscriptOpcode::RowMatSubscript: {
        // Subscript: each selected element's index is a separate operand.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *idx =
              CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          Value *offset = Builder.CreateMul(idx, EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      case HLSubscriptOpcode::RowMatElement:
      case HLSubscriptOpcode::ColMatElement: {
        // Element access: indices come packed in a constant aggregate.
        Constant *EltIdxs = cast<Constant>(idx);
        for (unsigned i = 0; i < resultSize; i++) {
          Value *offset =
              Builder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
        }
      } break;
      default:
        DXASSERT(0, "invalid operation on const buffer");
        break;
      }
      // Load the selected elements and assemble the result value.
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *eltData =
              GenerateCBLoad(handle, idxList[i], EltTy, hlslOP, Builder);
          ldData = Builder.CreateInsertElement(ldData, eltData, i);
        }
      } else {
        ldData = GenerateCBLoad(handle, idxList[0], EltTy, hlslOP, Builder);
      }
      // Rewrite every user of the subscript pointer; users may only be loads
      // or GEPs whose users are loads (cbuffers are read-only).
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be load here;
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    // Scalar or vector load: one CBufferLoad per element, each element's
    // offset advanced by the element byte stride.
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
    Value *newLd = GenerateCBLoad(handle, baseOffset, EltTy, hlslOP, Builder);
    if (Ty->isVectorTy()) {
      Value *result = UndefValue::get(Ty);
      result = Builder.CreateInsertElement(result, newLd, (uint64_t)0);
      // Update offset by 4 bytes.
      Value *offset =
          Builder.CreateAdd(baseOffset, hlslOP->GetU32Const(EltByteSize));
      for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
        Value *elt = GenerateCBLoad(handle, offset, EltTy, hlslOP, Builder);
        result = Builder.CreateInsertElement(result, elt, i);
        // Update offset by 4 bytes.
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(EltByteSize));
      }
      newLd = result;
    }
    ldInst->replaceAllUsesWith(newLd);
    ldInst->eraseFromParent();
  } else {
    // Must be GEP here
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGep(GEP, handle, baseOffset, hlslOP, Builder,
                   prevFieldAnnotation, DL, dxilTypeSys);
    GEP->eraseFromParent();
  }
}
  4630. void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
  4631. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  4632. DxilFieldAnnotation *prevFieldAnnotation,
  4633. const DataLayout &DL, DxilTypeSystem &dxilTypeSys) {
  4634. SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  4635. Value *offset = baseOffset;
  4636. // update offset
  4637. DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  4638. gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  4639. for (; GEPIt != E; GEPIt++) {
  4640. Value *idx = GEPIt.getOperand();
  4641. unsigned immIdx = 0;
  4642. bool bImmIdx = false;
  4643. if (Constant *constIdx = dyn_cast<Constant>(idx)) {
  4644. immIdx = constIdx->getUniqueInteger().getLimitedValue();
  4645. bImmIdx = true;
  4646. }
  4647. if (GEPIt->isPointerTy()) {
  4648. Type *EltTy = GEPIt->getPointerElementType();
  4649. unsigned size = 0;
  4650. if (StructType *ST = dyn_cast<StructType>(EltTy)) {
  4651. DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
  4652. size = annotation->GetCBufferSize();
  4653. } else {
  4654. DXASSERT(fieldAnnotation, "must be a field");
  4655. if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
  4656. unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
  4657. *fieldAnnotation, EltTy, dxilTypeSys);
  4658. // Decide the nested array size.
  4659. unsigned nestedArraySize = 1;
  4660. Type *EltTy = AT->getArrayElementType();
  4661. // support multi level of array
  4662. while (EltTy->isArrayTy()) {
  4663. ArrayType *EltAT = cast<ArrayType>(EltTy);
  4664. nestedArraySize *= EltAT->getNumElements();
  4665. EltTy = EltAT->getElementType();
  4666. }
  4667. // Align to 4 * 4 bytes.
  4668. unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
  4669. size = nestedArraySize * alignedSize;
  4670. } else {
  4671. size = DL.getTypeAllocSize(EltTy);
  4672. }
  4673. }
  4674. // Align to 4 * 4 bytes.
  4675. size = (size + 15) & 0xfffffff0;
  4676. if (bImmIdx) {
  4677. unsigned tempOffset = size * immIdx;
  4678. offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
  4679. } else {
  4680. Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
  4681. offset = Builder.CreateAdd(offset, tempOffset);
  4682. }
  4683. } else if (GEPIt->isStructTy()) {
  4684. StructType *ST = cast<StructType>(*GEPIt);
  4685. DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
  4686. fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
  4687. unsigned structOffset = fieldAnnotation->GetCBufferOffset();
  4688. offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(structOffset));
  4689. } else if (GEPIt->isArrayTy()) {
  4690. DXASSERT(fieldAnnotation != nullptr, "must a field");
  4691. unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
  4692. *fieldAnnotation, *GEPIt, dxilTypeSys);
  4693. // Decide the nested array size.
  4694. unsigned nestedArraySize = 1;
  4695. Type *EltTy = GEPIt->getArrayElementType();
  4696. // support multi level of array
  4697. while (EltTy->isArrayTy()) {
  4698. ArrayType *EltAT = cast<ArrayType>(EltTy);
  4699. nestedArraySize *= EltAT->getNumElements();
  4700. EltTy = EltAT->getElementType();
  4701. }
  4702. // Align to 4 * 4 bytes.
  4703. unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
  4704. unsigned size = nestedArraySize * alignedSize;
  4705. if (bImmIdx) {
  4706. unsigned tempOffset = size * immIdx;
  4707. offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
  4708. } else {
  4709. Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
  4710. offset = Builder.CreateAdd(offset, tempOffset);
  4711. }
  4712. } else if (GEPIt->isVectorTy()) {
  4713. unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
  4714. if (bImmIdx) {
  4715. unsigned tempOffset = size * immIdx;
  4716. offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
  4717. } else {
  4718. Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
  4719. offset = Builder.CreateAdd(offset, tempOffset);
  4720. }
  4721. } else {
  4722. gep_type_iterator temp = GEPIt;
  4723. temp++;
  4724. DXASSERT(temp == E, "scalar type must be the last");
  4725. }
  4726. }
  4727. for (auto U = GEP->user_begin(); U != GEP->user_end();) {
  4728. Instruction *user = cast<Instruction>(*(U++));
  4729. TranslateCBAddressUser(user, handle, offset, hlslOP, fieldAnnotation,
  4730. dxilTypeSys, DL);
  4731. }
  4732. }
  4733. void TranslateCBOperations(Value *handle, Value *ptr, Value *offset, OP *hlslOP,
  4734. DxilTypeSystem &dxilTypeSys, const DataLayout &DL) {
  4735. auto User = ptr->user_begin();
  4736. auto UserE = ptr->user_end();
  4737. for (; User != UserE;) {
  4738. // Must be Instruction.
  4739. Instruction *I = cast<Instruction>(*(User++));
  4740. TranslateCBAddressUser(I, handle, offset, hlslOP,
  4741. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL);
  4742. }
  4743. }
  4744. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  4745. unsigned channelOffset, Type *EltTy, OP *hlslOP,
  4746. IRBuilder<> &Builder) {
  4747. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  4748. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  4749. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  4750. Type *halfTy = Type::getHalfTy(EltTy->getContext());
  4751. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  4752. Type *i16Ty = Type::getInt16Ty(EltTy->getContext());
  4753. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  4754. bool is16 = (EltTy == halfTy || EltTy == i16Ty) && !hlslOP->UseMinPrecision();
  4755. DXASSERT_LOCALVAR(is16, (is16 && channelOffset < 8) || channelOffset < 4,
  4756. "legacy cbuffer don't across 16 bytes register.");
  4757. if (is64) {
  4758. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4759. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4760. DXASSERT((channelOffset&1)==0,"channel offset must be even for double");
  4761. unsigned eltIdx = channelOffset>>1;
  4762. Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx);
  4763. return Result;
  4764. } else {
  4765. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4766. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  4767. return Builder.CreateExtractValue(loadLegacy, channelOffset);
  4768. }
  4769. }
  4770. Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  4771. unsigned channelOffset, Type *EltTy,
  4772. unsigned vecSize, OP *hlslOP,
  4773. IRBuilder<> &Builder) {
  4774. Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
  4775. DXASSERT(!EltTy->isIntegerTy(1), "Bools should not be loaded as their register representation.");
  4776. Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  4777. Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  4778. Type *halfTy = Type::getHalfTy(EltTy->getContext());
  4779. Type *shortTy = Type::getInt16Ty(EltTy->getContext());
  4780. bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  4781. bool is16 = (EltTy == shortTy || EltTy == halfTy) && !hlslOP->UseMinPrecision();
  4782. DXASSERT((is16 && channelOffset + vecSize <= 8) ||
  4783. (channelOffset + vecSize) <= 4,
  4784. "legacy cbuffer don't across 16 bytes register.");
  4785. if (is16) {
  4786. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4787. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4788. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  4789. for (unsigned i = 0; i < vecSize; ++i) {
  4790. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
  4791. Result = Builder.CreateInsertElement(Result, NewElt, i);
  4792. }
  4793. return Result;
  4794. } else if (is64) {
  4795. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4796. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  4797. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  4798. unsigned smallVecSize = 2;
  4799. if (vecSize < smallVecSize)
  4800. smallVecSize = vecSize;
  4801. for (unsigned i = 0; i < smallVecSize; ++i) {
  4802. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
  4803. Result = Builder.CreateInsertElement(Result, NewElt, i);
  4804. }
  4805. if (vecSize > 2) {
  4806. // Got to next cb register.
  4807. legacyIdx = Builder.CreateAdd(legacyIdx, hlslOP->GetU32Const(1));
  4808. Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  4809. for (unsigned i = 2; i < vecSize; ++i) {
  4810. Value *NewElt =
  4811. Builder.CreateExtractValue(loadLegacy, i-2);
  4812. Result = Builder.CreateInsertElement(Result, NewElt, i);
  4813. }
  4814. }
  4815. return Result;
  4816. } else {
  4817. Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  4818. Value *loadLegacy = Builder.CreateCall(CBLoad, { OpArg, handle, legacyIdx });
  4819. Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
  4820. for (unsigned i = 0; i < vecSize; ++i) {
  4821. Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
  4822. Result = Builder.CreateInsertElement(Result, NewElt, i);
  4823. }
  4824. return Result;
  4825. }
  4826. }
// Load a whole matrix from a legacy-layout cbuffer, one register-aligned
// column (or row) at a time.
//
// MatTy       - matrix type being loaded.
// legacyIdx   - i32 register index where the matrix starts.
// colMajor    - selects column-major vs row-major element ordering.
// memElemRepr - when true, keep the memory representation of the elements;
//               otherwise convert to the register representation via
//               emitLoweredMemToReg before returning.
// Returns the matrix lowered to a flat vector value.
Value *TranslateConstBufMatLdLegacy(HLMatrixType MatTy, Value *handle,
                                    Value *legacyIdx, bool colMajor, OP *OP,
                                    bool memElemRepr, const DataLayout &DL,
                                    IRBuilder<> &Builder) {
  Type *EltTy = MatTy.getElementTypeForMem();
  unsigned matSize = MatTy.getNumElements();
  std::vector<Value *> elts(matSize);
  unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
  if (colMajor) {
    // Column stride in registers: 4 elements' worth of bytes rounded up to a
    // whole number of 16-byte registers.
    unsigned colByteSize = 4 * EltByteSize;
    unsigned colRegSize = (colByteSize + 15) >> 4;
    for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
      Value *col = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
                                        EltTy, MatTy.getNumRows(), OP, Builder);
      for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
        unsigned matIdx = MatTy.getColumnMajorIndex(r, c);
        elts[matIdx] = Builder.CreateExtractElement(col, r);
      }
      // Update offset for a column.
      legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(colRegSize));
    }
  } else {
    // Row stride in registers, computed the same way.
    unsigned rowByteSize = 4 * EltByteSize;
    unsigned rowRegSize = (rowByteSize + 15) >> 4;
    for (unsigned r = 0; r < MatTy.getNumRows(); r++) {
      Value *row = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
                                        EltTy, MatTy.getNumColumns(), OP, Builder);
      for (unsigned c = 0; c < MatTy.getNumColumns(); c++) {
        unsigned matIdx = MatTy.getRowMajorIndex(r, c);
        elts[matIdx] = Builder.CreateExtractElement(row, c);
      }
      // Update offset for a row.
      legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(rowRegSize));
    }
  }
  Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  if (!memElemRepr)
    Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  return Vec;
}
// Forward declaration: GEP lowering on a legacy cbuffer pointer recurses back
// into TranslateCBAddressUserLegacy for each user of the GEP, so the two
// functions are mutually recursive. Definition follows below.
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
                          Value *legacyIdx, unsigned channelOffset,
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
                          DxilFieldAnnotation *prevFieldAnnotation,
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
                          HLObjectOperationLowerHelper *pObjHelper);
  4873. void TranslateResourceInCB(LoadInst *LI,
  4874. HLObjectOperationLowerHelper *pObjHelper,
  4875. GlobalVariable *CbGV) {
  4876. if (LI->user_empty()) {
  4877. LI->eraseFromParent();
  4878. return;
  4879. }
  4880. GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand());
  4881. CallInst *CI = cast<CallInst>(LI->user_back());
  4882. MDNode *MD = HLModule::GetDxilResourceAttrib(CI->getCalledFunction());
  4883. Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, MD);
  4884. // Lower Ptr to GV base Ptr.
  4885. Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr);
  4886. IRBuilder<> Builder(LI);
  4887. Value *GvLd = Builder.CreateLoad(GvPtr);
  4888. LI->replaceAllUsesWith(GvLd);
  4889. LI->eraseFromParent();
  4890. }
// Lower one user of a legacy-cbuffer address into dxil CBufferLoadLegacy
// operations. The user may be:
//  - an HLMatLoadStore call (matrix load),
//  - an HLSubscript call (matrix subscript / element access),
//  - a LoadInst (scalar/vector load, or a resource nested in the cbuffer),
//  - a BitCastInst (recurse into its users at the same address), or
//  - a GetElementPtrInst (delegated to TranslateCBGepLegacy).
// The lowered user and any intermediate instructions are erased.
//
// legacyIdx     - i32 16-byte register index of the address.
// channelOffset - channel within that register.
// prevFieldAnnotation - annotation of the addressed field, if known.
void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
                                  Value *legacyIdx, unsigned channelOffset,
                                  hlsl::OP *hlslOP,
                                  DxilFieldAnnotation *prevFieldAnnotation,
                                  DxilTypeSystem &dxilTypeSys,
                                  const DataLayout &DL,
                                  HLObjectOperationLowerHelper *pObjHelper) {
  IRBuilder<> Builder(user);
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
    unsigned opcode = GetHLOpcode(CI);
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
               matOp == HLMatLoadStoreOpcode::RowMatLoad,
               "No store on cbuffer");
      HLMatrixType MatTy = HLMatrixType::cast(
          CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
              ->getType()->getPointerElementType());
      // This will replace a call, so we should use the register representation
      // of elements.
      Value *newLd = TranslateConstBufMatLdLegacy(
          MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/false, DL, Builder);
      CI->replaceAllUsesWith(newLd);
      CI->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLSubscript) {
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
      HLMatrixType MatTy = HLMatrixType::cast(basePtr->getType()->getPointerElementType());
      Type *EltTy = MatTy.getElementTypeForReg();
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
      Type *resultType = CI->getType()->getPointerElementType();
      unsigned resultSize = 1;
      if (resultType->isVectorTy())
        resultSize = resultType->getVectorNumElements();
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
      _Analysis_assume_(resultSize <= 16);
      Value *idxList[16];
      bool colMajor = subOp == HLSubscriptOpcode::ColMatSubscript ||
                      subOp == HLSubscriptOpcode::ColMatElement;
      // A constant subscript lets us load the matrix and extract statically;
      // anything else needs a runtime-indexable copy (see else branch).
      bool dynamicIndexing = !isa<ConstantInt>(idx) &&
                             !isa<ConstantAggregateZero>(idx) &&
                             !isa<ConstantDataSequential>(idx);
      Value *ldData = UndefValue::get(resultType);
      if (!dynamicIndexing) {
        // This will replace a load or GEP, so we should use the memory
        // representation of elements.
        Value *matLd = TranslateConstBufMatLdLegacy(
            MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/true, DL, Builder);
        // The matLd is keep original layout, just use the idx calc in
        // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript.
        switch (subOp) {
        case HLSubscriptOpcode::RowMatSubscript:
        case HLSubscriptOpcode::ColMatSubscript: {
          // Subscript: element indices are the trailing call operands.
          for (unsigned i = 0; i < resultSize; i++) {
            idxList[i] =
                CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
          }
        } break;
        case HLSubscriptOpcode::RowMatElement:
        case HLSubscriptOpcode::ColMatElement: {
          // Element access: indices come packed in one constant aggregate.
          Constant *EltIdxs = cast<Constant>(idx);
          for (unsigned i = 0; i < resultSize; i++) {
            idxList[i] = EltIdxs->getAggregateElement(i);
          }
        } break;
        default:
          DXASSERT(0, "invalid operation on const buffer");
          break;
        }
        if (resultType->isVectorTy()) {
          for (unsigned i = 0; i < resultSize; i++) {
            Value *eltData = Builder.CreateExtractElement(matLd, idxList[i]);
            ldData = Builder.CreateInsertElement(ldData, eltData, i);
          }
        } else {
          Value *eltData = Builder.CreateExtractElement(matLd, idxList[0]);
          ldData = eltData;
        }
      } else {
        // Must be matSub here.
        Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
        if (colMajor) {
          // idx is c * row + r.
          // For first col, c is 0, so idx is r.
          Value *one = Builder.getInt32(1);
          // row.x = c[0].[idx]
          // row.y = c[1].[idx]
          // row.z = c[2].[idx]
          // row.w = c[3].[idx]
          Value *Elts[4];
          ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumColumns());
          // Allocas are created in the function's entry block.
          IRBuilder<> AllocaBuilder(user->getParent()
                                        ->getParent()
                                        ->getEntryBlock()
                                        .getFirstInsertionPt());
          Value *tempArray = AllocaBuilder.CreateAlloca(AT);
          Value *zero = AllocaBuilder.getInt32(0);
          Value *cbufIdx = legacyIdx;
          for (unsigned int c = 0; c < MatTy.getNumColumns(); c++) {
            Value *ColVal =
                GenerateCBLoadLegacy(handle, cbufIdx, /*channelOffset*/ 0,
                                     EltTy, MatTy.getNumRows(), hlslOP, Builder);
            // Convert ColVal to array for indexing.
            for (unsigned int r = 0; r < MatTy.getNumRows(); r++) {
              Value *Elt =
                  Builder.CreateExtractElement(ColVal, Builder.getInt32(r));
              Value *Ptr = Builder.CreateInBoundsGEP(
                  tempArray, {zero, Builder.getInt32(r)});
              Builder.CreateStore(Elt, Ptr);
            }
            Value *Ptr = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
            Elts[c] = Builder.CreateLoad(Ptr);
            // Update cbufIdx.
            cbufIdx = Builder.CreateAdd(cbufIdx, one);
          }
          if (resultType->isVectorTy()) {
            for (unsigned int c = 0; c < MatTy.getNumColumns(); c++) {
              ldData = Builder.CreateInsertElement(ldData, Elts[c], c);
            }
          } else {
            ldData = Elts[0];
          }
        } else {
          // idx is r * col + c;
          // r = idx / col;
          Value *cCol = ConstantInt::get(idx->getType(), MatTy.getNumColumns());
          idx = Builder.CreateUDiv(idx, cCol);
          idx = Builder.CreateAdd(idx, legacyIdx);
          // Just return a row; 'col' is the number of columns in the row.
          ldData = GenerateCBLoadLegacy(handle, idx, /*channelOffset*/ 0, EltTy,
                                        MatTy.getNumColumns(), hlslOP, Builder);
        }
        if (!resultType->isVectorTy()) {
          ldData = Builder.CreateExtractElement(ldData, Builder.getInt32(0));
        }
      }
      // Replace every use of the subscript result (loads, possibly reached
      // through GEPs) with the loaded data, erasing the intermediates.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
        Value *subsUser = *(U++);
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder);
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
            Value *gepUser = *(gepU++);
            // Must be load here;
            LoadInst *ldUser = cast<LoadInst>(gepUser);
            ldUser->replaceAllUsesWith(subData);
            ldUser->eraseFromParent();
          }
          GEP->eraseFromParent();
        } else {
          // Must be load here.
          LoadInst *ldUser = cast<LoadInst>(subsUser);
          ldUser->replaceAllUsesWith(ldData);
          ldUser->eraseFromParent();
        }
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(0, "not implemented yet");
    }
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
    Type *Ty = ldInst->getType();
    Type *EltTy = Ty->getScalarType();
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
    if (dxilutil::IsHLSLObjectType(Ty)) {
      CallInst *CI = cast<CallInst>(handle);
      GlobalVariable *CbGV = cast<GlobalVariable>(
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
      return;
    }
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
    Value *newLd = nullptr;
    // Vector and scalar loads use the matching GenerateCBLoadLegacy overload.
    if (Ty->isVectorTy())
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
                                   Ty->getVectorNumElements(), hlslOP, Builder);
    else
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
                                   hlslOP, Builder);
    ldInst->replaceAllUsesWith(newLd);
    ldInst->eraseFromParent();
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
    // Look through bitcasts: lower each of their users at the same address.
    for (auto it = BCI->user_begin(); it != BCI->user_end(); ) {
      Instruction *I = cast<Instruction>(*it++);
      TranslateCBAddressUserLegacy(I,
                                   handle, legacyIdx, channelOffset, hlslOP,
                                   prevFieldAnnotation, dxilTypeSys,
                                   DL, pObjHelper);
    }
    BCI->eraseFromParent();
  } else {
    // Must be GEP here
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    TranslateCBGepLegacy(GEP, handle, legacyIdx, channelOffset, hlslOP, Builder,
                         prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
    GEP->eraseFromParent();
  }
}
  5088. void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
  5089. Value *legacyIndex, unsigned channel,
  5090. hlsl::OP *hlslOP, IRBuilder<> &Builder,
  5091. DxilFieldAnnotation *prevFieldAnnotation,
  5092. const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
  5093. HLObjectOperationLowerHelper *pObjHelper) {
  5094. SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  5095. // update offset
  5096. DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
  5097. gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  5098. for (; GEPIt != E; GEPIt++) {
  5099. Value *idx = GEPIt.getOperand();
  5100. unsigned immIdx = 0;
  5101. bool bImmIdx = false;
  5102. if (Constant *constIdx = dyn_cast<Constant>(idx)) {
  5103. immIdx = constIdx->getUniqueInteger().getLimitedValue();
  5104. bImmIdx = true;
  5105. }
  5106. if (GEPIt->isPointerTy()) {
  5107. Type *EltTy = GEPIt->getPointerElementType();
  5108. unsigned size = 0;
  5109. if (StructType *ST = dyn_cast<StructType>(EltTy)) {
  5110. DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
  5111. size = annotation->GetCBufferSize();
  5112. } else {
  5113. DXASSERT(fieldAnnotation, "must be a field");
  5114. if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
  5115. unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
  5116. *fieldAnnotation, EltTy, dxilTypeSys);
  5117. // Decide the nested array size.
  5118. unsigned nestedArraySize = 1;
  5119. Type *EltTy = AT->getArrayElementType();
  5120. // support multi level of array
  5121. while (EltTy->isArrayTy()) {
  5122. ArrayType *EltAT = cast<ArrayType>(EltTy);
  5123. nestedArraySize *= EltAT->getNumElements();
  5124. EltTy = EltAT->getElementType();
  5125. }
  5126. // Align to 4 * 4 bytes.
  5127. unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
  5128. size = nestedArraySize * alignedSize;
  5129. } else {
  5130. size = DL.getTypeAllocSize(EltTy);
  5131. }
  5132. }
  5133. // Skip 0 idx.
  5134. if (bImmIdx && immIdx == 0)
  5135. continue;
  5136. // Align to 4 * 4 bytes.
  5137. size = (size + 15) & 0xfffffff0;
  5138. // Take this as array idxing.
  5139. if (bImmIdx) {
  5140. unsigned tempOffset = size * immIdx;
  5141. unsigned idxInc = tempOffset >> 4;
  5142. legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
  5143. } else {
  5144. Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size>>4));
  5145. legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
  5146. }
  5147. // Array always start from x channel.
  5148. channel = 0;
  5149. } else if (GEPIt->isStructTy()) {
  5150. StructType *ST = cast<StructType>(*GEPIt);
  5151. DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
  5152. fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
  5153. unsigned idxInc = 0;
  5154. unsigned structOffset = 0;
  5155. if (fieldAnnotation->GetCompType().Is16Bit() &&
  5156. !hlslOP->UseMinPrecision()) {
  5157. structOffset = fieldAnnotation->GetCBufferOffset() >> 1;
  5158. channel += structOffset;
  5159. idxInc = channel >> 3;
  5160. channel = channel & 0x7;
  5161. }
  5162. else {
  5163. structOffset = fieldAnnotation->GetCBufferOffset() >> 2;
  5164. channel += structOffset;
  5165. idxInc = channel >> 2;
  5166. channel = channel & 0x3;
  5167. }
  5168. if (idxInc)
  5169. legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
  5170. } else if (GEPIt->isArrayTy()) {
  5171. DXASSERT(fieldAnnotation != nullptr, "must a field");
  5172. unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
  5173. *fieldAnnotation, *GEPIt, dxilTypeSys);
  5174. // Decide the nested array size.
  5175. unsigned nestedArraySize = 1;
  5176. Type *EltTy = GEPIt->getArrayElementType();
  5177. // support multi level of array
  5178. while (EltTy->isArrayTy()) {
  5179. ArrayType *EltAT = cast<ArrayType>(EltTy);
  5180. nestedArraySize *= EltAT->getNumElements();
  5181. EltTy = EltAT->getElementType();
  5182. }
  5183. // Align to 4 * 4 bytes.
  5184. unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
  5185. unsigned size = nestedArraySize * alignedSize;
  5186. if (bImmIdx) {
  5187. unsigned tempOffset = size * immIdx;
  5188. unsigned idxInc = tempOffset >> 4;
  5189. legacyIndex = Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
  5190. } else {
  5191. Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size>>4));
  5192. legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
  5193. }
  5194. // Array always start from x channel.
  5195. channel = 0;
  5196. } else if (GEPIt->isVectorTy()) {
  5197. unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
  5198. // Indexing on vector.
  5199. if (bImmIdx) {
  5200. unsigned tempOffset = size * immIdx;
  5201. if (size == 2) { // 16-bit types
  5202. unsigned channelInc = tempOffset >> 1;
  5203. DXASSERT((channel + channelInc) <= 8, "vector should not cross cb register (8x16bit)");
  5204. channel += channelInc;
  5205. if (channel == 8) {
  5206. // Get to another row.
  5207. // Update index and channel.
  5208. channel = 0;
  5209. legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
  5210. }
  5211. }
  5212. else {
  5213. unsigned channelInc = tempOffset >> 2;
  5214. DXASSERT((channel + channelInc) <= 4, "vector should not cross cb register (8x32bit)");
  5215. channel += channelInc;
  5216. if (channel == 4) {
  5217. // Get to another row.
  5218. // Update index and channel.
  5219. channel = 0;
  5220. legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
  5221. }
  5222. }
  5223. } else {
  5224. Type *EltTy = GEPIt->getVectorElementType();
  5225. // Load the whole register.
  5226. Value *newLd = GenerateCBLoadLegacy(handle, legacyIndex,
  5227. /*channelOffset*/ 0, EltTy,
  5228. /*vecSize*/ 4, hlslOP, Builder);
  5229. // Copy to array.
  5230. IRBuilder<> AllocaBuilder(GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
  5231. Value *tempArray = AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, 4));
  5232. Value *zeroIdx = hlslOP->GetU32Const(0);
  5233. for (unsigned i = 0; i < 4; i++) {
  5234. Value *Elt = Builder.CreateExtractElement(newLd, i);
  5235. Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, hlslOP->GetU32Const(i)});
  5236. Builder.CreateStore(Elt, EltGEP);
  5237. }
  5238. // Make sure this is the end of GEP.
  5239. gep_type_iterator temp = GEPIt;
  5240. temp++;
  5241. DXASSERT(temp == E, "scalar type must be the last");
  5242. // Replace the GEP with array GEP.
  5243. Value *ArrayGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, idx});
  5244. GEP->replaceAllUsesWith(ArrayGEP);
  5245. return;
  5246. }
  5247. } else {
  5248. gep_type_iterator temp = GEPIt;
  5249. temp++;
  5250. DXASSERT(temp == E, "scalar type must be the last");
  5251. }
  5252. }
  5253. for (auto U = GEP->user_begin(); U != GEP->user_end();) {
  5254. Instruction *user = cast<Instruction>(*(U++));
  5255. TranslateCBAddressUserLegacy(user, handle, legacyIndex, channel, hlslOP, fieldAnnotation,
  5256. dxilTypeSys, DL, pObjHelper);
  5257. }
  5258. }
  5259. void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP,
  5260. DxilTypeSystem &dxilTypeSys,
  5261. const DataLayout &DL,
  5262. HLObjectOperationLowerHelper *pObjHelper) {
  5263. auto User = ptr->user_begin();
  5264. auto UserE = ptr->user_end();
  5265. Value *zeroIdx = hlslOP->GetU32Const(0);
  5266. for (; User != UserE;) {
  5267. // Must be Instruction.
  5268. Instruction *I = cast<Instruction>(*(User++));
  5269. TranslateCBAddressUserLegacy(
  5270. I, handle, zeroIdx, /*channelOffset*/ 0, hlslOP,
  5271. /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL, pObjHelper);
  5272. }
  5273. }
  5274. }
  5275. // Structured buffer.
  5276. namespace {
  5277. // Calculate offset.
  5278. Value *GEPIdxToOffset(GetElementPtrInst *GEP, IRBuilder<> &Builder,
  5279. hlsl::OP *OP, const DataLayout &DL) {
  5280. SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  5281. Value *addr = nullptr;
  5282. // update offset
  5283. if (GEP->hasAllConstantIndices()) {
  5284. unsigned gepOffset =
  5285. DL.getIndexedOffset(GEP->getPointerOperandType(), Indices);
  5286. addr = OP->GetU32Const(gepOffset);
  5287. } else {
  5288. Value *offset = OP->GetU32Const(0);
  5289. gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
  5290. for (; GEPIt != E; GEPIt++) {
  5291. Value *idx = GEPIt.getOperand();
  5292. unsigned immIdx = 0;
  5293. if (llvm::Constant *constIdx = dyn_cast<llvm::Constant>(idx)) {
  5294. immIdx = constIdx->getUniqueInteger().getLimitedValue();
  5295. if (immIdx == 0) {
  5296. continue;
  5297. }
  5298. }
  5299. if (GEPIt->isPointerTy() || GEPIt->isArrayTy() || GEPIt->isVectorTy()) {
  5300. unsigned size = DL.getTypeAllocSize(GEPIt->getSequentialElementType());
  5301. if (immIdx) {
  5302. unsigned tempOffset = size * immIdx;
  5303. offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset));
  5304. } else {
  5305. Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size));
  5306. offset = Builder.CreateAdd(offset, tempOffset);
  5307. }
  5308. } else if (GEPIt->isStructTy()) {
  5309. const StructLayout *Layout = DL.getStructLayout(cast<StructType>(*GEPIt));
  5310. unsigned structOffset = Layout->getElementOffset(immIdx);
  5311. offset = Builder.CreateAdd(offset, OP->GetU32Const(structOffset));
  5312. } else {
  5313. gep_type_iterator temp = GEPIt;
  5314. temp++;
  5315. DXASSERT(temp == E, "scalar type must be the last");
  5316. }
  5317. };
  5318. addr = offset;
  5319. }
  5320. // TODO: x4 for byte address
  5321. return addr;
  5322. }
  5323. void GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
  5324. Value *status, Type *EltTy,
  5325. MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
  5326. IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment) {
  5327. OP::OpCode opcode = OP::OpCode::RawBufferLoad;
  5328. DXASSERT(resultElts.size() <= 4,
  5329. "buffer load cannot load more than 4 values");
  5330. Function *dxilF = OP->GetOpFunc(opcode, EltTy);
  5331. Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents, OP);
  5332. Value *Args[] = {OP->GetU32Const((unsigned)opcode),
  5333. handle,
  5334. bufIdx,
  5335. offset,
  5336. mask,
  5337. alignment};
  5338. Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
  5339. for (unsigned i = 0; i < resultElts.size(); i++) {
  5340. resultElts[i] = Builder.CreateExtractValue(Ld, i);
  5341. }
  5342. // status
  5343. UpdateStatus(Ld, status, Builder, OP);
  5344. return;
  5345. }
  5346. void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
  5347. Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder,
  5348. ArrayRef<Value *> vals, uint8_t mask, Constant *alignment) {
  5349. OP::OpCode opcode = OP::OpCode::RawBufferStore;
  5350. DXASSERT(vals.size() == 4, "buffer store need 4 values");
  5351. Value *Args[] = {OP->GetU32Const((unsigned)opcode),
  5352. handle,
  5353. bufIdx,
  5354. offset,
  5355. vals[0],
  5356. vals[1],
  5357. vals[2],
  5358. vals[3],
  5359. OP->GetU8Const(mask),
  5360. alignment};
  5361. Function *dxilF = OP->GetOpFunc(opcode, EltTy);
  5362. Builder.CreateCall(dxilF, Args);
  5363. }
  5364. Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
  5365. Value *handle, hlsl::OP *OP, Value *status,
  5366. Value *bufIdx, Value *baseOffset,
  5367. const DataLayout &DL) {
  5368. HLMatrixType MatTy = HLMatrixType::cast(matType);
  5369. Type *EltTy = MatTy.getElementTypeForMem();
  5370. unsigned EltSize = DL.getTypeAllocSize(EltTy);
  5371. Constant* alignment = OP->GetI32Const(EltSize);
  5372. Value *offset = baseOffset;
  5373. if (baseOffset == nullptr)
  5374. offset = OP->GetU32Const(0);
  5375. unsigned matSize = MatTy.getNumElements();
  5376. std::vector<Value *> elts(matSize);
  5377. unsigned rest = (matSize % 4);
  5378. if (rest) {
  5379. Value *ResultElts[4];
  5380. GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 3, alignment);
  5381. for (unsigned i = 0; i < rest; i++)
  5382. elts[i] = ResultElts[i];
  5383. offset = Builder.CreateAdd(offset, OP->GetU32Const(EltSize * rest));
  5384. }
  5385. for (unsigned i = rest; i < matSize; i += 4) {
  5386. Value *ResultElts[4];
  5387. GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 4, alignment);
  5388. elts[i] = ResultElts[0];
  5389. elts[i + 1] = ResultElts[1];
  5390. elts[i + 2] = ResultElts[2];
  5391. elts[i + 3] = ResultElts[3];
  5392. // Update offset by 4*4bytes.
  5393. offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
  5394. }
  5395. Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
  5396. Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
  5397. return Vec;
  5398. }
// Stores an HLSL matrix to a structured/raw buffer as a sequence of
// rawBufferStore calls, 4 elements per call, masking off the padded
// (undef) lanes of the final partial group.
// baseOffset may be null, which means offset 0.
void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
                             hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
                             Value *val, const DataLayout &DL) {
  HLMatrixType MatTy = HLMatrixType::cast(matType);
  Type *EltTy = MatTy.getElementTypeForMem();
  // Convert the in-register vector into its memory representation first.
  val = MatTy.emitLoweredRegToMem(val, Builder);
  unsigned EltSize = DL.getTypeAllocSize(EltTy);
  Constant *Alignment = OP->GetI32Const(EltSize);
  Value *offset = baseOffset;
  if (baseOffset == nullptr)
    offset = OP->GetU32Const(0);
  unsigned matSize = MatTy.getNumElements();
  Value *undefElt = UndefValue::get(EltTy);
  // Round the element list up to a multiple of 4, padded with undef, so
  // every GenerateStructBufSt below can be handed exactly 4 values.
  unsigned storeSize = matSize;
  if (matSize % 4) {
    storeSize = matSize + 4 - (matSize & 3);
  }
  std::vector<Value *> elts(storeSize, undefElt);
  for (unsigned i = 0; i < matSize; i++)
    elts[i] = Builder.CreateExtractElement(val, i);
  for (unsigned i = 0; i < matSize; i += 4) {
    // Build the component mask for the real (non-padding) lanes of this
    // group. undef is uniqued per type, so pointer comparison against
    // undefElt identifies padding lanes.
    uint8_t mask = 0;
    for (unsigned j = 0; j < 4 && (i+j) < matSize; j++) {
      if (elts[i+j] != undefElt)
        mask |= (1<<j);
    }
    GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder,
                        {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask,
                        Alignment);
    // Advance past the 4 elements just stored.
    offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
  }
}
  5432. void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP,
  5433. Value *status, Value *bufIdx,
  5434. Value *baseOffset, const DataLayout &DL) {
  5435. IRBuilder<> Builder(CI);
  5436. HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
  5437. unsigned opcode = GetHLOpcode(CI);
  5438. DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore,
  5439. "only translate matrix loadStore here.");
  5440. HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
  5441. // Due to the current way the initial codegen generates matrix
  5442. // orientation casts, the in-register vector matrix has already been
  5443. // reordered based on the destination's row or column-major packing orientation.
  5444. switch (matOp) {
  5445. case HLMatLoadStoreOpcode::RowMatLoad:
  5446. case HLMatLoadStoreOpcode::ColMatLoad: {
  5447. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
  5448. Value *NewLd = TranslateStructBufMatLd(
  5449. ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
  5450. bufIdx, baseOffset, DL);
  5451. CI->replaceAllUsesWith(NewLd);
  5452. } break;
  5453. case HLMatLoadStoreOpcode::RowMatStore:
  5454. case HLMatLoadStoreOpcode::ColMatStore: {
  5455. Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
  5456. Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
  5457. TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
  5458. handle, OP, bufIdx, baseOffset, val,
  5459. DL);
  5460. } break;
  5461. }
  5462. CI->eraseFromParent();
  5463. }
// Forward declaration: lowers one user (load/store/HL call/GEP/bitcast) of a
// structured-buffer subscript. Defined after the matrix-subscript helpers
// below, which call it mutually-recursively.
void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
                                     Value *bufIdx, Value *baseOffset,
                                     Value *status, hlsl::OP *OP, const DataLayout &DL);
// For case like mat[i][j].
// IdxList is [i][0], [i][1], [i][2], [i][3].
// Idx is j.
// Returns [i][j] rather than mat[i][j], because a resource pointer and a
// temp pointer need different code generation.
static Value *LowerGEPOnMatIndexListToIndex(
    llvm::GetElementPtrInst *GEP, ArrayRef<Value *> IdxList) {
  IRBuilder<> Builder(GEP);
  Value *zero = Builder.getInt32(0);
  DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
  Value *baseIdx = (GEP->idx_begin())->get();
  // Integer constants are uniqued per LLVM context, so pointer comparison
  // against the i32 0 constant is a valid equality check here.
  DXASSERT_LOCALVAR(baseIdx, baseIdx == zero, "base index must be 0");
  Value *Idx = (GEP->idx_begin() + 1)->get();
  if (ConstantInt *immIdx = dyn_cast<ConstantInt>(Idx)) {
    // Constant index: select the precomputed per-element offset directly.
    return IdxList[immIdx->getSExtValue()];
  }
  else {
    // Dynamic index: spill the offset list into a stack array and load the
    // selected entry. The alloca goes at the function entry block so it is
    // not re-executed inside loops.
    IRBuilder<> AllocaBuilder(
        GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
    unsigned size = IdxList.size();
    // Store idxList to temp array.
    ArrayType *AT = ArrayType::get(IdxList[0]->getType(), size);
    Value *tempArray = AllocaBuilder.CreateAlloca(AT);
    for (unsigned i = 0; i < size; i++) {
      Value *EltPtr = Builder.CreateGEP(tempArray, { zero, Builder.getInt32(i) });
      Builder.CreateStore(IdxList[i], EltPtr);
    }
    // Load the dynamically-selected offset.
    Value *GEPOffset = Builder.CreateGEP(tempArray, { zero, Idx });
    return Builder.CreateLoad(GEPOffset);
  }
}
// Subscript operator for a matrix member of a structured-buffer element.
// Computes one byte offset per accessed matrix element (idxList), then
// rewrites every user of the HL subscript call (GEP, load, store, or
// nested subscript) into rawBufferLoad/rawBufferStore at those offsets.
void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
                                    hlsl::OP *hlslOP, Value *bufIdx,
                                    Value *baseOffset, Value *status,
                                    const DataLayout &DL) {
  Value *zeroIdx = hlslOP->GetU32Const(0);
  if (baseOffset == nullptr)
    baseOffset = zeroIdx;
  unsigned opcode = GetHLOpcode(CI);
  IRBuilder<> subBuilder(CI);
  HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  HLMatrixType MatTy = HLMatrixType::cast(basePtr->getType()->getPointerElementType());
  Type *EltTy = MatTy.getElementTypeForReg();
  Constant *alignment = hlslOP->GetI32Const(DL.getTypeAllocSize(EltTy));
  Value *EltByteSize = ConstantInt::get(
      baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
  Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
  Type *resultType = CI->getType()->getPointerElementType();
  unsigned resultSize = 1;
  if (resultType->isVectorTy())
    resultSize = resultType->getVectorNumElements();
  DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
  _Analysis_assume_(resultSize <= 16);
  // One byte offset per element selected by this subscript.
  std::vector<Value *> idxList(resultSize);
  switch (subOp) {
  case HLSubscriptOpcode::ColMatSubscript:
  case HLSubscriptOpcode::RowMatSubscript: {
    // Row/column subscript: each element index is a separate operand.
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
      offset = subBuilder.CreateMul(offset, EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  case HLSubscriptOpcode::RowMatElement:
  case HLSubscriptOpcode::ColMatElement: {
    // Element subscript (e.g. _m00): indices come packed in one constant.
    Constant *EltIdxs = cast<Constant>(idx);
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  default:
    DXASSERT(0, "invalid operation on const buffer");
    break;
  }
  Value *undefElt = UndefValue::get(EltTy);
  // Rewrite users; post-increment before lowering because each case erases
  // the user from CI's use list.
  for (auto U = CI->user_begin(); U != CI->user_end();) {
    Value *subsUser = *(U++);
    if (resultSize == 1) {
      // Scalar result: hand the single offset to the generic user lowering.
      TranslateStructBufSubscriptUser(cast<Instruction>(subsUser), handle,
                                      bufIdx, idxList[0], status, hlslOP, DL);
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
      // mat[i][j]: resolve the second-level index against idxList, then
      // lower the GEP's users against that single offset.
      Value *GEPOffset = LowerGEPOnMatIndexListToIndex(GEP, idxList);
      for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
        Instruction *gepUserInst = cast<Instruction>(*(gepU++));
        TranslateStructBufSubscriptUser(gepUserInst, handle, bufIdx, GEPOffset,
                                        status, hlslOP, DL);
      }
      GEP->eraseFromParent();
    } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
      IRBuilder<> stBuilder(stUser);
      Value *Val = stUser->getValueOperand();
      if (Val->getType()->isVectorTy()) {
        // One single-component store per element offset.
        for (unsigned i = 0; i < resultSize; i++) {
          Value *EltVal = stBuilder.CreateExtractElement(Val, i);
          uint8_t mask = DXIL::kCompMask_X;
          GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
                              stBuilder, {EltVal, undefElt, undefElt, undefElt},
                              mask, alignment);
        }
      } else {
        uint8_t mask = DXIL::kCompMask_X;
        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
                            stBuilder, {Val, undefElt, undefElt, undefElt},
                            mask, alignment);
      }
      stUser->eraseFromParent();
    } else {
      // Must be load here.
      LoadInst *ldUser = cast<LoadInst>(subsUser);
      IRBuilder<> ldBuilder(ldUser);
      Value *ldData = UndefValue::get(resultType);
      if (resultType->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *ResultElt;
          // TODO: This can be inefficient for row major matrix load
          GenerateStructBufLd(handle, bufIdx, idxList[i],
                              /*status*/ nullptr, EltTy, ResultElt, hlslOP,
                              ldBuilder, 1, alignment);
          ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
        }
      } else {
        // NOTE(review): this scalar path requests 4 components but consumes
        // only one — looks like it should be 1, as in the vector path above;
        // confirm before changing.
        GenerateStructBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
                            EltTy, ldData, hlslOP, ldBuilder, 4, alignment);
      }
      ldUser->replaceAllUsesWith(ldData);
      ldUser->eraseFromParent();
    }
  }
  CI->eraseFromParent();
}
// Lowers a single user of a structured-buffer subscript into DXIL raw
// buffer operations. Handles: HL intrinsic calls (atomics; MOP_Load is
// unimplemented), matrix load/store and nested matrix subscripts, plain
// load/store (including scalar arrays and vectors), and recurses through
// bitcasts and GEPs (GEPs fold their constant/dynamic indices into the
// byte offset). Every handled user is erased from the function.
// baseOffset may be null, which means offset 0.
void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
                                     Value *bufIdx, Value *baseOffset,
                                     Value *status, hlsl::OP *OP, const DataLayout &DL) {
  IRBuilder<> Builder(user);
  if (CallInst *userCall = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group = // user call?
        hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
    unsigned opcode = GetHLOpcode(userCall);
    // For case element type of structure buffer is not structure type.
    if (baseOffset == nullptr)
      baseOffset = OP->GetU32Const(0);
    if (group == HLOpcodeGroup::HLIntrinsic) {
      IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
      switch (IOP) {
      case IntrinsicOp::MOP_Load: {
        if (userCall->getType()->isPointerTy()) {
          // Struct will return pointers which like []
        } else {
          // Use builtin types on structuredBuffer.
        }
        DXASSERT(0, "not implement yet");
      } break;
      // Each interlocked intrinsic maps to an AtomicBinOp dxil op with the
      // matching binop code; the helper packs handle/bufIdx/offset.
      case IntrinsicOp::IOP_InterlockedAdd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedAnd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedExchange: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedOr: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedXor: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedCompareStore:
      case IntrinsicOp::IOP_InterlockedCompareExchange: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
                            handle, bufIdx, baseOffset);
        TranslateAtomicCmpXChg(helper, Builder, OP);
      } break;
      default:
        DXASSERT(0, "invalid opcode");
        break;
      }
      userCall->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLMatLoadStore)
      // Matrix load/store on a buffer member; erases userCall itself.
      TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx,
                                baseOffset, DL);
    else if (group == HLOpcodeGroup::HLSubscript) {
      // Nested subscript (matrix member); erases userCall itself.
      TranslateStructBufMatSubscript(userCall, handle, OP, bufIdx, baseOffset, status, DL);
    }
  } else if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
    LoadInst *ldInst = dyn_cast<LoadInst>(user);
    StoreInst *stInst = dyn_cast<StoreInst>(user);
    Type *Ty = isa<LoadInst>(user) ? ldInst->getType()
                                   : stInst->getValueOperand()->getType();
    Type *pOverloadTy = Ty->getScalarType();
    Value *offset = baseOffset;
    if (baseOffset == nullptr)
      offset = OP->GetU32Const(0);
    // Arrays are handled element-by-element with a stride of the array
    // element's alloc size.
    unsigned arraySize = 1;
    Value *eltSize = nullptr;
    if (pOverloadTy->isArrayTy()) {
      arraySize = pOverloadTy->getArrayNumElements();
      eltSize = OP->GetU32Const(
          DL.getTypeAllocSize(pOverloadTy->getArrayElementType()));
      pOverloadTy = pOverloadTy->getArrayElementType()->getScalarType();
    }
    if (ldInst) {
      // Loads one scalar/vector element at `offset` and re-packs it to Ty.
      auto LdElement = [&](Value *offset, IRBuilder<> &Builder) -> Value * {
        Value *ResultElts[4];
        unsigned numComponents = 0;
        if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
          numComponents = VTy->getNumElements();
        }
        else {
          numComponents = 1;
        }
        Constant *alignment =
            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
        GenerateStructBufLd(handle, bufIdx, offset, status, pOverloadTy,
                            ResultElts, OP, Builder, numComponents, alignment);
        return ScalarizeElements(Ty, ResultElts, Builder);
      };
      Value *newLd = LdElement(offset, Builder);
      if (arraySize > 1) {
        // Array case: load remaining elements, advancing offset per element.
        newLd =
            Builder.CreateInsertValue(UndefValue::get(Ty), newLd, (uint64_t)0);
        for (unsigned i = 1; i < arraySize; i++) {
          offset = Builder.CreateAdd(offset, eltSize);
          Value *eltLd = LdElement(offset, Builder);
          newLd = Builder.CreateInsertValue(newLd, eltLd, i);
        }
      }
      ldInst->replaceAllUsesWith(newLd);
    } else {
      Value *val = stInst->getValueOperand();
      // Stores one scalar/vector element at `offset` with a mask covering
      // only the lanes actually present in the value.
      auto StElement = [&](Value *offset, Value *val, IRBuilder<> &Builder) {
        Value *undefVal = llvm::UndefValue::get(pOverloadTy);
        Value *vals[] = {undefVal, undefVal, undefVal, undefVal};
        uint8_t mask = 0;
        if (Ty->isVectorTy()) {
          unsigned vectorNumElements = Ty->getVectorNumElements();
          DXASSERT(vectorNumElements <= 4, "up to 4 elements in vector");
          _Analysis_assume_(vectorNumElements <= 4);
          for (unsigned i = 0; i < vectorNumElements; i++) {
            vals[i] = Builder.CreateExtractElement(val, i);
            mask |= (1<<i);
          }
        } else {
          vals[0] = val;
          mask = DXIL::kCompMask_X;
        }
        Constant *alignment =
            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
        GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder,
                            vals, mask, alignment);
      };
      if (arraySize > 1)
        val = Builder.CreateExtractValue(val, 0);
      StElement(offset, val, Builder);
      if (arraySize > 1) {
        // Array case: store remaining elements, advancing offset per element.
        val = stInst->getValueOperand();
        for (unsigned i = 1; i < arraySize; i++) {
          offset = Builder.CreateAdd(offset, eltSize);
          Value *eltVal = Builder.CreateExtractValue(val, i);
          StElement(offset, eltVal, Builder);
        }
      }
    }
    user->eraseFromParent();
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
    // Recurse users
    for (auto U = BCI->user_begin(); U != BCI->user_end();) {
      Value *BCIUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(BCIUser), handle,
                                      bufIdx, baseOffset, status, OP, DL);
    }
    BCI->eraseFromParent();
  } else {
    // should only used by GEP
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    Type *Ty = GEP->getType()->getPointerElementType();
    // Fold the GEP's indices into a byte offset and recurse into its users.
    Value *offset = GEPIdxToOffset(GEP, Builder, OP, DL);
    DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()),
                      "else bitness is wrong");
    if (baseOffset)
      offset = Builder.CreateAdd(offset, baseOffset);
    for (auto U = GEP->user_begin(); U != GEP->user_end();) {
      Value *GEPUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(GEPUser), handle,
                                      bufIdx, offset, status, OP, DL);
    }
    // delete the inst
    GEP->eraseFromParent();
  }
}
  5804. void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
  5805. hlsl::OP *OP, const DataLayout &DL) {
  5806. Value *bufIdx = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
  5807. for (auto U = CI->user_begin(); U != CI->user_end();) {
  5808. Value *user = *(U++);
  5809. TranslateStructBufSubscriptUser(cast<Instruction>(user), handle, bufIdx,
  5810. /*baseOffset*/ nullptr, status, OP, DL);
  5811. }
  5812. }
  5813. }
  5814. // HLSubscript.
  5815. namespace {
  5816. Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK,
  5817. DXIL::ResourceClass RC, Value *handle,
  5818. LoadInst *ldInst, IRBuilder<> &Builder,
  5819. hlsl::OP *hlslOP, const DataLayout &DL) {
  5820. ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, /*bForSubscript*/ true);
  5821. // Default sampleIdx for 2DMS textures.
  5822. if (RK == DxilResource::Kind::Texture2DMS ||
  5823. RK == DxilResource::Kind::Texture2DMSArray)
  5824. ldHelper.mipLevel = hlslOP->GetU32Const(0);
  5825. // use ldInst as retVal
  5826. ldHelper.retVal = ldInst;
  5827. TranslateLoad(ldHelper, RK, Builder, hlslOP, DL);
  5828. // delete the ld
  5829. ldInst->eraseFromParent();
  5830. return ldHelper.retVal;
  5831. }
// Returns VecVal with element EltIdx replaced by EltVal. A constant index
// becomes a single insertelement. A dynamic index is lowered to a switch
// with one case per lane (each doing a constant-index insertelement),
// merged by a phi in the fall-through block; the default (out-of-range
// index) keeps VecVal unchanged. Note this moves InsertPt into a new
// block, so callers must reset any builder positioned at it.
Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx,
                       unsigned vectorSize, Instruction *InsertPt) {
  IRBuilder<> Builder(InsertPt);
  if (ConstantInt *CEltIdx = dyn_cast<ConstantInt>(EltIdx)) {
    VecVal =
        Builder.CreateInsertElement(VecVal, EltVal, CEltIdx->getLimitedValue());
  } else {
    // Split the block at InsertPt; the switch replaces the unconditional
    // branch that splitBasicBlock created.
    BasicBlock *BB = InsertPt->getParent();
    BasicBlock *EndBB = BB->splitBasicBlock(InsertPt);
    TerminatorInst *TI = BB->getTerminator();
    IRBuilder<> SwitchBuilder(TI);
    LLVMContext &Ctx = InsertPt->getContext();
    SwitchInst *Switch = SwitchBuilder.CreateSwitch(EltIdx, EndBB, vectorSize);
    TI->eraseFromParent();
    Function *F = EndBB->getParent();
    IRBuilder<> endSwitchBuilder(EndBB->begin());
    Type *Ty = VecVal->getType();
    // vectorSize cases plus the default edge feed the merge phi.
    PHINode *VecPhi = endSwitchBuilder.CreatePHI(Ty, vectorSize + 1);
    for (unsigned i = 0; i < vectorSize; i++) {
      BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case", F, EndBB);
      Switch->addCase(SwitchBuilder.getInt32(i), CaseBB);
      IRBuilder<> CaseBuilder(CaseBB);
      Value *CaseVal = CaseBuilder.CreateInsertElement(VecVal, EltVal, i);
      VecPhi->addIncoming(CaseVal, CaseBB);
      CaseBuilder.CreateBr(EndBB);
    }
    // Default edge: index out of range leaves the vector untouched.
    VecPhi->addIncoming(VecVal, BB);
    VecVal = VecPhi;
  }
  return VecVal;
}
// Lowers the default subscript operator on a typed buffer/texture resource.
// Walks every user of the HL subscript call: direct loads/stores become
// DXIL loads/stores; GEP users (per-component access into the vector
// element) are lowered via load-modify-store; intrinsic calls on a UAV
// become DXIL atomics. Invalid uses (atomics on SRVs, unknown ops) emit a
// diagnostic and set Translated = false.
void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Resource ptr.
  Value *handle = ptr;
  DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
  DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
  Type *Ty = CI->getType()->getPointerElementType();
  // Post-increment before lowering: each case erases the user.
  for (auto It = CI->user_begin(); It != CI->user_end(); ) {
    User *user = *(It++);
    Instruction *I = cast<Instruction>(user);
    IRBuilder<> Builder(I);
    if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
      // Whole-element load; erases ldInst.
      TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.dataLayout);
    } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
      // Whole-element store.
      Value *val = stInst->getValueOperand();
      TranslateStore(RK, handle, val,
                     CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
                     Builder, hlslOP);
      // delete the st
      stInst->eraseFromParent();
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
      // Per-component access into the subscripted vector element.
      // Must be vector type here.
      unsigned vectorSize = Ty->getVectorNumElements();
      DXASSERT_NOMSG(GEP->getNumIndices() == 2);
      Use *GEPIdx = GEP->idx_begin();
      GEPIdx++;
      Value *EltIdx = *GEPIdx;
      for (auto GEPIt = GEP->user_begin(); GEPIt != GEP->user_end();) {
        User *GEPUser = *(GEPIt++);
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPUser)) {
          // Component store: typed buffers have no partial writes, so do a
          // full-element load, update the one lane, and store it back.
          IRBuilder<> StBuilder(SI);
          // Generate Ld.
          LoadInst *tmpLd = StBuilder.CreateLoad(CI);
          Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, StBuilder,
                                               hlslOP, helper.dataLayout);
          // Update vector.
          ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx,
                                  vectorSize, SI);
          // Generate St.
          // Reset insert point, UpdateVectorElt may move SI to different block.
          StBuilder.SetInsertPoint(SI);
          TranslateStore(RK, handle, ldVal,
                         CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx),
                         StBuilder, hlslOP);
          SI->eraseFromParent();
          continue;
        }
        if (LoadInst *LI = dyn_cast<LoadInst>(GEPUser)) {
          // Component load: load the full element, then extract one lane.
          IRBuilder<> LdBuilder(LI);
          // Generate tmp vector load with vector type & translate it
          LoadInst *tmpLd = LdBuilder.CreateLoad(CI);
          Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, LdBuilder,
                                               hlslOP, helper.dataLayout);
          // get the single element
          ldVal = LdBuilder.CreateExtractElement(ldVal, EltIdx);
          LI->replaceAllUsesWith(ldVal);
          LI->eraseFromParent();
          continue;
        }
        if (!isa<CallInst>(GEPUser)) {
          // Invalid operations.
          Translated = false;
          CI->getContext().emitError(GEP, "Invalid operation on typed buffer");
          return;
        }
        CallInst *userCall = cast<CallInst>(GEPUser);
        HLOpcodeGroup group =
            hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
        if (group != HLOpcodeGroup::HLIntrinsic) {
          // Invalid operations.
          Translated = false;
          CI->getContext().emitError(userCall,
                                     "Invalid operation on typed buffer");
          return;
        }
        unsigned opcode = hlsl::GetHLOpcode(userCall);
        IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
        // No intrinsic is valid on a single component of a typed buffer
        // element; atomics get a dedicated message.
        switch (IOP) {
        case IntrinsicOp::IOP_InterlockedAdd:
        case IntrinsicOp::IOP_InterlockedAnd:
        case IntrinsicOp::IOP_InterlockedExchange:
        case IntrinsicOp::IOP_InterlockedMax:
        case IntrinsicOp::IOP_InterlockedMin:
        case IntrinsicOp::IOP_InterlockedUMax:
        case IntrinsicOp::IOP_InterlockedUMin:
        case IntrinsicOp::IOP_InterlockedOr:
        case IntrinsicOp::IOP_InterlockedXor:
        case IntrinsicOp::IOP_InterlockedCompareStore:
        case IntrinsicOp::IOP_InterlockedCompareExchange: {
          // Invalid operations.
          Translated = false;
          CI->getContext().emitError(
              userCall, "Atomic operation on typed buffer is not supported");
          return;
        } break;
        default:
          // Invalid operations.
          Translated = false;
          CI->getContext().emitError(userCall,
                                     "Invalid operation on typed buffer");
          return;
          break;
        }
      }
      GEP->eraseFromParent();
    } else {
      // Remaining users must be HL intrinsic calls (atomics on the element).
      CallInst *userCall = cast<CallInst>(user);
      HLOpcodeGroup group =
          hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
      unsigned opcode = hlsl::GetHLOpcode(userCall);
      if (group == HLOpcodeGroup::HLIntrinsic) {
        IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
        if (RC == DXIL::ResourceClass::SRV) {
          // Atomics (and anything else) are invalid on read-only resources.
          // Invalid operations.
          Translated = false;
          switch (IOP) {
          case IntrinsicOp::IOP_InterlockedAdd:
          case IntrinsicOp::IOP_InterlockedAnd:
          case IntrinsicOp::IOP_InterlockedExchange:
          case IntrinsicOp::IOP_InterlockedMax:
          case IntrinsicOp::IOP_InterlockedMin:
          case IntrinsicOp::IOP_InterlockedUMax:
          case IntrinsicOp::IOP_InterlockedUMin:
          case IntrinsicOp::IOP_InterlockedOr:
          case IntrinsicOp::IOP_InterlockedXor:
          case IntrinsicOp::IOP_InterlockedCompareStore:
          case IntrinsicOp::IOP_InterlockedCompareExchange: {
            CI->getContext().emitError(
                userCall, "Atomic operation targets must be groupshared on UAV");
            return;
          } break;
          default:
            CI->getContext().emitError(userCall,
                                       "Invalid operation on typed buffer");
            return;
            break;
          }
        }
        // UAV path: ResLoadHelper recomputes the coordinate (helper.addr)
        // for each atomic; the dxil op carries no separate offset here.
        switch (IOP) {
        case IntrinsicOp::IOP_InterlockedAdd: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAdd);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add,
                                         Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedAnd: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAnd);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And,
                                         Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedExchange: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedExchange);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(
              atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedMax: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMax);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(
              atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedMin: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMin);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(
              atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedUMax: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMax);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(
              atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedUMin: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMin);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(
              atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedOr: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedOr);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or,
                                         Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedXor: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedXor);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                  helper.addr, /*offset*/ nullptr);
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor,
                                         Builder, hlslOP);
        } break;
        case IntrinsicOp::IOP_InterlockedCompareStore:
        case IntrinsicOp::IOP_InterlockedCompareExchange: {
          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedCompareExchange);
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
                                  handle, helper.addr, /*offset*/ nullptr);
          TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
        } break;
        default:
          DXASSERT(0, "invalid opcode");
          break;
        }
      } else {
        DXASSERT(0, "invalid group");
      }
      userCall->eraseFromParent();
    }
  }
}
// Lowers a single HLSubscript call (operator[] on HLSL objects) to DXIL.
// Dispatches on the subscript opcode: constant-buffer subscripts, double
// subscripts (e.g. tex.mips[mip][coord]), and resource-handle subscripts are
// lowered here; matrix subscripts on local/shared memory are expected to have
// been handled earlier by HLMatrixLowerPass.
// On return, Translated reports whether the call was fully lowered (all uses
// replaced/erased) so the caller knows whether to delete it.
void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
                          HLOperationLowerHelper &helper,
                          HLObjectOperationLowerHelper *pObjHelper,
                          bool &Translated) {
  if (CI->user_empty()) {
    // Dead subscript: nothing to lower, but report it translated so the
    // caller erases the call.
    Translated = true;
    return;
  }
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
  if (opcode == HLSubscriptOpcode::CBufferSubscript) {
    // Collapse chained GEPs on the subscript result before lowering the
    // constant-buffer access.
    HLModule::MergeGepUse(CI);
    // Resource ptr.
    Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
    if (helper.bLegacyCBufferLoad)
      TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys,
                                  helper.dataLayout, pObjHelper);
    else {
      TranslateCBOperations(handle, CI, /*offset*/ hlslOP->GetU32Const(0),
                            hlslOP, helper.dxilTypeSys,
                            CI->getModule()->getDataLayout());
    }
    Translated = true;
    return;
  } else if (opcode == HLSubscriptOpcode::DoubleSubscript) {
    // Double subscript: lower the single load user directly into a typed
    // load at the given mip level.
    // Resource ptr.
    Value *handle = ptr;
    DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
    Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
    Value *mipLevel =
        CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx);

    auto U = CI->user_begin();
    DXASSERT(CI->hasOneUse(), "subscript should only has one use");
    // TODO: support store.
    Instruction *ldInst = cast<Instruction>(*U);
    ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel);
    IRBuilder<> Builder(CI);
    TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout);
    ldInst->eraseFromParent();
    Translated = true;
    return;
  } else {
    Type *HandleTy = hlslOP->GetHandleType();
    if (ptr->getType() == HandleTy) {
      // Resource ptr.
      Value *handle = ptr;
      DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
      if (RK == DxilResource::Kind::Invalid) {
        // Unknown resource kind: leave the call for a later pass.
        Translated = false;
        return;
      }
      Translated = true;
      Type *ObjTy = pObjHelper->GetResourceType(handle);
      Type *RetTy = ObjTy->getStructElementType(0);
      if (RK == DxilResource::Kind::StructuredBuffer) {
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP,
                                    helper.dataLayout);
      } else if (RetTy->isAggregateType() &&
                 RK == DxilResource::Kind::TypedBuffer) {
        // Typed buffer with an aggregate element: lower as if it were a
        // structured buffer first, then patch the generated DXIL calls into
        // typed-buffer form in the loop below.
        TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP,
                                    helper.dataLayout);

        // Clear offset for typed buf.
        // Iterate the handle's users; the increment happens before any
        // erase so the iterator stays valid.
        for (auto User = handle->user_begin(); User != handle->user_end();) {
          // NOTE: this CI intentionally shadows the outer subscript CI; here
          // it is the already-lowered DXIL call being patched.
          CallInst *CI = cast<CallInst>(*(User++));
          // Skip not lowered HL functions.
          if (hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()) !=
              HLOpcodeGroup::NotHL)
            continue;
          switch (hlslOP->GetDxilOpFuncCallInst(CI)) {
          case DXIL::OpCode::BufferLoad: {
            // Typed buffers take a single coordinate: undef out the second.
            CI->setArgOperand(DXIL::OperandIndex::kBufferLoadCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::BufferStore: {
            CI->setArgOperand(DXIL::OperandIndex::kBufferStoreCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicBinOp: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::AtomicCompareExchange: {
            CI->setArgOperand(DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx,
                              UndefValue::get(helper.i32Ty));
          } break;
          case DXIL::OpCode::RawBufferLoad: {
            // Structured buffer inside a typed buffer must be converted to
            // typed buffer load. Typed buffer load is equivalent to raw
            // buffer load, except there is no mask.
            StructType *STy =
                cast<StructType>(CI->getFunctionType()->getReturnType());
            Type *ETy = STy->getElementType(0);
            SmallVector<Value *, 4> Args;
            Args.emplace_back(
                hlslOP->GetI32Const((unsigned)DXIL::OpCode::BufferLoad));
            Args.emplace_back(CI->getArgOperand(1));          // handle
            Args.emplace_back(CI->getArgOperand(2));          // index
            Args.emplace_back(UndefValue::get(helper.i32Ty)); // offset
            IRBuilder<> builder(CI);
            Function *newFunction =
                hlslOP->GetOpFunc(DXIL::OpCode::BufferLoad, ETy);
            CallInst *newCall = builder.CreateCall(newFunction, Args);
            CI->replaceAllUsesWith(newCall);
            CI->eraseFromParent();
          } break;
          default:
            DXASSERT(0, "Invalid operation on resource handle");
            break;
          }
        }
      } else {
        // Scalar/vector element typed buffer and everything else.
        TranslateDefaultSubscript(CI, helper, pObjHelper, Translated);
      }
      return;
    }
  }

  // Not a resource subscript: must be a matrix subscript.
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  if (IsLocalVariablePtr(basePtr) || IsSharedMemPtr(basePtr)) {
    // Translate matrix into vector of array for share memory or local
    // variable should be done in HLMatrixLowerPass
    DXASSERT_NOMSG(0);
    Translated = true;
    return;
  }

  // Other case should be take care in TranslateStructBufSubscript or
  // TranslateCBOperations.
  Translated = false;
  return;
}
  6206. }
  6207. void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper) {
  6208. for (auto U = F->user_begin(); U != F->user_end();) {
  6209. Value *user = *(U++);
  6210. if (!isa<Instruction>(user))
  6211. continue;
  6212. // must be call inst
  6213. CallInst *CI = cast<CallInst>(user);
  6214. unsigned opcode = GetHLOpcode(CI);
  6215. bool Translated = true;
  6216. TranslateHLSubscript(
  6217. CI, static_cast<HLSubscriptOpcode>(opcode), helper, pObjHelper, Translated);
  6218. if (Translated) {
  6219. // delete the call
  6220. DXASSERT(CI->use_empty(),
  6221. "else TranslateHLSubscript didn't replace/erase uses");
  6222. CI->eraseFromParent();
  6223. }
  6224. }
  6225. }
  6226. // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast of alloca, and return load from alloca
  6227. // If bOrigAllocaTy is true: create alloca of old type instead, write to alloca, and return load from bitcast of alloca
  6228. static Instruction *BitCastValueOrPtr(Value* V, Instruction *Insert, Type *Ty, bool bOrigAllocaTy = false, const Twine &Name = "") {
  6229. IRBuilder<> Builder(Insert);
  6230. if (Ty->isPointerTy()) {
  6231. // If pointer, we can bitcast directly
  6232. return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
  6233. } else {
  6234. // If value, we have to alloca, store to bitcast ptr, and load
  6235. IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
  6236. Type *allocaTy = bOrigAllocaTy ? V->getType() : Ty;
  6237. Type *otherTy = bOrigAllocaTy ? Ty : V->getType();
  6238. Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
  6239. Instruction *bitCast = cast<Instruction>(Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
  6240. Builder.CreateStore(V, bOrigAllocaTy ? allocaInst : bitCast);
  6241. return Builder.CreateLoad(bOrigAllocaTy ? bitCast : allocaInst, Name);
  6242. }
  6243. }
  6244. static Instruction *CreateTransposeShuffle(IRBuilder<> &Builder, Value *vecVal, unsigned toRows, unsigned toCols) {
  6245. SmallVector<int, 16> castMask(toCols * toRows);
  6246. unsigned idx = 0;
  6247. for (unsigned r = 0; r < toRows; r++)
  6248. for (unsigned c = 0; c < toCols; c++)
  6249. castMask[idx++] = c * toRows + r;
  6250. return cast<Instruction>(
  6251. Builder.CreateShuffleVector(vecVal, vecVal, castMask));
  6252. }
// Lowers all call sites of one HL builtin function F according to its opcode
// group:
//  - HLIntrinsic: each call is lowered to DXIL via TranslateBuiltinIntrinsic
//    and erased when fully translated.
//  - HLMatLoadStore / HLCast: only the lib-target cases that survive
//    HLMatrixLowerPass (default address space / matrix value arguments) are
//    handled here, by bitcasting between matrix and vector representations.
//  - HLSubscript: delegated to TranslateSubscriptOperation.
void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
                                 hlsl::HLOpcodeGroup group,
                                 HLObjectOperationLowerHelper *pObjHelper) {
  if (group == HLOpcodeGroup::HLIntrinsic) {
    // map to dxil operations
    // Increment the user iterator before lowering: translation may erase the
    // current call.
    for (auto U = F->user_begin(); U != F->user_end();) {
      Value *User = *(U++);
      if (!isa<Instruction>(User))
        continue;
      // must be call inst
      CallInst *CI = cast<CallInst>(User);

      // Keep the instruction to lower by other function.
      bool Translated = true;

      TranslateBuiltinIntrinsic(CI, helper, pObjHelper, Translated);

      if (Translated) {
        // delete the call
        DXASSERT(CI->use_empty(),
                 "else TranslateBuiltinIntrinsic didn't replace/erase uses");
        CI->eraseFromParent();
      }
    }
  } else {
    if (group == HLOpcodeGroup::HLMatLoadStore) {
      // Both ld/st use arg1 for the pointer.
      Type *PtrTy =
          F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx);

      if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
        // Translate matrix into vector of array for shared memory
        // variable should be done in HLMatrixLowerPass.
        if (!F->user_empty())
          F->getContext().emitError("Fail to lower matrix load/store.");
      } else if (PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) {
        // Default address space may be function argument in lib target
        if (!F->user_empty()) {
          for (auto U = F->user_begin(); U != F->user_end();) {
            Value *User = *(U++);
            if (!isa<Instruction>(User))
              continue;
            // must be call inst
            CallInst *CI = cast<CallInst>(User);
            IRBuilder<> Builder(CI);
            HLMatLoadStoreOpcode opcode =
                static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
            switch (opcode) {
            case HLMatLoadStoreOpcode::ColMatStore:
            case HLMatLoadStoreOpcode::RowMatStore: {
              // Store the matrix value through the destination pointer
              // bitcast to the matching vector pointer type.
              Value *vecVal =
                  CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
              Value *matPtr =
                  CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
              Value *castPtr = Builder.CreateBitCast(
                  matPtr, vecVal->getType()->getPointerTo());
              Builder.CreateStore(vecVal, castPtr);
              CI->eraseFromParent();
            } break;
            case HLMatLoadStoreOpcode::ColMatLoad:
            case HLMatLoadStoreOpcode::RowMatLoad: {
              // Load through the source pointer bitcast to the call's
              // (vector) result pointer type.
              Value *matPtr =
                  CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
              Value *castPtr =
                  Builder.CreateBitCast(matPtr, CI->getType()->getPointerTo());
              Value *vecVal = Builder.CreateLoad(castPtr);
              CI->replaceAllUsesWith(vecVal);
              CI->eraseFromParent();
            } break;
            }
          }
        }
      }
    } else if (group == HLOpcodeGroup::HLCast) {
      // HLCast may be used on matrix value function argument in lib target
      if (!F->user_empty()) {
        for (auto U = F->user_begin(); U != F->user_end();) {
          Value *User = *(U++);
          if (!isa<Instruction>(User))
            continue;
          // must be call inst
          CallInst *CI = cast<CallInst>(User);
          IRBuilder<> Builder(CI);
          HLCastOpcode opcode = static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI));
          bool bTranspose = false;
          bool bColDest = false;
          switch (opcode) {
          case HLCastOpcode::RowMatrixToColMatrix:
            bColDest = true;
            // Intentional fallthrough: row->col also needs a transpose.
          case HLCastOpcode::ColMatrixToRowMatrix:
            bTranspose = true;
            // Intentional fallthrough into the common bitcast handling.
          case HLCastOpcode::ColMatrixToVecCast:
          case HLCastOpcode::RowMatrixToVecCast: {
            // Reinterpret the matrix value as a vector, then transpose the
            // lanes if the cast changes the matrix orientation.
            Value *matVal = CI->getArgOperand(HLOperandIndex::kInitFirstArgOpIdx);
            Value *vecVal = BitCastValueOrPtr(matVal, CI, CI->getType(),
                                              /*bOrigAllocaTy*/ false,
                                              matVal->getName());
            if (bTranspose) {
              HLMatrixType MatTy = HLMatrixType::cast(matVal->getType());
              unsigned row = MatTy.getNumRows();
              unsigned col = MatTy.getNumColumns();
              if (bColDest) std::swap(row, col);
              vecVal = CreateTransposeShuffle(Builder, vecVal, row, col);
            }
            CI->replaceAllUsesWith(vecVal);
            CI->eraseFromParent();
          } break;
          }
        }
      }
    } else if (group == HLOpcodeGroup::HLSubscript) {
      TranslateSubscriptOperation(F, helper, pObjHelper);
    }
    // map to math function or llvm ir
  }
}
// Instruction -> Value map; presumably maps instructions to the resource
// handle they relate to. NOTE(review): no uses are visible in this part of
// the file — confirm it is still referenced before removing.
typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
  6359. static void TranslateHLExtension(Function *F,
  6360. HLSLExtensionsCodegenHelper *helper,
  6361. OP& hlslOp) {
  6362. // Find all calls to the function F.
  6363. // Store the calls in a vector for now to be replaced the loop below.
  6364. // We use a two step "find then replace" to avoid removing uses while
  6365. // iterating.
  6366. SmallVector<CallInst *, 8> CallsToReplace;
  6367. for (User *U : F->users()) {
  6368. if (CallInst *CI = dyn_cast<CallInst>(U)) {
  6369. CallsToReplace.push_back(CI);
  6370. }
  6371. }
  6372. // Get the lowering strategy to use for this intrinsic.
  6373. llvm::StringRef LowerStrategy = GetHLLowerStrategy(F);
  6374. ExtensionLowering lower(LowerStrategy, helper, hlslOp);
  6375. // Replace all calls that were successfully translated.
  6376. for (CallInst *CI : CallsToReplace) {
  6377. Value *Result = lower.Translate(CI);
  6378. if (Result && Result != CI) {
  6379. CI->replaceAllUsesWith(Result);
  6380. CI->eraseFromParent();
  6381. }
  6382. }
  6383. }
namespace hlsl {

// Module-level driver: walks every HL builtin declaration in the module and
// lowers its call sites to DXIL operations.
// NonUniformResourceIndex intrinsics are collected and lowered after all
// other groups — see the comment at the bottom.
void TranslateBuiltinOperations(
    HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper,
    std::unordered_set<LoadInst *> &UpdateCounterSet) {
  HLOperationLowerHelper helper(HLM);

  HLObjectOperationLowerHelper objHelper = {HLM, UpdateCounterSet};

  Module *M = HLM.GetModule();

  SmallVector<Function *, 4> NonUniformResourceIndexIntrinsics;

  // generate dxil operation
  // NOTE: the range-for binds each Function& to an ilist iterator; this
  // relies on the old-LLVM implicit conversion — do not modernize casually.
  for (iplist<Function>::iterator F : M->getFunctionList()) {
    // Only HL builtins matter: they are declarations (no body) with users.
    if (F->user_empty())
      continue;
    if (!F->isDeclaration()) {
      continue;
    }
    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
    if (group == HLOpcodeGroup::NotHL) {
      // Nothing to do.
      continue;
    }
    if (group == HLOpcodeGroup::HLExtIntrinsic) {
      TranslateHLExtension(F, extCodegenHelper, helper.hlslOP);
      continue;
    }
    if (group == HLOpcodeGroup::HLIntrinsic) {
      // The opcode is uniform across a given HL function, so inspecting the
      // first call site is sufficient.
      CallInst *CI = cast<CallInst>(*F->user_begin()); // must be call inst
      unsigned opcode = hlsl::GetHLOpcode(CI);
      if (opcode == (unsigned)IntrinsicOp::IOP_NonUniformResourceIndex) {
        // Defer: lowered after everything else, see below.
        NonUniformResourceIndexIntrinsics.push_back(F);
        continue;
      }
    }
    TranslateHLBuiltinOperation(F, helper, group, &objHelper);
  }

  // Translate last so value placed in NonUniformSet is still valid.
  if (!NonUniformResourceIndexIntrinsics.empty()) {
    for (auto F : NonUniformResourceIndexIntrinsics) {
      TranslateHLBuiltinOperation(F, helper, HLOpcodeGroup::HLIntrinsic,
                                  &objHelper);
    }
  }
}

} // namespace hlsl