DxilGenerationPass.cpp 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilGenerationPass.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // DxilGenerationPass implementation. //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #include "dxc/HLSL/DxilGenerationPass.h"
  12. #include "dxc/HLSL/DxilOperations.h"
  13. #include "dxc/HLSL/DxilModule.h"
  14. #include "dxc/HLSL/HLModule.h"
  15. #include "dxc/HLSL/HLOperations.h"
  16. #include "dxc/HLSL/DxilInstructions.h"
  17. #include "dxc/HLSL/HLMatrixLowerHelper.h"
  18. #include "dxc/HlslIntrinsicOp.h"
  19. #include "dxc/Support/Global.h"
  20. #include "dxc/HLSL/DxilTypeSystem.h"
  21. #include "dxc/HLSL/HLOperationLower.h"
  22. #include "HLSignatureLower.h"
  23. #include "dxc/HLSL/DxilUtil.h"
  24. #include "dxc/Support/exception.h"
  25. #include "DxilEntryProps.h"
  26. #include "llvm/IR/GetElementPtrTypeIterator.h"
  27. #include "llvm/IR/IRBuilder.h"
  28. #include "llvm/IR/Instructions.h"
  29. #include "llvm/IR/InstIterator.h"
  30. #include "llvm/IR/IntrinsicInst.h"
  31. #include "llvm/IR/Module.h"
  32. #include "llvm/IR/DebugInfo.h"
  33. #include "llvm/IR/PassManager.h"
  34. #include "llvm/ADT/BitVector.h"
  35. #include "llvm/ADT/SetVector.h"
  36. #include "llvm/Pass.h"
  37. #include "llvm/Transforms/Utils/SSAUpdater.h"
  38. #include "llvm/Analysis/AssumptionCache.h"
  39. #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  40. #include <memory>
  41. #include <unordered_set>
  42. #include <iterator>
  43. using namespace llvm;
  44. using namespace hlsl;
  45. // TODO: use hlsl namespace for the most of this file.
  46. namespace {
// Collect unused phi of resources and remove them.
//
// Specialization of LoadAndStorePromoter used while promoting resource
// allocas: loads of the alloca are replaced with undef, and any phi the
// SSAUpdater materialized that ends up with no users is queued for deletion
// after the promotion pass completes.
class ResourceRemover : public LoadAndStorePromoter {
  // The alloca currently being promoted; set by run() and consulted by
  // isInstInList() to filter loads/stores of that alloca.
  AllocaInst *AI;
  // Phis inserted by SSAUpdater that ended up unused; mutable because
  // replaceLoadWithValue() is const in the base-class interface.
  mutable std::unordered_set<PHINode *> unusedPhis;

public:
  ResourceRemover(ArrayRef<Instruction *> Insts, SSAUpdater &S)
      : LoadAndStorePromoter(Insts, S), AI(nullptr) {}

  // Run promotion for one alloca, then erase every unused phi that was
  // recorded by replaceLoadWithValue() during the run.
  void run(AllocaInst *AI, const SmallVectorImpl<Instruction *> &Insts) {
    // Remember which alloca we're promoting (for isInstInList).
    this->AI = AI;
    LoadAndStorePromoter::run(Insts);
    for (PHINode *P : unusedPhis) {
      P->eraseFromParent();
    }
  }

  // Membership test used by the base class: an instruction belongs to this
  // promotion iff it loads from or stores to the remembered alloca.  The
  // Insts list argument is intentionally ignored.
  bool
  isInstInList(Instruction *I,
               const SmallVectorImpl<Instruction *> &Insts) const override {
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      return LI->getOperand(0) == AI;
    // Non-loads in this promotion are stores by construction.
    return cast<StoreInst>(I)->getPointerOperand() == AI;
  }

  // Replace each promoted load with undef (resource values are rebuilt
  // elsewhere), remembering any user-less phi the updater produced so run()
  // can delete it afterwards.
  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
    if (PHINode *PHI = dyn_cast<PHINode>(V)) {
      if (PHI->user_empty())
        unusedPhis.insert(PHI);
    }
    LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
  }
};
  77. void SimplifyGlobalSymbol(GlobalVariable *GV) {
  78. Type *Ty = GV->getType()->getElementType();
  79. if (!Ty->isArrayTy()) {
  80. // Make sure only 1 load of GV in each function.
  81. std::unordered_map<Function *, Instruction *> handleMapOnFunction;
  82. for (User *U : GV->users()) {
  83. if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
  84. Function *F = LI->getParent()->getParent();
  85. auto it = handleMapOnFunction.find(F);
  86. if (it == handleMapOnFunction.end()) {
  87. handleMapOnFunction[F] = LI;
  88. } else {
  89. LI->replaceAllUsesWith(it->second);
  90. }
  91. }
  92. }
  93. for (auto it : handleMapOnFunction) {
  94. Function *F = it.first;
  95. Instruction *I = it.second;
  96. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
  97. Value *headLI = Builder.CreateLoad(GV);
  98. I->replaceAllUsesWith(headLI);
  99. }
  100. }
  101. }
  102. void InitResourceBase(const DxilResourceBase *pSource,
  103. DxilResourceBase *pDest) {
  104. DXASSERT_NOMSG(pSource->GetClass() == pDest->GetClass());
  105. pDest->SetKind(pSource->GetKind());
  106. pDest->SetID(pSource->GetID());
  107. pDest->SetSpaceID(pSource->GetSpaceID());
  108. pDest->SetLowerBound(pSource->GetLowerBound());
  109. pDest->SetRangeSize(pSource->GetRangeSize());
  110. pDest->SetGlobalSymbol(pSource->GetGlobalSymbol());
  111. pDest->SetGlobalName(pSource->GetGlobalName());
  112. pDest->SetHandle(pSource->GetHandle());
  113. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(pSource->GetGlobalSymbol()))
  114. SimplifyGlobalSymbol(GV);
  115. }
  116. void InitResource(const DxilResource *pSource, DxilResource *pDest) {
  117. pDest->SetCompType(pSource->GetCompType());
  118. pDest->SetSampleCount(pSource->GetSampleCount());
  119. pDest->SetElementStride(pSource->GetElementStride());
  120. pDest->SetGloballyCoherent(pSource->IsGloballyCoherent());
  121. pDest->SetHasCounter(pSource->HasCounter());
  122. pDest->SetRW(pSource->IsRW());
  123. pDest->SetROV(pSource->IsROV());
  124. InitResourceBase(pSource, pDest);
  125. }
// Populate a freshly created DxilModule M from the HLModule H: validator and
// shader-model versions, the entry function (non-lib only), the four resource
// tables, root signature, shader options, the DXIL type system and the DXIL
// op table.  Resource globals are appended to llvm.used so later cleanup
// passes do not drop them.
// NOTE(review): the HasDebugInfo parameter is not read anywhere in this body.
void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
  // Subsystems.
  unsigned ValMajor, ValMinor;
  H.GetValidatorVersion(ValMajor, ValMinor);
  M.SetValidatorVersion(ValMajor, ValMinor);
  M.SetShaderModel(H.GetShaderModel(), H.GetHLOptions().bUseMinPrecision);
  // Entry function.  Libraries have multiple entries recorded per-function,
  // so no single module-level entry is set for them.
  if (!M.GetShaderModel()->IsLib()) {
    Function *EntryFn = H.GetEntryFunction();
    M.SetEntryFunction(EntryFn);
    M.SetEntryFunctionName(H.GetEntryFunctionName());
  }
  std::vector<GlobalVariable* > &LLVMUsed = M.GetLLVMUsed();
  // Resources: clone each HL record into the DXIL module and keep its global
  // symbol alive via llvm.used.
  for (auto && C : H.GetCBuffers()) {
    auto b = llvm::make_unique<DxilCBuffer>();
    InitResourceBase(C.get(), b.get());
    b->SetSize(C->GetSize());
    LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
    M.AddCBuffer(std::move(b));
  }
  for (auto && C : H.GetUAVs()) {
    auto b = llvm::make_unique<DxilResource>();
    InitResource(C.get(), b.get());
    LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
    M.AddUAV(std::move(b));
  }
  for (auto && C : H.GetSRVs()) {
    auto b = llvm::make_unique<DxilResource>();
    InitResource(C.get(), b.get());
    LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
    M.AddSRV(std::move(b));
  }
  for (auto && C : H.GetSamplers()) {
    auto b = llvm::make_unique<DxilSampler>();
    InitResourceBase(C.get(), b.get());
    b->SetSamplerKind(C->GetSamplerKind());
    LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
    M.AddSampler(std::move(b));
  }
  // Signatures.
  M.ResetRootSignature(H.ReleaseRootSignature());
  // Shader properties.  The commented flags below are handled elsewhere
  // (shader-flag collection) and intentionally not copied here.
  //bool m_bDisableOptimizations;
  M.SetDisableOptimization(H.GetHLOptions().bDisableOptimizations);
  //bool m_bDisableMathRefactoring;
  //bool m_bEnableDoublePrecision;
  //bool m_bEnableDoubleExtensions;
  //M.CollectShaderFlags();
  //bool m_bForceEarlyDepthStencil;
  //bool m_bEnableRawAndStructuredBuffers;
  //bool m_bEnableMSAD;
  //M.m_ShaderFlags.SetAllResourcesBound(H.GetHLOptions().bAllResourcesBound);
  // DXIL type system: ownership moves from the HL module to the DXIL module.
  M.ResetTypeSystem(H.ReleaseTypeSystem());
  // Dxil OP.
  M.ResetOP(H.ReleaseOP());
  // Keep llvm used.
  M.EmitLLVMUsed();
  M.SetAllResourcesBound(H.GetHLOptions().bAllResourcesBound);
  M.SetAutoBindingSpace(H.GetAutoBindingSpace());
  // Update Validator Version
  M.UpgradeToMinValidatorVersion();
}
  190. class DxilGenerationPass : public ModulePass {
  191. HLModule *m_pHLModule;
  192. bool m_HasDbgInfo;
  193. HLSLExtensionsCodegenHelper *m_extensionsCodegenHelper;
  194. public:
  195. static char ID; // Pass identification, replacement for typeid
  196. explicit DxilGenerationPass(bool NoOpt = false)
  197. : ModulePass(ID), m_pHLModule(nullptr), m_extensionsCodegenHelper(nullptr), NotOptimized(NoOpt) {}
  198. const char *getPassName() const override { return "DXIL Generator"; }
  199. void SetExtensionsHelper(HLSLExtensionsCodegenHelper *helper) {
  200. m_extensionsCodegenHelper = helper;
  201. }
  202. bool runOnModule(Module &M) override {
  203. m_pHLModule = &M.GetOrCreateHLModule();
  204. const ShaderModel *SM = m_pHLModule->GetShaderModel();
  205. // Load up debug information, to cross-reference values and the instructions
  206. // used to load them.
  207. m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
  208. // EntrySig for shader functions.
  209. DxilEntryPropsMap EntryPropsMap;
  210. if (!SM->IsLib()) {
  211. Function *EntryFn = m_pHLModule->GetEntryFunction();
  212. if (!m_pHLModule->HasDxilFunctionProps(EntryFn)) {
  213. M.getContext().emitError("Entry function don't have property.");
  214. return false;
  215. }
  216. DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(EntryFn);
  217. std::unique_ptr<DxilEntryProps> pProps =
  218. llvm::make_unique<DxilEntryProps>(
  219. props, m_pHLModule->GetHLOptions().bUseMinPrecision);
  220. HLSignatureLower sigLower(m_pHLModule->GetEntryFunction(), *m_pHLModule,
  221. pProps->sig);
  222. sigLower.Run();
  223. EntryPropsMap[EntryFn] = std::move(pProps);
  224. } else {
  225. for (auto It = M.begin(); It != M.end();) {
  226. Function &F = *(It++);
  227. // Lower signature for each graphics or compute entry function.
  228. if (m_pHLModule->HasDxilFunctionProps(&F)) {
  229. DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(&F);
  230. std::unique_ptr<DxilEntryProps> pProps =
  231. llvm::make_unique<DxilEntryProps>(
  232. props, m_pHLModule->GetHLOptions().bUseMinPrecision);
  233. if (m_pHLModule->IsGraphicsShader(&F) ||
  234. m_pHLModule->IsComputeShader(&F)) {
  235. HLSignatureLower sigLower(&F, *m_pHLModule, pProps->sig);
  236. // TODO: BUG: This will lower patch constant function sigs twice if
  237. // used by two hull shaders!
  238. sigLower.Run();
  239. }
  240. EntryPropsMap[&F] = std::move(pProps);
  241. }
  242. }
  243. }
  244. std::unordered_set<LoadInst *> UpdateCounterSet;
  245. std::unordered_set<Value *> NonUniformSet;
  246. GenerateDxilOperations(M, UpdateCounterSet, NonUniformSet);
  247. GenerateDxilCBufferHandles(NonUniformSet);
  248. MarkUpdateCounter(UpdateCounterSet);
  249. LowerHLCreateHandle();
  250. MarkNonUniform(NonUniformSet);
  251. // LowerHLCreateHandle() should have translated HLCreateHandle to CreateHandleForLib.
  252. // Clean up HLCreateHandle functions.
  253. for (auto It = M.begin(); It != M.end();) {
  254. Function &F = *(It++);
  255. if (!F.isDeclaration()) {
  256. if (hlsl::GetHLOpcodeGroupByName(&F) ==
  257. HLOpcodeGroup::HLCreateHandle) {
  258. if (F.user_empty()) {
  259. F.eraseFromParent();
  260. } else {
  261. M.getContext().emitError("Fail to lower createHandle.");
  262. }
  263. }
  264. }
  265. }
  266. // Translate precise on allocas into function call to keep the information after mem2reg.
  267. // The function calls will be removed after propagate precise attribute.
  268. TranslatePreciseAttribute();
  269. // High-level metadata should now be turned into low-level metadata.
  270. const bool SkipInit = true;
  271. hlsl::DxilModule &DxilMod = M.GetOrCreateDxilModule(SkipInit);
  272. auto pProps = &EntryPropsMap.begin()->second->props;
  273. InitDxilModuleFromHLModule(*m_pHLModule, DxilMod, m_HasDbgInfo);
  274. DxilMod.ResetEntryPropsMap(std::move(EntryPropsMap));
  275. if (!SM->IsLib()) {
  276. DxilMod.SetShaderProperties(pProps);
  277. }
  278. HLModule::ClearHLMetadata(M);
  279. M.ResetHLModule();
  280. // We now have a DXIL representation - record this.
  281. SetPauseResumePasses(M, "hlsl-dxilemit", "hlsl-dxilload");
  282. (void)NotOptimized; // Dummy out unused member to silence warnings
  283. return true;
  284. }
  285. private:
  286. void MarkUpdateCounter(std::unordered_set<LoadInst *> &UpdateCounterSet);
  287. // Generate DXIL cbuffer handles.
  288. void
  289. GenerateDxilCBufferHandles(std::unordered_set<Value *> &NonUniformSet);
  290. // change built-in funtion into DXIL operations
  291. void GenerateDxilOperations(Module &M,
  292. std::unordered_set<LoadInst *> &UpdateCounterSet,
  293. std::unordered_set<Value *> &NonUniformSet);
  294. void LowerHLCreateHandle();
  295. void MarkNonUniform(std::unordered_set<Value *> &NonUniformSet);
  296. // Translate precise attribute into HL function call.
  297. void TranslatePreciseAttribute();
  298. // Input module is not optimized.
  299. bool NotOptimized;
  300. };
  301. }
  302. namespace {
  303. void TranslateHLCreateHandle(Function *F, hlsl::OP &hlslOP) {
  304. Value *opArg = hlslOP.GetU32Const(
  305. (unsigned)DXIL::OpCode::CreateHandleForLib);
  306. for (auto U = F->user_begin(); U != F->user_end();) {
  307. Value *user = *(U++);
  308. if (!isa<Instruction>(user))
  309. continue;
  310. // must be call inst
  311. CallInst *CI = cast<CallInst>(user);
  312. Value *res = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  313. Value *newHandle = nullptr;
  314. IRBuilder<> Builder(CI);
  315. // Res could be ld/phi/select. Will be removed in
  316. // DxilLowerCreateHandleForLib.
  317. Function *createHandle = hlslOP.GetOpFunc(
  318. DXIL::OpCode::CreateHandleForLib, res->getType());
  319. newHandle = Builder.CreateCall(createHandle, {opArg, res});
  320. CI->replaceAllUsesWith(newHandle);
  321. if (res->user_empty()) {
  322. if (Instruction *I = dyn_cast<Instruction>(res))
  323. I->eraseFromParent();
  324. }
  325. CI->eraseFromParent();
  326. }
  327. }
  328. } // namespace
  329. void DxilGenerationPass::LowerHLCreateHandle() {
  330. Module *M = m_pHLModule->GetModule();
  331. hlsl::OP &hlslOP = *m_pHLModule->GetOP();
  332. // generate dxil operation
  333. for (iplist<Function>::iterator F : M->getFunctionList()) {
  334. if (F->user_empty())
  335. continue;
  336. if (!F->isDeclaration()) {
  337. hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
  338. if (group == HLOpcodeGroup::HLCreateHandle) {
  339. // Will lower in later pass.
  340. TranslateHLCreateHandle(F, hlslOP);
  341. }
  342. }
  343. }
  344. }
  345. void DxilGenerationPass::MarkNonUniform(
  346. std::unordered_set<Value *> &NonUniformSet) {
  347. for (Value *V : NonUniformSet) {
  348. for (User *U : V->users()) {
  349. if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(U)) {
  350. DxilMDHelper::MarkNonUniform(I);
  351. }
  352. }
  353. }
  354. }
  355. static void
  356. MarkUavUpdateCounter(Value* LoadOrGEP,
  357. DxilResource &res,
  358. std::unordered_set<LoadInst *> &UpdateCounterSet) {
  359. if (LoadInst *ldInst = dyn_cast<LoadInst>(LoadOrGEP)) {
  360. if (UpdateCounterSet.count(ldInst)) {
  361. DXASSERT_NOMSG(res.GetClass() == DXIL::ResourceClass::UAV);
  362. res.SetHasCounter(true);
  363. }
  364. } else {
  365. DXASSERT(dyn_cast<GEPOperator>(LoadOrGEP) != nullptr,
  366. "else AddOpcodeParamForIntrinsic in CodeGen did not patch uses "
  367. "to only have ld/st refer to temp object");
  368. GEPOperator *GEP = cast<GEPOperator>(LoadOrGEP);
  369. for (auto GEPU : GEP->users()) {
  370. MarkUavUpdateCounter(GEPU, res, UpdateCounterSet);
  371. }
  372. }
  373. }
  374. static void
  375. MarkUavUpdateCounter(DxilResource &res,
  376. std::unordered_set<LoadInst *> &UpdateCounterSet) {
  377. Value *GV = res.GetGlobalSymbol();
  378. for (auto U = GV->user_begin(), E = GV->user_end(); U != E;) {
  379. User *user = *(U++);
  380. // Skip unused user.
  381. if (user->user_empty())
  382. continue;
  383. MarkUavUpdateCounter(user, res, UpdateCounterSet);
  384. }
  385. }
  386. void DxilGenerationPass::MarkUpdateCounter(
  387. std::unordered_set<LoadInst *> &UpdateCounterSet) {
  388. for (size_t i = 0; i < m_pHLModule->GetUAVs().size(); i++) {
  389. HLResource &UAV = m_pHLModule->GetUAV(i);
  390. MarkUavUpdateCounter(UAV, UpdateCounterSet);
  391. }
  392. }
// Replace every HLCreateHandle call on a cbuffer global with a load of the
// global (plus a GEP for cbuffer arrays) followed by a
// dx.op.createHandleForLib call.
// NOTE(review): NonUniformSet is only referenced from the commented-out
// uniformity code below and is currently unused.
void DxilGenerationPass::GenerateDxilCBufferHandles(
    std::unordered_set<Value *> &NonUniformSet) {
  // For CBuffer, handle are mapped to HLCreateHandle.
  OP *hlslOP = m_pHLModule->GetOP();
  Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandleForLib);
  LLVMContext &Ctx = hlslOP->GetCtx();
  Value *zeroIdx = hlslOP->GetU32Const(0);
  for (size_t i = 0; i < m_pHLModule->GetCBuffers().size(); i++) {
    DxilCBuffer &CB = m_pHLModule->GetCBuffer(i);
    GlobalVariable *GV = cast<GlobalVariable>(CB.GetGlobalSymbol());
    // Remove GEP created in HLObjectOperationLowerHelper::UniformCbPtr.
    GV->removeDeadConstantUsers();
    std::string handleName = std::string(GV->getName());
    // Look up debug location for the cbuffer declaration so the handle can
    // eventually carry it (attachment itself is still TODO below).
    DIVariable *DIV = nullptr;
    DILocation *DL = nullptr;
    if (m_HasDbgInfo) {
      DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder();
      DIV = HLModule::FindGlobalVariableDebugInfo(GV, Finder);
      if (DIV)
        // TODO: how to get col?
        DL = DILocation::get(Ctx, DIV->getLine(), 1,
                             DIV->getScope());
    }
    if (CB.GetRangeSize() == 1) {
      // Single cbuffer: load the global directly.
      Function *createHandle =
          hlslOP->GetOpFunc(OP::OpCode::CreateHandleForLib,
                            GV->getType()->getElementType());
      // Iterator is advanced before the call is erased below.
      for (auto U = GV->user_begin(); U != GV->user_end(); ) {
        // Must HLCreateHandle.
        CallInst *CI = cast<CallInst>(*(U++));
        // Put createHandle to entry block.
        IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(CI));
        Value *V = Builder.CreateLoad(GV);
        CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
        if (m_HasDbgInfo) {
          // TODO: add debug info.
          //handle->setDebugLoc(DL);
          (void)(DL);
        }
        CI->replaceAllUsesWith(handle);
        CI->eraseFromParent();
      }
    } else {
      // Cbuffer array: index into the array first, then load the element.
      PointerType *Ty = GV->getType();
      Type *EltTy = Ty->getElementType()->getArrayElementType()->getPointerTo(
          Ty->getAddressSpace());
      Function *createHandle = hlslOP->GetOpFunc(
          OP::OpCode::CreateHandleForLib, EltTy->getPointerElementType());
      for (auto U = GV->user_begin(); U != GV->user_end();) {
        // Must HLCreateHandle.
        CallInst *CI = cast<CallInst>(*(U++));
        IRBuilder<> Builder(CI);
        Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
        if (isa<ConstantInt>(CBIndex)) {
          // Put createHandle to entry block for const index.
          Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(CI));
        }
        // Add GEP for cbv array use.
        Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});
        /*
        if (!NonUniformSet.count(CBIndex))
          args[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] =
              hlslOP->GetI1Const(0);
        else
          args[DXIL::OperandIndex::kCreateHandleIsUniformOpIdx] =
              hlslOP->GetI1Const(1);*/
        Value *V = Builder.CreateLoad(GEP);
        CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
        CI->replaceAllUsesWith(handle);
        CI->eraseFromParent();
      }
    }
  }
}
// Lower all HL intrinsics to DXIL operations.  For non-library targets,
// dead non-entry functions are pruned first; afterwards any fully-lowered
// (user-less) HL operation or LLVM intrinsic declaration is erased.
// UpdateCounterSet / NonUniformSet are populated by the lowering for use by
// MarkUpdateCounter / MarkNonUniform.
void DxilGenerationPass::GenerateDxilOperations(
    Module &M, std::unordered_set<LoadInst *> &UpdateCounterSet,
    std::unordered_set<Value *> &NonUniformSet) {
  // remove all functions except entry function
  Function *entry = m_pHLModule->GetEntryFunction();
  const ShaderModel *pSM = m_pHLModule->GetShaderModel();
  // Hull shaders also keep their patch constant function alive.
  Function *patchConstantFunc = nullptr;
  if (pSM->IsHS()) {
    DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(entry);
    patchConstantFunc = funcProps.ShaderProps.HS.patchConstantFunc;
  }
  if (!pSM->IsLib()) {
    // Advance the iterator before a possible eraseFromParent.
    for (auto F = M.begin(); F != M.end();) {
      Function *func = F++;
      if (func->isDeclaration())
        continue;
      if (func == entry)
        continue;
      if (func == patchConstantFunc)
        continue;
      if (func->user_empty())
        func->eraseFromParent();
    }
  }
  TranslateBuiltinOperations(*m_pHLModule, m_extensionsCodegenHelper,
                             UpdateCounterSet, NonUniformSet);
  // Remove unused HL Operation functions.
  std::vector<Function *> deadList;
  for (iplist<Function>::iterator F : M.getFunctionList()) {
    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(F);
    if (group != HLOpcodeGroup::NotHL || F->isIntrinsic())
      if (F->user_empty())
        deadList.emplace_back(F);
  }
  // Erase after collection so the function list is not mutated mid-walk.
  for (Function *F : deadList)
    F->eraseFromParent();
}
// For one function: turn precise metadata on entry-block allocas into HL
// marker function calls (so the information survives mem2reg), then apply
// unsafe-algebra fast-math flags to every FP-math instruction that accepts
// them — precise values are re-tightened later by precise propagation.
static void TranslatePreciseAttributeOnFunction(Function &F, Module &M) {
  BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
  // Find allocas that has precise attribute, by looking at all instructions in
  // the entry node
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
    // Advance before processing; marking inserts calls near the alloca.
    Instruction *Inst = (I++);
    if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) {
      if (HLModule::HasPreciseAttributeWithMetadata(AI)) {
        HLModule::MarkPreciseAttributeOnPtrWithFunctionCall(AI, M);
      }
    } else {
      DXASSERT(!HLModule::HasPreciseAttributeWithMetadata(Inst), "Only alloca can has precise metadata.");
    }
  }
  FastMathFlags FMF;
  FMF.setUnsafeAlgebra();
  // Set fast math for all FPMathOperators.
  // Already set FastMath in options. But that only enable things like fadd.
  // Every inst which type is float can be cast to FPMathOperator.
  for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
    BasicBlock *BB = BBI;
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      if (dyn_cast<FPMathOperator>(I)) {
        // Set precise fast math on those instructions that support it.
        if (DxilModule::PreservesFastMathFlags(I))
          I->copyFastMathFlags(FMF);
      }
    }
  }
}
  534. void DxilGenerationPass::TranslatePreciseAttribute() {
  535. bool bIEEEStrict = m_pHLModule->GetHLOptions().bIEEEStrict;
  536. // If IEEE strict, everying is precise, don't need to mark it.
  537. if (bIEEEStrict)
  538. return;
  539. Module &M = *m_pHLModule->GetModule();
  540. // TODO: If not inline every function, for function has call site with precise
  541. // argument and call site without precise argument, need to clone the function
  542. // to propagate the precise for the precise call site.
  543. // This should be done at CGMSHLSLRuntime::FinishCodeGen.
  544. if (m_pHLModule->GetShaderModel()->IsLib()) {
  545. // TODO: If all functions have been inlined, and unreferenced functions removed,
  546. // it should make sense to run on all funciton bodies,
  547. // even when not processing a library.
  548. for (Function &F : M.functions()) {
  549. if (!F.isDeclaration())
  550. TranslatePreciseAttributeOnFunction(F, M);
  551. }
  552. } else {
  553. Function *EntryFn = m_pHLModule->GetEntryFunction();
  554. TranslatePreciseAttributeOnFunction(*EntryFn, M);
  555. if (m_pHLModule->GetShaderModel()->IsHS()) {
  556. DxilFunctionProps &EntryQual = m_pHLModule->GetDxilFunctionProps(EntryFn);
  557. Function *patchConstantFunc = EntryQual.ShaderProps.HS.patchConstantFunc;
  558. TranslatePreciseAttributeOnFunction(*patchConstantFunc, M);
  559. }
  560. }
  561. }
  562. char DxilGenerationPass::ID = 0;
  563. ModulePass *llvm::createDxilGenerationPass(bool NotOptimized, hlsl::HLSLExtensionsCodegenHelper *extensionsHelper) {
  564. DxilGenerationPass *dxilPass = new DxilGenerationPass(NotOptimized);
  565. dxilPass->SetExtensionsHelper(extensionsHelper);
  566. return dxilPass;
  567. }
  568. INITIALIZE_PASS(DxilGenerationPass, "dxilgen", "HLSL DXIL Generation", false, false)
  569. ///////////////////////////////////////////////////////////////////////////////
  570. namespace {
  571. class HLEmitMetadata : public ModulePass {
  572. public:
  573. static char ID; // Pass identification, replacement for typeid
  574. explicit HLEmitMetadata() : ModulePass(ID) {}
  575. const char *getPassName() const override { return "HLSL High-Level Metadata Emit"; }
  576. bool runOnModule(Module &M) override {
  577. if (M.HasHLModule()) {
  578. HLModule::ClearHLMetadata(M);
  579. M.GetHLModule().EmitHLMetadata();
  580. return true;
  581. }
  582. return false;
  583. }
  584. };
  585. }
char HLEmitMetadata::ID = 0;
// Factory for the hlsl-hlemit pass.
ModulePass *llvm::createHLEmitMetadataPass() {
  return new HLEmitMetadata();
}
INITIALIZE_PASS(HLEmitMetadata, "hlsl-hlemit", "HLSL High-Level Metadata Emit", false, false)
  591. ///////////////////////////////////////////////////////////////////////////////
  592. namespace {
  593. class HLEnsureMetadata : public ModulePass {
  594. public:
  595. static char ID; // Pass identification, replacement for typeid
  596. explicit HLEnsureMetadata() : ModulePass(ID) {}
  597. const char *getPassName() const override { return "HLSL High-Level Metadata Ensure"; }
  598. bool runOnModule(Module &M) override {
  599. if (!M.HasHLModule()) {
  600. M.GetOrCreateHLModule();
  601. return true;
  602. }
  603. return false;
  604. }
  605. };
  606. }
char HLEnsureMetadata::ID = 0;
// Factory for the hlsl-hlensure pass.
ModulePass *llvm::createHLEnsureMetadataPass() {
  return new HLEnsureMetadata();
}
INITIALIZE_PASS(HLEnsureMetadata, "hlsl-hlensure", "HLSL High-Level Metadata Ensure", false, false)
  612. ///////////////////////////////////////////////////////////////////////////////
  613. // Precise propagate.
  614. namespace {
  615. class DxilPrecisePropagatePass : public ModulePass {
  616. public:
  617. static char ID; // Pass identification, replacement for typeid
  618. explicit DxilPrecisePropagatePass() : ModulePass(ID) {}
  619. const char *getPassName() const override { return "DXIL Precise Propagate"; }
  620. bool runOnModule(Module &M) override {
  621. DxilModule &dxilModule = M.GetOrCreateDxilModule();
  622. DxilTypeSystem &typeSys = dxilModule.GetTypeSystem();
  623. std::unordered_set<Instruction*> processedSet;
  624. std::vector<Function*> deadList;
  625. for (Function &F : M.functions()) {
  626. if (HLModule::HasPreciseAttribute(&F)) {
  627. PropagatePreciseOnFunctionUser(F, typeSys, processedSet);
  628. deadList.emplace_back(&F);
  629. }
  630. }
  631. for (Function *F : deadList)
  632. F->eraseFromParent();
  633. return true;
  634. }
  635. private:
  636. void PropagatePreciseOnFunctionUser(
  637. Function &F, DxilTypeSystem &typeSys,
  638. std::unordered_set<Instruction *> &processedSet);
  639. };
  640. char DxilPrecisePropagatePass::ID = 0;
  641. }
// Forward declaration: PropagatePreciseAttribute and the operand/pointer
// helpers below are mutually recursive.
static void PropagatePreciseAttribute(Instruction *I, DxilTypeSystem &typeSys,
                                      std::unordered_set<Instruction *> &processedSet);
  644. static void PropagatePreciseAttributeOnOperand(
  645. Value *V, DxilTypeSystem &typeSys, LLVMContext &Context,
  646. std::unordered_set<Instruction *> &processedSet) {
  647. Instruction *I = dyn_cast<Instruction>(V);
  648. // Skip none inst.
  649. if (!I)
  650. return;
  651. FPMathOperator *FPMath = dyn_cast<FPMathOperator>(I);
  652. // Skip none FPMath
  653. if (!FPMath)
  654. return;
  655. // Skip inst already marked.
  656. if (processedSet.count(I) > 0)
  657. return;
  658. // TODO: skip precise on integer type, sample instruction...
  659. processedSet.insert(I);
  660. // Set precise fast math on those instructions that support it.
  661. if (DxilModule::PreservesFastMathFlags(I))
  662. DxilModule::SetPreciseFastMathFlags(I);
  663. // Fast math not work on call, use metadata.
  664. if (CallInst *CI = dyn_cast<CallInst>(I))
  665. HLModule::MarkPreciseAttributeWithMetadata(CI);
  666. PropagatePreciseAttribute(I, typeSys, processedSet);
  667. }
  668. static void PropagatePreciseAttributeOnPointer(
  669. Value *Ptr, DxilTypeSystem &typeSys, LLVMContext &Context,
  670. std::unordered_set<Instruction *> &processedSet) {
  671. // Find all store and propagate on the val operand of store.
  672. // For CallInst, if Ptr is used as out parameter, mark it.
  673. for (User *U : Ptr->users()) {
  674. Instruction *user = cast<Instruction>(U);
  675. if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
  676. Value *val = stInst->getValueOperand();
  677. PropagatePreciseAttributeOnOperand(val, typeSys, Context, processedSet);
  678. } else if (CallInst *CI = dyn_cast<CallInst>(user)) {
  679. bool bReadOnly = true;
  680. Function *F = CI->getCalledFunction();
  681. const DxilFunctionAnnotation *funcAnnotation =
  682. typeSys.GetFunctionAnnotation(F);
  683. for (unsigned i = 0; i < CI->getNumArgOperands(); ++i) {
  684. if (Ptr != CI->getArgOperand(i))
  685. continue;
  686. const DxilParameterAnnotation &paramAnnotation =
  687. funcAnnotation->GetParameterAnnotation(i);
  688. // OutputPatch and OutputStream will be checked after scalar repl.
  689. // Here only check out/inout
  690. if (paramAnnotation.GetParamInputQual() == DxilParamInputQual::Out ||
  691. paramAnnotation.GetParamInputQual() == DxilParamInputQual::Inout) {
  692. bReadOnly = false;
  693. break;
  694. }
  695. }
  696. if (!bReadOnly)
  697. PropagatePreciseAttributeOnOperand(CI, typeSys, Context, processedSet);
  698. }
  699. }
  700. }
  701. static void
  702. PropagatePreciseAttribute(Instruction *I, DxilTypeSystem &typeSys,
  703. std::unordered_set<Instruction *> &processedSet) {
  704. LLVMContext &Context = I->getContext();
  705. if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
  706. PropagatePreciseAttributeOnPointer(AI, typeSys, Context, processedSet);
  707. } else if (dyn_cast<CallInst>(I)) {
  708. // Propagate every argument.
  709. // TODO: only propagate precise argument.
  710. for (Value *src : I->operands())
  711. PropagatePreciseAttributeOnOperand(src, typeSys, Context, processedSet);
  712. } else if (dyn_cast<FPMathOperator>(I)) {
  713. // TODO: only propagate precise argument.
  714. for (Value *src : I->operands())
  715. PropagatePreciseAttributeOnOperand(src, typeSys, Context, processedSet);
  716. } else if (LoadInst *ldInst = dyn_cast<LoadInst>(I)) {
  717. Value *Ptr = ldInst->getPointerOperand();
  718. PropagatePreciseAttributeOnPointer(Ptr, typeSys, Context, processedSet);
  719. } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
  720. PropagatePreciseAttributeOnPointer(GEP, typeSys, Context, processedSet);
  721. // TODO: support more case which need
  722. }
// For each call to the precise-marker function F, propagate the precise
// attribute onto the value passed as its first argument, then delete the
// call so the marker leaves no trace in the output.
void DxilPrecisePropagatePass::PropagatePreciseOnFunctionUser(
    Function &F, DxilTypeSystem &typeSys,
    std::unordered_set<Instruction *> &processedSet) {
  LLVMContext &Context = F.getContext();
  // Advance the iterator before erasing so removal does not invalidate it.
  for (auto U = F.user_begin(), E = F.user_end(); U != E;) {
    CallInst *CI = cast<CallInst>(*(U++));
    Value *V = CI->getArgOperand(0);
    PropagatePreciseAttributeOnOperand(V, typeSys, Context, processedSet);
    CI->eraseFromParent();
  }
}
// Factory used by the pass pipeline to create the precise-propagate pass.
ModulePass *llvm::createDxilPrecisePropagatePass() {
  return new DxilPrecisePropagatePass();
}

INITIALIZE_PASS(DxilPrecisePropagatePass, "hlsl-dxil-precise", "DXIL precise attribute propagate", false, false)
  738. ///////////////////////////////////////////////////////////////////////////////
  739. namespace {
  740. class HLDeadFunctionElimination : public ModulePass {
  741. public:
  742. static char ID; // Pass identification, replacement for typeid
  743. explicit HLDeadFunctionElimination () : ModulePass(ID) {}
  744. const char *getPassName() const override { return "Remove all unused function except entry from HLModule"; }
  745. bool runOnModule(Module &M) override {
  746. if (M.HasHLModule()) {
  747. HLModule &HLM = M.GetHLModule();
  748. bool IsLib = HLM.GetShaderModel()->IsLib();
  749. // Remove unused functions except entry and patch constant func.
  750. // For library profile, only remove unused external functions.
  751. Function *EntryFunc = HLM.GetEntryFunction();
  752. Function *PatchConstantFunc = HLM.GetPatchConstantFunction();
  753. return dxilutil::RemoveUnusedFunctions(M, EntryFunc, PatchConstantFunc,
  754. IsLib);
  755. }
  756. return false;
  757. }
  758. };
  759. }
char HLDeadFunctionElimination::ID = 0;

// Factory used by the pass pipeline to create the HL dead-function pass.
ModulePass *llvm::createHLDeadFunctionEliminationPass() {
  return new HLDeadFunctionElimination();
}

INITIALIZE_PASS(HLDeadFunctionElimination, "hl-dfe", "Remove all unused function except entry from HLModule", false, false)
  765. ///////////////////////////////////////////////////////////////////////////////
  766. // Legalize resource use.
  767. // Map local or static global resource to global resource.
  768. // Require inline for static global resource.
namespace {
static const StringRef kStaticResourceLibErrorMsg = "static global resource use is disallowed in library exports.";

// Maps uses of internal-linkage (static) global resource variables to real
// global resources; disallowed entirely for library profiles.
class DxilPromoteStaticResources : public ModulePass {
public:
  static char ID; // Pass identification, replacement for typeid
  explicit DxilPromoteStaticResources()
      : ModulePass(ID) {}
  const char *getPassName() const override {
    return "DXIL Legalize Static Resource Use";
  }
  bool runOnModule(Module &M) override {
    // Promote static global variables.
    return PromoteStaticGlobalResources(M);
  }

private:
  bool PromoteStaticGlobalResources(Module &M);
};
char DxilPromoteStaticResources::ID = 0;

// Promotes function-local resource allocas to SSA values via mem2reg.
class DxilPromoteLocalResources : public FunctionPass {
  void getAnalysisUsage(AnalysisUsage &AU) const override;

public:
  static char ID; // Pass identification, replacement for typeid
  explicit DxilPromoteLocalResources()
      : FunctionPass(ID) {}
  const char *getPassName() const override {
    return "DXIL Legalize Resource Use";
  }
  bool runOnFunction(Function &F) override {
    // Promote local resource first.
    return PromoteLocalResource(F);
  }

private:
  bool PromoteLocalResource(Function &F);
};
char DxilPromoteLocalResources::ID = 0;
}
// mem2reg (PromoteMemToReg) needs a dominator tree and an assumption cache;
// this pass mutates no analyses it does not own.
void DxilPromoteLocalResources::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.setPreservesAll();
}
// Repeatedly runs mem2reg over entry-block allocas whose (array) element
// type is an HLSL object (resource), until no promotable resource allocas
// remain. Emits an error when an iteration makes no progress.
// Returns true if anything was promoted.
bool DxilPromoteLocalResources::PromoteLocalResource(Function &F) {
  bool bModified = false;
  std::vector<AllocaInst *> Allocas;
  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  BasicBlock &BB = F.getEntryBlock();
  unsigned allocaSize = 0;
  while (1) {
    Allocas.clear();

    // Find allocas that are safe to promote, by looking at all instructions in
    // the entry node (--end() stops before the block's final instruction).
    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { // Is it an alloca?
        // Only resource-typed allocas are of interest here.
        if (HLModule::IsHLSLObjectType(dxilutil::GetArrayEltTy(AI->getAllocatedType()))) {
          if (isAllocaPromotable(AI))
            Allocas.push_back(AI);
        }
      }
    if (Allocas.empty())
      break;

    // No update since the previous round: promotion is stuck on these
    // allocas. Report error and break.
    if (allocaSize == Allocas.size()) {
      F.getContext().emitError(dxilutil::kResourceMapErrorMsg);
      break;
    }
    allocaSize = Allocas.size();

    PromoteMemToReg(Allocas, *DT, nullptr, &AC);
    bModified = true;
  }

  return bModified;
}
// Factory used by the pass pipeline to create the local-resource pass.
FunctionPass *llvm::createDxilPromoteLocalResources() {
  return new DxilPromoteLocalResources();
}

INITIALIZE_PASS_BEGIN(DxilPromoteLocalResources,
                      "hlsl-dxil-promote-local-resources",
                      "DXIL promote local resource use", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(DxilPromoteLocalResources,
                    "hlsl-dxil-promote-local-resources",
                    "DXIL promote local resource use", false, true)
// Promotes loads/stores of internal-linkage global resource variables with
// SSAUpdater until every such global is unused, erroring out if a sweep
// makes no progress. Library profiles reject static resource use outright.
// Returns true if the module was modified.
bool DxilPromoteStaticResources::PromoteStaticGlobalResources(
    Module &M) {
  if (M.GetOrCreateHLModule().GetShaderModel()->IsLib()) {
    // Read/write to global static resource is disallowed for libraries:
    // Resource use needs to be resolved to a single real global resource,
    // but it may not be possible since any external function call may re-enter
    // at any other library export, which could modify the global static
    // between write and read.
    // While it could work for certain cases, describing the boundary at
    // the HLSL level is difficult, so at this point it's better to disallow.
    // example of what could work:
    //   After inlining, exported functions must have writes to static globals
    //   before reads, and must not have any external function calls between
    //   writes and subsequent reads, such that the static global may be
    //   optimized away for the exported function.
    for (auto &GV : M.globals()) {
      if (GV.getLinkage() == GlobalVariable::LinkageTypes::InternalLinkage &&
          HLModule::IsHLSLObjectType(dxilutil::GetArrayEltTy(GV.getType()))) {
        if (!GV.user_empty()) {
          // Report the first offending use only, then stop scanning.
          if (Instruction *I = dyn_cast<Instruction>(*GV.user_begin())) {
            dxilutil::EmitErrorOnInstruction(I, kStaticResourceLibErrorMsg);
            break;
          }
        }
      }
    }
    return false;
  }

  bool bModified = false;
  // Collect all internal-linkage globals of HLSL object (resource) type.
  std::set<GlobalVariable *> staticResources;
  for (auto &GV : M.globals()) {
    if (GV.getLinkage() == GlobalVariable::LinkageTypes::InternalLinkage &&
        HLModule::IsHLSLObjectType(dxilutil::GetArrayEltTy(GV.getType()))) {
      staticResources.insert(&GV);
    }
  }
  SSAUpdater SSA;
  SmallVector<Instruction *, 4> Insts;
  // Make sure every resource load has mapped to global variable.
  while (!staticResources.empty()) {
    bool bUpdated = false;
    // Advance before possible erase so the iterator stays valid.
    for (auto it = staticResources.begin(); it != staticResources.end();) {
      GlobalVariable *GV = *(it++);
      // Build list of instructions to promote.
      for (User *U : GV->users()) {
        Instruction *I = cast<Instruction>(U);
        Insts.emplace_back(I);
      }

      LoadAndStorePromoter(Insts, SSA).run(Insts);
      if (GV->user_empty()) {
        bUpdated = true;
        staticResources.erase(GV);
      }

      Insts.clear();
    }
    // A full sweep with no progress means some use cannot be promoted.
    if (!bUpdated) {
      M.getContext().emitError(dxilutil::kResourceMapErrorMsg);
      break;
    }
    bModified = true;
  }
  return bModified;
}
// Factory used by the pass pipeline to create the static-resource pass.
ModulePass *llvm::createDxilPromoteStaticResources() {
  return new DxilPromoteStaticResources();
}

INITIALIZE_PASS(DxilPromoteStaticResources,
                "hlsl-dxil-promote-static-resources",
                "DXIL promote static resource use", false, false)
  923. ///////////////////////////////////////////////////////////////////////////////
  924. // Legalize EvalOperations.
  925. // Make sure src of EvalOperations are from function parameter.
  926. // This is needed in order to translate EvaluateAttribute operations that traces
  927. // back to LoadInput operations during translation stage. Promoting load/store
  928. // instructions beforehand will allow us to easily trace back to loadInput from
  929. // function call.
  930. namespace {
// Promotes the alloca load/stores feeding EvaluateAttribute* /
// GetAttributeAtVertex calls so their sources can later be traced back to
// LoadInput (see the comment block above this namespace).
class DxilLegalizeEvalOperations : public ModulePass {
public:
  static char ID; // Pass identification, replacement for typeid
  explicit DxilLegalizeEvalOperations() : ModulePass(ID) {}
  const char *getPassName() const override {
    return "DXIL Legalize EvalOperations";
  }
  bool runOnModule(Module &M) override {
    for (Function &F : M.getFunctionList()) {
      hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(&F);
      if (group != HLOpcodeGroup::NotHL) {
        std::vector<CallInst *> EvalFunctionCalls;
        // Find all EvaluateAttribute calls
        for (User *U : F.users()) {
          if (CallInst *CI = dyn_cast<CallInst>(U)) {
            IntrinsicOp evalOp =
                static_cast<IntrinsicOp>(hlsl::GetHLOpcode(CI));
            if (evalOp == IntrinsicOp::IOP_EvaluateAttributeAtSample ||
                evalOp == IntrinsicOp::IOP_EvaluateAttributeCentroid ||
                evalOp == IntrinsicOp::IOP_EvaluateAttributeSnapped ||
                evalOp == IntrinsicOp::IOP_GetAttributeAtVertex) {
              EvalFunctionCalls.push_back(CI);
            }
          }
        }
        if (EvalFunctionCalls.empty()) {
          continue;
        }
        // Start from the call instruction, find all allocas that this call
        // uses.
        std::unordered_set<AllocaInst *> allocas;
        for (CallInst *CI : EvalFunctionCalls) {
          FindAllocasForEvalOperations(CI, allocas);
        }
        // Promote each alloca's load/store users so the eval source becomes
        // an SSA value.
        SSAUpdater SSA;
        SmallVector<Instruction *, 4> Insts;
        for (AllocaInst *AI : allocas) {
          for (User *user : AI->users()) {
            if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
              Insts.emplace_back(cast<Instruction>(user));
            }
          }
          LoadAndStorePromoter(Insts, SSA).run(Insts);
          Insts.clear();
        }
      }
    }
    // NOTE(review): returns true even when nothing was promoted —
    // conservative for a ModulePass, but always reports "modified".
    return true;
  }

private:
  void FindAllocasForEvalOperations(Value *val,
                                    std::unordered_set<AllocaInst *> &allocas);
};
char DxilLegalizeEvalOperations::ID = 0;

// Find allocas for EvaluateAttribute operations.
// Walks backwards from val through HL calls, vector insert/extract/shuffle
// and loads until an alloca (or an unhandled producer) is reached,
// collecting every alloca encountered along the way.
void DxilLegalizeEvalOperations::FindAllocasForEvalOperations(
    Value *val, std::unordered_set<AllocaInst *> &allocas) {
  Value *CurVal = val;
  while (!isa<AllocaInst>(CurVal)) {
    if (CallInst *CI = dyn_cast<CallInst>(CurVal)) {
      // Step through the HL call to its (unary) source operand.
      CurVal = CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx);
    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(CurVal)) {
      Value *arg0 =
          IE->getOperand(0); // Could be another insertelement or undef
      Value *arg1 = IE->getOperand(1);
      // Both the base vector and the inserted scalar may lead to allocas.
      FindAllocasForEvalOperations(arg0, allocas);
      CurVal = arg1;
    } else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(CurVal)) {
      Value *arg0 = SV->getOperand(0);
      Value *arg1 = SV->getOperand(1);
      FindAllocasForEvalOperations(
          arg0, allocas); // Shuffle vector could come from different allocas
      CurVal = arg1;
    } else if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(CurVal)) {
      CurVal = EE->getOperand(0);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurVal)) {
      CurVal = LI->getOperand(0);
    } else {
      // Unrecognized producer: stop the walk here.
      break;
    }
  }
  if (AllocaInst *AI = dyn_cast<AllocaInst>(CurVal)) {
    allocas.insert(AI);
  }
}
  1016. } // namespace
// Factory used by the pass pipeline to create the eval-legalization pass.
ModulePass *llvm::createDxilLegalizeEvalOperationsPass() {
  return new DxilLegalizeEvalOperations();
}

INITIALIZE_PASS(DxilLegalizeEvalOperations,
                "hlsl-dxil-legalize-eval-operations",
                "DXIL legalize eval operations", false, false)
  1023. ///////////////////////////////////////////////////////////////////////////////
  1024. // Translate RawBufferLoad/RawBufferStore
  1025. // This pass is to make sure that we generate correct buffer load for DXIL
  1026. // For DXIL < 1.2, rawBufferLoad will be translated to BufferLoad instruction
  1027. // without mask.
  1028. // For DXIL >= 1.2, if min precision is enabled, currently generation pass is
  1029. // producing i16/f16 return type for min precisions. For rawBuffer, we will
  1030. // change this so that min precisions are returning its actual scalar type (i32/f32)
  1031. // and will be truncated to their corresponding types after loading / before storing.
  1032. namespace {
  1033. // Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi }
  1034. void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
  1035. unsigned size, MutableArrayRef<Value *> resultElts,
  1036. hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  1037. Type *i64Ty = Builder.getInt64Ty();
  1038. Type *doubleTy = Builder.getDoubleTy();
  1039. if (EltTy == doubleTy) {
  1040. Function *makeDouble =
  1041. hlslOP->GetOpFunc(DXIL::OpCode::MakeDouble, doubleTy);
  1042. Value *makeDoubleOpArg =
  1043. Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
  1044. for (unsigned i = 0; i < size; i++) {
  1045. Value *lo = resultElts32[2 * i];
  1046. Value *hi = resultElts32[2 * i + 1];
  1047. Value *V = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
  1048. resultElts[i] = V;
  1049. }
  1050. } else {
  1051. for (unsigned i = 0; i < size; i++) {
  1052. Value *lo = resultElts32[2 * i];
  1053. Value *hi = resultElts32[2 * i + 1];
  1054. lo = Builder.CreateZExt(lo, i64Ty);
  1055. hi = Builder.CreateZExt(hi, i64Ty);
  1056. hi = Builder.CreateShl(hi, 32);
  1057. resultElts[i] = Builder.CreateOr(lo, hi);
  1058. }
  1059. }
  1060. }
  1061. // Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
  1062. void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
  1063. MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
  1064. IRBuilder<> &Builder) {
  1065. Type *i32Ty = Builder.getInt32Ty();
  1066. Type *doubleTy = Builder.getDoubleTy();
  1067. Value *undefI32 = UndefValue::get(i32Ty);
  1068. if (EltTy == doubleTy) {
  1069. Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
  1070. Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
  1071. for (unsigned i = 0; i < size; i++) {
  1072. if (isa<UndefValue>(vals[i])) {
  1073. vals32[2 * i] = undefI32;
  1074. vals32[2 * i + 1] = undefI32;
  1075. } else {
  1076. Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
  1077. Value *lo = Builder.CreateExtractValue(retVal, 0);
  1078. Value *hi = Builder.CreateExtractValue(retVal, 1);
  1079. vals32[2 * i] = lo;
  1080. vals32[2 * i + 1] = hi;
  1081. }
  1082. }
  1083. } else {
  1084. for (unsigned i = 0; i < size; i++) {
  1085. if (isa<UndefValue>(vals[i])) {
  1086. vals32[2 * i] = undefI32;
  1087. vals32[2 * i + 1] = undefI32;
  1088. } else {
  1089. Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
  1090. Value *hi = Builder.CreateLShr(vals[i], 32);
  1091. hi = Builder.CreateTrunc(hi, i32Ty);
  1092. vals32[2 * i] = lo;
  1093. vals32[2 * i + 1] = hi;
  1094. }
  1095. }
  1096. }
  1097. }
  1098. class DxilTranslateRawBuffer : public ModulePass {
  1099. public:
  1100. static char ID;
  1101. explicit DxilTranslateRawBuffer() : ModulePass(ID) {}
  1102. bool runOnModule(Module &M) {
  1103. unsigned major, minor;
  1104. DxilModule &DM = M.GetDxilModule();
  1105. DM.GetDxilVersion(major, minor);
  1106. OP *hlslOP = DM.GetOP();
  1107. // Split 64bit for shader model less than 6.3.
  1108. if (major == 1 && minor <= 2) {
  1109. for (auto F = M.functions().begin(); F != M.functions().end();) {
  1110. Function *func = &*(F++);
  1111. DXIL::OpCodeClass opClass;
  1112. if (hlslOP->GetOpCodeClass(func, opClass)) {
  1113. if (opClass == DXIL::OpCodeClass::RawBufferLoad) {
  1114. Type *ETy =
  1115. hlslOP->GetOverloadType(DXIL::OpCode::RawBufferLoad, func);
  1116. bool is64 =
  1117. ETy->isDoubleTy() || ETy == Type::getInt64Ty(ETy->getContext());
  1118. if (is64) {
  1119. ReplaceRawBufferLoad64Bit(func, ETy, M);
  1120. func->eraseFromParent();
  1121. }
  1122. } else if (opClass == DXIL::OpCodeClass::RawBufferStore) {
  1123. Type *ETy =
  1124. hlslOP->GetOverloadType(DXIL::OpCode::RawBufferStore, func);
  1125. bool is64 =
  1126. ETy->isDoubleTy() || ETy == Type::getInt64Ty(ETy->getContext());
  1127. if (is64) {
  1128. ReplaceRawBufferStore64Bit(func, ETy, M);
  1129. func->eraseFromParent();
  1130. }
  1131. }
  1132. }
  1133. }
  1134. }
  1135. if (major == 1 && minor < 2) {
  1136. for (auto F = M.functions().begin(), E = M.functions().end(); F != E;) {
  1137. Function *func = &*(F++);
  1138. if (func->hasName()) {
  1139. if (func->getName().startswith("dx.op.rawBufferLoad")) {
  1140. ReplaceRawBufferLoad(func, M);
  1141. func->eraseFromParent();
  1142. } else if (func->getName().startswith("dx.op.rawBufferStore")) {
  1143. ReplaceRawBufferStore(func, M);
  1144. func->eraseFromParent();
  1145. }
  1146. }
  1147. }
  1148. } else if (M.GetDxilModule().GetUseMinPrecision()) {
  1149. for (auto F = M.functions().begin(), E = M.functions().end(); F != E;) {
  1150. Function *func = &*(F++);
  1151. if (func->hasName()) {
  1152. if (func->getName().startswith("dx.op.rawBufferLoad")) {
  1153. ReplaceMinPrecisionRawBufferLoad(func, M);
  1154. } else if (func->getName().startswith("dx.op.rawBufferStore")) {
  1155. ReplaceMinPrecisionRawBufferStore(func, M);
  1156. }
  1157. }
  1158. }
  1159. }
  1160. return true;
  1161. }
  1162. private:
  1163. // Replace RawBufferLoad/Store to BufferLoad/Store for DXIL < 1.2
  1164. void ReplaceRawBufferLoad(Function *F, Module &M);
  1165. void ReplaceRawBufferStore(Function *F, Module &M);
  1166. void ReplaceRawBufferLoad64Bit(Function *F, Type *EltTy, Module &M);
  1167. void ReplaceRawBufferStore64Bit(Function *F, Type *EltTy, Module &M);
  1168. // Replace RawBufferLoad/Store of min-precision types to have its actual storage size
  1169. void ReplaceMinPrecisionRawBufferLoad(Function *F, Module &M);
  1170. void ReplaceMinPrecisionRawBufferStore(Function *F, Module &M);
  1171. void ReplaceMinPrecisionRawBufferLoadByType(Function *F, Type *FromTy,
  1172. Type *ToTy, OP *Op,
  1173. const DataLayout &DL);
  1174. };
  1175. } // namespace
// Lower dx.op.rawBufferLoad calls to dx.op.bufferLoad for DXIL < 1.2,
// keeping the overload type and reusing the call's explicit operands 1..3.
void DxilTranslateRawBuffer::ReplaceRawBufferLoad(Function *F,
                                                  Module &M) {
  OP *op = M.GetDxilModule().GetOP();
  Type *RTy = F->getReturnType();
  if (StructType *STy = dyn_cast<StructType>(RTy)) {
    Type *ETy = STy->getElementType(0);
    Function *newFunction = op->GetOpFunc(hlsl::DXIL::OpCode::BufferLoad, ETy);
    // Advance the iterator before rewriting since the call is erased below.
    for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
      User *user = *(U++);
      if (CallInst *CI = dyn_cast<CallInst>(user)) {
        IRBuilder<> Builder(CI);
        SmallVector<Value *, 4> args;
        args.emplace_back(op->GetI32Const((unsigned)DXIL::OpCode::BufferLoad));
        // Carry over explicit operands 1..3 of the raw-buffer call unchanged.
        for (unsigned i = 1; i < 4; ++i) {
          args.emplace_back(CI->getArgOperand(i));
        }
        CallInst *newCall = Builder.CreateCall(newFunction, args);
        CI->replaceAllUsesWith(newCall);
        CI->eraseFromParent();
      } else {
        DXASSERT(false, "function can only be used with call instructions.");
      }
    }
  } else {
    DXASSERT(false, "RawBufferLoad should return struct type.");
  }
}
// Lower a 64-bit (double/i64) dx.op.rawBufferLoad into one or two i32
// rawBufferLoad calls, reassemble the 64-bit components (plus the status
// field, index 4) and rewrite all extractvalue users of the original call.
void DxilTranslateRawBuffer::ReplaceRawBufferLoad64Bit(Function *F, Type *EltTy, Module &M) {
  OP *hlslOP = M.GetDxilModule().GetOP();
  Function *bufLd = hlslOP->GetOpFunc(DXIL::OpCode::RawBufferLoad,
                                      Type::getInt32Ty(M.getContext()));
  for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
    User *user = *(U++);
    if (CallInst *CI = dyn_cast<CallInst>(user)) {
      IRBuilder<> Builder(CI);
      SmallVector<Value *, 4> args(CI->arg_operands());

      Value *offset = CI->getArgOperand(
          DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx);

      // Scan the extractvalue users to learn how many 64-bit components
      // are actually consumed (size) and whether status (index 4) is used.
      unsigned size = 0;
      bool bNeedStatus = false;
      for (User *U : CI->users()) {
        ExtractValueInst *Elt = cast<ExtractValueInst>(U);
        DXASSERT(Elt->getNumIndices() == 1, "else invalid use for resRet");
        unsigned idx = Elt->getIndices()[0];
        if (idx == 4) {
          bNeedStatus = true;
        } else {
          size = std::max(size, idx+1);
        }
      }
      // Each 64-bit component consumes two 32-bit lanes; one i32 load
      // yields at most four lanes, so components 2..3 need a second load
      // whose lanes are selected by maskHi.
      unsigned maskHi = 0;
      unsigned maskLo = 0;
      switch (size) {
      case 1:
        maskLo = 3;
        break;
      case 2:
        maskLo = 0xf;
        break;
      case 3:
        maskLo = 0xf;
        maskHi = 3;
        break;
      case 4:
        maskLo = 0xf;
        maskHi = 0xf;
        break;
      }

      args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
          Builder.getInt8(maskLo);
      Value *resultElts[5] = {nullptr, nullptr, nullptr, nullptr, nullptr};
      CallInst *newLd = Builder.CreateCall(bufLd, args);

      Value *resultElts32[8];
      unsigned eltBase = 0;
      for (unsigned i = 0; i < size; i++) {
        if (i == 2) {
          // Second load for the upper components:
          // Update offset 4 by 4 bytes.
          args[DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx] =
              Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
          args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
              Builder.getInt8(maskHi);
          newLd = Builder.CreateCall(bufLd, args);
          eltBase = 4;
        }
        unsigned resBase = 2 * i;
        resultElts32[resBase] =
            Builder.CreateExtractValue(newLd, resBase - eltBase);
        resultElts32[resBase + 1] =
            Builder.CreateExtractValue(newLd, resBase + 1 - eltBase);
      }

      Make64bitResultForLoad(EltTy, resultElts32, size, resultElts, hlslOP, Builder);
      if (bNeedStatus) {
        // Status is taken from the last issued load.
        resultElts[4] = Builder.CreateExtractValue(newLd, 4);
      }
      // Replace every extractvalue on the old call with the rebuilt value,
      // advancing the iterator before each erase.
      for (auto it = CI->user_begin(); it != CI->user_end(); ) {
        ExtractValueInst *Elt = cast<ExtractValueInst>(*(it++));
        DXASSERT(Elt->getNumIndices() == 1, "else invalid use for resRet");
        unsigned idx = Elt->getIndices()[0];
        if (!Elt->user_empty()) {
          Value *newElt = resultElts[idx];
          Elt->replaceAllUsesWith(newElt);
        }
        Elt->eraseFromParent();
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(false, "function can only be used with call instructions.");
    }
  }
}
// Lower dx.op.rawBufferStore calls to dx.op.bufferStore for DXIL < 1.2,
// keeping the overload type and reusing the call's explicit operands 1..8.
void DxilTranslateRawBuffer::ReplaceRawBufferStore(Function *F,
  Module &M) {
  OP *op = M.GetDxilModule().GetOP();
  DXASSERT(F->getReturnType()->isVoidTy(), "rawBufferStore should return a void type.");
  Type *ETy = F->getFunctionType()->getParamType(4); // value
  Function *newFunction = op->GetOpFunc(hlsl::DXIL::OpCode::BufferStore, ETy);
  // Advance the iterator before rewriting since the call is erased below.
  for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
    User *user = *(U++);
    if (CallInst *CI = dyn_cast<CallInst>(user)) {
      IRBuilder<> Builder(CI);
      SmallVector<Value *, 4> args;
      args.emplace_back(op->GetI32Const((unsigned)DXIL::OpCode::BufferStore));
      // Carry over explicit operands 1..8 of the raw-buffer call unchanged.
      for (unsigned i = 1; i < 9; ++i) {
        args.emplace_back(CI->getArgOperand(i));
      }
      Builder.CreateCall(newFunction, args);
      CI->eraseFromParent();
    }
    else {
      DXASSERT(false, "function can only be used with call instructions.");
    }
  }
}
// Lower a 64-bit (double/i64) dx.op.rawBufferStore into one or two i32
// rawBufferStore calls, splitting each stored value into lo/hi halves.
void DxilTranslateRawBuffer::ReplaceRawBufferStore64Bit(Function *F, Type *ETy,
                                                        Module &M) {
  OP *hlslOP = M.GetDxilModule().GetOP();
  Function *newFunction = hlslOP->GetOpFunc(hlsl::DXIL::OpCode::RawBufferStore,
                                            Type::getInt32Ty(M.getContext()));
  // Advance the iterator before rewriting since the call is erased below.
  for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
    User *user = *(U++);
    if (CallInst *CI = dyn_cast<CallInst>(user)) {
      IRBuilder<> Builder(CI);
      SmallVector<Value *, 4> args(CI->arg_operands());
      Value *vals[4] = {
          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal0OpIdx),
          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal1OpIdx),
          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal2OpIdx),
          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreVal3OpIdx)};
      ConstantInt *cMask = cast<ConstantInt>(
          CI->getArgOperand(DXIL::OperandIndex::kRawBufferStoreMaskOpIdx));
      Value *undefI32 = UndefValue::get(Builder.getInt32Ty());
      Value *vals32[8] = {undefI32, undefI32, undefI32, undefI32,
                          undefI32, undefI32, undefI32, undefI32};

      // Map the 64-bit component mask to the 32-bit masks of the one or
      // two replacement stores (each 64-bit lane needs two 32-bit lanes).
      unsigned maskLo = 0;
      unsigned maskHi = 0;
      unsigned size = 0;
      unsigned mask = cMask->getLimitedValue();
      switch (mask) {
      case 1:
        maskLo = 3;
        size = 1;
        break;
      case 3:
        maskLo = 15;
        size = 2;
        break;
      case 7:
        maskLo = 15;
        maskHi = 3;
        size = 3;
        break;
      case 15:
        maskLo = 15;
        maskHi = 15;
        size = 4;
        break;
      default:
        DXASSERT(0, "invalid mask");
      }

      Split64bitValForStore(ETy, vals, size, vals32, hlslOP, Builder);
      args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] =
          Builder.getInt8(maskLo);
      args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx] = vals32[0];
      args[DXIL::OperandIndex::kRawBufferStoreVal1OpIdx] = vals32[1];
      args[DXIL::OperandIndex::kRawBufferStoreVal2OpIdx] = vals32[2];
      args[DXIL::OperandIndex::kRawBufferStoreVal3OpIdx] = vals32[3];

      Builder.CreateCall(newFunction, args);

      if (maskHi) {
        // Second store covers components 2..3.
        // NOTE(review): the offset is read via kBufferStoreCoord1OpIdx but
        // written via kRawBufferStoreElementOffsetOpIdx — presumably the
        // same operand slot; confirm the two indices are equal.
        Value *offset = args[DXIL::OperandIndex::kBufferStoreCoord1OpIdx];
        // Update offset 4 by 4 bytes.
        offset = Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
        args[DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx] = offset;
        args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] =
            Builder.getInt8(maskHi);
        args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx] = vals32[4];
        args[DXIL::OperandIndex::kRawBufferStoreVal1OpIdx] = vals32[5];
        args[DXIL::OperandIndex::kRawBufferStoreVal2OpIdx] = vals32[6];
        args[DXIL::OperandIndex::kRawBufferStoreVal3OpIdx] = vals32[7];

        Builder.CreateCall(newFunction, args);
      }
      CI->eraseFromParent();
    } else {
      DXASSERT(false, "function can only be used with call instructions.");
    }
  }
}
  1382. void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferLoad(Function *F,
  1383. Module &M) {
  1384. OP *Op = M.GetDxilModule().GetOP();
  1385. Type *RetTy = F->getReturnType();
  1386. if (StructType *STy = dyn_cast<StructType>(RetTy)) {
  1387. Type *EltTy = STy->getElementType(0);
  1388. if (EltTy->isHalfTy()) {
  1389. ReplaceMinPrecisionRawBufferLoadByType(F, Type::getHalfTy(M.getContext()),
  1390. Type::getFloatTy(M.getContext()),
  1391. Op, M.getDataLayout());
  1392. } else if (EltTy == Type::getInt16Ty(M.getContext())) {
  1393. ReplaceMinPrecisionRawBufferLoadByType(
  1394. F, Type::getInt16Ty(M.getContext()), Type::getInt32Ty(M.getContext()),
  1395. Op, M.getDataLayout());
  1396. }
  1397. } else {
  1398. DXASSERT(false, "RawBufferLoad should return struct type.");
  1399. }
  1400. }
// Rewrites min-precision rawBufferStore calls so the stored values are
// widened to 32-bit types: half values get fpext'd to float; i16 values get
// zext'd or sext'd to i32 depending on the signedness of the destination
// field, which is looked up in the UAV's struct type annotation.
void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferStore(Function *F,
                                                               Module &M) {
  DXASSERT(F->getReturnType()->isVoidTy(), "rawBufferStore should return a void type.");
  // Parameter 4 is the first of the four stored-value operands; its type
  // tells us whether this overload is a min-precision one.
  Type *ETy = F->getFunctionType()->getParamType(4); // value
  Type *NewETy;
  if (ETy->isHalfTy()) {
    NewETy = Type::getFloatTy(M.getContext());
  }
  else if (ETy == Type::getInt16Ty(M.getContext())) {
    NewETy = Type::getInt32Ty(M.getContext());
  }
  else {
    return; // not a min precision type
  }
  Function *newFunction = M.GetDxilModule().GetOP()->GetOpFunc(
      DXIL::OpCode::RawBufferStore, NewETy);
  // for each function
  // add argument 4-7 to its upconverted values
  // replace function call
  for (auto FuncUser = F->user_begin(), FuncEnd = F->user_end(); FuncUser != FuncEnd;) {
    // Advance the iterator before rewriting so erasing CI below does not
    // invalidate the loop.
    CallInst *CI = dyn_cast<CallInst>(*(FuncUser++));
    DXASSERT(CI, "function user must be a call instruction.");
    IRBuilder<> CIBuilder(CI);
    SmallVector<Value *, 9> Args;
    // Args 0-3 (opcode, handle, and the two coordinate/offset operands)
    // carry over unchanged.
    for (unsigned i = 0; i < 4; ++i) {
      Args.emplace_back(CI->getArgOperand(i));
    }
    // values to store should be converted to its higher precision types
    if (ETy->isHalfTy()) {
      for (unsigned i = 4; i < 8; ++i) {
        Value *NewV = CIBuilder.CreateFPExt(CI->getArgOperand(i),
                                            Type::getFloatTy(M.getContext()));
        Args.emplace_back(NewV);
      }
    }
    else if (ETy == Type::getInt16Ty(M.getContext())) {
      // This case only applies to typed buffer since Store operation of byte
      // address buffer for min precision is handled by implicit conversion on
      // intrinsic call. Since we are extending integer, we have to know if we
      // should sign ext or zero ext. We can do this by iterating checking the
      // size of the element at struct type and comp type at type annotation
      // Arg 1 is the resource handle; assumed here to come straight from a
      // createHandle call whose operands encode class and range id.
      CallInst *handleCI = dyn_cast<CallInst>(CI->getArgOperand(1));
      DXASSERT(handleCI, "otherwise handle was not an argument to buffer store.");
      ConstantInt *resClass = dyn_cast<ConstantInt>(handleCI->getArgOperand(1));
      DXASSERT_LOCALVAR(resClass, resClass && resClass->getSExtValue() ==
                        (unsigned)DXIL::ResourceClass::UAV,
                        "otherwise buffer store called on non uav kind.");
      ConstantInt *rangeID = dyn_cast<ConstantInt>(handleCI->getArgOperand(2)); // range id or idx?
      DXASSERT(rangeID, "wrong createHandle call.");
      DxilResource dxilRes = M.GetDxilModule().GetUAV(rangeID->getSExtValue());
      StructType *STy = dyn_cast<StructType>(dxilRes.GetRetType());
      DxilStructAnnotation *SAnnot = M.GetDxilModule().GetTypeSystem().GetStructAnnotation(STy);
      // Arg 3 is the byte offset into the struct; assumed constant here
      // (offsetInt is dereferenced unconditionally) -- TODO confirm callers
      // never pass a dynamic offset on this path.
      ConstantInt *offsetInt = dyn_cast<ConstantInt>(CI->getArgOperand(3));
      unsigned offset = offsetInt->getSExtValue();
      unsigned currentOffset = 0;
      // Walk the annotated struct fields, accumulating allocated sizes, until
      // we pass the store's byte offset; the field reached at that point is
      // the one being written, and its comp type decides zext vs sext.
      for (DxilStructTypeIterator iter = begin(STy, SAnnot), ItEnd = end(STy, SAnnot); iter != ItEnd; ++iter) {
        std::pair<Type *, DxilFieldAnnotation*> pair = *iter;
        currentOffset += M.getDataLayout().getTypeAllocSize(pair.first);
        if (currentOffset > offset) {
          if (pair.second->GetCompType().IsUIntTy()) {
            // Unsigned destination: zero-extend the four value operands.
            for (unsigned i = 4; i < 8; ++i) {
              Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), Type::getInt32Ty(M.getContext()));
              Args.emplace_back(NewV);
            }
            break;
          }
          else if (pair.second->GetCompType().IsIntTy()) {
            // Signed destination: sign-extend the four value operands.
            for (unsigned i = 4; i < 8; ++i) {
              Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), Type::getInt32Ty(M.getContext()));
              Args.emplace_back(NewV);
            }
            break;
          }
          else {
            // NOTE(review): if this branch is reached in a release build (where
            // DXASSERT is a no-op) and the loop then ends without a break, Args
            // is left without the four value operands -- presumably unreachable
            // for well-formed input; verify.
            DXASSERT(false, "Invalid comp type");
          }
        }
      }
    }
    // mask
    Args.emplace_back(CI->getArgOperand(8));
    // alignment
    Args.emplace_back(M.GetDxilModule().GetOP()->GetI32Const(
        M.getDataLayout().getTypeAllocSize(NewETy)));
    CIBuilder.CreateCall(newFunction, Args);
    CI->eraseFromParent();
  }
}
  1489. void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferLoadByType(
  1490. Function *F, Type *FromTy, Type *ToTy, OP *Op, const DataLayout &DL) {
  1491. Function *newFunction = Op->GetOpFunc(DXIL::OpCode::RawBufferLoad, ToTy);
  1492. for (auto FUser = F->user_begin(), FEnd = F->user_end(); FUser != FEnd;) {
  1493. User *UserCI = *(FUser++);
  1494. if (CallInst *CI = dyn_cast<CallInst>(UserCI)) {
  1495. IRBuilder<> CIBuilder(CI);
  1496. SmallVector<Value *, 5> newFuncArgs;
  1497. // opcode, handle, index, elementOffset, mask
  1498. // Compiler is generating correct element offset even for min precision types
  1499. // So no need to recalculate here
  1500. for (unsigned i = 0; i < 5; ++i) {
  1501. newFuncArgs.emplace_back(CI->getArgOperand(i));
  1502. }
  1503. // new alignment for new type
  1504. newFuncArgs.emplace_back(Op->GetI32Const(DL.getTypeAllocSize(ToTy)));
  1505. CallInst *newCI = CIBuilder.CreateCall(newFunction, newFuncArgs);
  1506. for (auto CIUser = CI->user_begin(), CIEnd = CI->user_end();
  1507. CIUser != CIEnd;) {
  1508. User *UserEV = *(CIUser++);
  1509. if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(UserEV)) {
  1510. IRBuilder<> EVBuilder(EV);
  1511. ArrayRef<unsigned> Indices = EV->getIndices();
  1512. DXASSERT(Indices.size() == 1, "Otherwise we have wrong extract value.");
  1513. Value *newEV = EVBuilder.CreateExtractValue(newCI, Indices);
  1514. Value *newTruncV = nullptr;
  1515. if (4 == Indices[0]) { // Don't truncate status
  1516. newTruncV = newEV;
  1517. }
  1518. else if (FromTy->isHalfTy()) {
  1519. newTruncV = EVBuilder.CreateFPTrunc(newEV, FromTy);
  1520. } else if (FromTy->isIntegerTy()) {
  1521. newTruncV = EVBuilder.CreateTrunc(newEV, FromTy);
  1522. } else {
  1523. DXASSERT(false, "unexpected type conversion");
  1524. }
  1525. EV->replaceAllUsesWith(newTruncV);
  1526. EV->eraseFromParent();
  1527. }
  1528. }
  1529. CI->eraseFromParent();
  1530. }
  1531. }
  1532. F->eraseFromParent();
  1533. }
  1534. char DxilTranslateRawBuffer::ID = 0;
  1535. ModulePass *llvm::createDxilTranslateRawBuffer() {
  1536. return new DxilTranslateRawBuffer();
  1537. }
// Register the pass with LLVM's pass registry under the flag
// "hlsl-translate-dxil-raw-buffer". NOTE(review): the description string only
// mentions loads, but the pass also rewrites raw buffer stores; the string is
// user-visible pass metadata, so it is left unchanged here.
INITIALIZE_PASS(DxilTranslateRawBuffer, "hlsl-translate-dxil-raw-buffer",
                "Translate raw buffer load", false, false)