DxilGenerationPass.cpp 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilGenerationPass.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // DxilGenerationPass implementation. //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #include "HLSignatureLower.h"
  12. #include "dxc/DXIL/DxilEntryProps.h"
  13. #include "dxc/DXIL/DxilModule.h"
  14. #include "dxc/DXIL/DxilOperations.h"
  15. #include "dxc/DXIL/DxilInstructions.h"
  16. #include "dxc/DXIL/DxilUtil.h"
  17. #include "dxc/HLSL/DxilGenerationPass.h"
  18. #include "dxc/HLSL/HLSLExtensionsCodegenHelper.h"
  19. #include "dxc/HLSL/HLModule.h"
  20. #include "dxc/HLSL/HLOperationLower.h"
  21. #include "dxc/HLSL/HLOperations.h"
  22. #include "dxc/Support/Global.h"
  23. #include "llvm/Pass.h"
  24. #include "llvm/ADT/STLExtras.h"
  25. #include "llvm/Analysis/AssumptionCache.h"
  26. #include "llvm/IR/DebugInfo.h"
  27. #include "llvm/IR/DebugInfoMetadata.h"
  28. #include "llvm/IR/Function.h"
  29. #include "llvm/IR/Instruction.h"
  30. #include "llvm/IR/Instructions.h"
  31. #include "llvm/IR/IRBuilder.h"
  32. #include "llvm/IR/Operator.h"
  33. #include "llvm/IR/Module.h"
  34. #include "llvm/Support/Casting.h"
  35. #include "llvm/Transforms/Utils/SSAUpdater.h"
  36. #include <unordered_map>
  37. #include <unordered_set>
  38. #include <vector>
  39. using namespace llvm;
  40. using namespace hlsl;
  41. // TODO: use the hlsl namespace for most of this file.
  42. namespace {
  43. void SimplifyGlobalSymbol(GlobalVariable *GV) {
  44. Type *Ty = GV->getType()->getElementType();
  45. if (!Ty->isArrayTy()) {
  46. // Make sure only 1 load of GV in each function.
  47. std::unordered_map<Function *, Instruction *> handleMapOnFunction;
  48. for (User *U : GV->users()) {
  49. if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
  50. Function *F = LI->getParent()->getParent();
  51. auto it = handleMapOnFunction.find(F);
  52. if (it == handleMapOnFunction.end()) {
  53. LI->moveBefore(dxilutil::FindAllocaInsertionPt(F));
  54. handleMapOnFunction[F] = LI;
  55. } else {
  56. LI->replaceAllUsesWith(it->second);
  57. }
  58. }
  59. }
  60. }
  61. }
  62. void InitResourceBase(const DxilResourceBase *pSource,
  63. DxilResourceBase *pDest) {
  64. DXASSERT_NOMSG(pSource->GetClass() == pDest->GetClass());
  65. pDest->SetKind(pSource->GetKind());
  66. pDest->SetID(pSource->GetID());
  67. pDest->SetSpaceID(pSource->GetSpaceID());
  68. pDest->SetLowerBound(pSource->GetLowerBound());
  69. pDest->SetRangeSize(pSource->GetRangeSize());
  70. pDest->SetGlobalSymbol(pSource->GetGlobalSymbol());
  71. pDest->SetGlobalName(pSource->GetGlobalName());
  72. pDest->SetHandle(pSource->GetHandle());
  73. pDest->SetHLSLType(pSource->GetHLSLType());
  74. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(pSource->GetGlobalSymbol()))
  75. SimplifyGlobalSymbol(GV);
  76. }
  77. void InitResource(const DxilResource *pSource, DxilResource *pDest) {
  78. pDest->SetCompType(pSource->GetCompType());
  79. pDest->SetSamplerFeedbackType(pSource->GetSamplerFeedbackType());
  80. pDest->SetSampleCount(pSource->GetSampleCount());
  81. pDest->SetElementStride(pSource->GetElementStride());
  82. pDest->SetGloballyCoherent(pSource->IsGloballyCoherent());
  83. pDest->SetHasCounter(pSource->HasCounter());
  84. pDest->SetRW(pSource->IsRW());
  85. pDest->SetROV(pSource->IsROV());
  86. InitResourceBase(pSource, pDest);
  87. }
  88. void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
  89. // Subsystems.
  90. unsigned ValMajor, ValMinor;
  91. H.GetValidatorVersion(ValMajor, ValMinor);
  92. M.SetValidatorVersion(ValMajor, ValMinor);
  93. M.SetShaderModel(H.GetShaderModel(), H.GetHLOptions().bUseMinPrecision);
  94. M.SetForceZeroStoreLifetimes(H.GetHLOptions().bForceZeroStoreLifetimes);
  95. // Entry function.
  96. if (!M.GetShaderModel()->IsLib()) {
  97. Function *EntryFn = H.GetEntryFunction();
  98. M.SetEntryFunction(EntryFn);
  99. M.SetEntryFunctionName(H.GetEntryFunctionName());
  100. }
  101. std::vector<GlobalVariable* > &LLVMUsed = M.GetLLVMUsed();
  102. // Resources
  103. for (auto && C : H.GetCBuffers()) {
  104. auto b = llvm::make_unique<DxilCBuffer>();
  105. InitResourceBase(C.get(), b.get());
  106. b->SetSize(C->GetSize());
  107. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
  108. LLVMUsed.emplace_back(GV);
  109. M.AddCBuffer(std::move(b));
  110. }
  111. for (auto && C : H.GetUAVs()) {
  112. auto b = llvm::make_unique<DxilResource>();
  113. InitResource(C.get(), b.get());
  114. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
  115. LLVMUsed.emplace_back(GV);
  116. M.AddUAV(std::move(b));
  117. }
  118. for (auto && C : H.GetSRVs()) {
  119. auto b = llvm::make_unique<DxilResource>();
  120. InitResource(C.get(), b.get());
  121. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
  122. LLVMUsed.emplace_back(GV);
  123. M.AddSRV(std::move(b));
  124. }
  125. for (auto && C : H.GetSamplers()) {
  126. auto b = llvm::make_unique<DxilSampler>();
  127. InitResourceBase(C.get(), b.get());
  128. b->SetSamplerKind(C->GetSamplerKind());
  129. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
  130. LLVMUsed.emplace_back(GV);
  131. M.AddSampler(std::move(b));
  132. }
  133. // Signatures.
  134. M.ResetSerializedRootSignature(H.GetSerializedRootSignature());
  135. // Subobjects.
  136. M.ResetSubobjects(H.ReleaseSubobjects());
  137. // Shader properties.
  138. //bool m_bDisableOptimizations;
  139. M.SetDisableOptimization(H.GetHLOptions().bDisableOptimizations);
  140. M.SetLegacyResourceReservation(H.GetHLOptions().bLegacyResourceReservation);
  141. //bool m_bDisableMathRefactoring;
  142. //bool m_bEnableDoublePrecision;
  143. //bool m_bEnableDoubleExtensions;
  144. //M.CollectShaderFlags();
  145. //bool m_bForceEarlyDepthStencil;
  146. //bool m_bEnableRawAndStructuredBuffers;
  147. //bool m_bEnableMSAD;
  148. //M.m_ShaderFlags.SetAllResourcesBound(H.GetHLOptions().bAllResourcesBound);
  149. // DXIL type system.
  150. M.ResetTypeSystem(H.ReleaseTypeSystem());
  151. // Dxil OP.
  152. M.ResetOP(H.ReleaseOP());
  153. // Keep llvm used.
  154. M.EmitLLVMUsed();
  155. M.SetAllResourcesBound(H.GetHLOptions().bAllResourcesBound);
  156. M.SetAutoBindingSpace(H.GetAutoBindingSpace());
  157. // Update Validator Version
  158. M.UpgradeToMinValidatorVersion();
  159. }
  160. class DxilGenerationPass : public ModulePass {
  161. HLModule *m_pHLModule;
  162. bool m_HasDbgInfo;
  163. HLSLExtensionsCodegenHelper *m_extensionsCodegenHelper;
  164. public:
  165. static char ID; // Pass identification, replacement for typeid
  166. explicit DxilGenerationPass(bool NoOpt = false)
  167. : ModulePass(ID), m_pHLModule(nullptr), m_extensionsCodegenHelper(nullptr), NotOptimized(NoOpt) {}
  168. const char *getPassName() const override { return "DXIL Generator"; }
  169. void SetExtensionsHelper(HLSLExtensionsCodegenHelper *helper) {
  170. m_extensionsCodegenHelper = helper;
  171. }
  172. bool runOnModule(Module &M) override {
  173. m_pHLModule = &M.GetOrCreateHLModule();
  174. const ShaderModel *SM = m_pHLModule->GetShaderModel();
  175. // Load up debug information, to cross-reference values and the instructions
  176. // used to load them.
  177. m_HasDbgInfo = hasDebugInfo(M);
  178. // EntrySig for shader functions.
  179. DxilEntryPropsMap EntryPropsMap;
  180. if (!SM->IsLib()) {
  181. Function *EntryFn = m_pHLModule->GetEntryFunction();
  182. if (!m_pHLModule->HasDxilFunctionProps(EntryFn)) {
  183. dxilutil::EmitErrorOnFunction(M.getContext(), EntryFn, "Entry function don't have property.");
  184. return false;
  185. }
  186. DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(EntryFn);
  187. std::unique_ptr<DxilEntryProps> pProps =
  188. llvm::make_unique<DxilEntryProps>(
  189. props, m_pHLModule->GetHLOptions().bUseMinPrecision);
  190. HLSignatureLower sigLower(m_pHLModule->GetEntryFunction(), *m_pHLModule,
  191. pProps->sig);
  192. sigLower.Run();
  193. EntryPropsMap[EntryFn] = std::move(pProps);
  194. } else {
  195. for (auto It = M.begin(); It != M.end();) {
  196. Function &F = *(It++);
  197. // Lower signature for each graphics or compute entry function.
  198. if (m_pHLModule->HasDxilFunctionProps(&F)) {
  199. DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(&F);
  200. std::unique_ptr<DxilEntryProps> pProps =
  201. llvm::make_unique<DxilEntryProps>(
  202. props, m_pHLModule->GetHLOptions().bUseMinPrecision);
  203. if (m_pHLModule->IsGraphicsShader(&F) ||
  204. m_pHLModule->IsComputeShader(&F)) {
  205. HLSignatureLower sigLower(&F, *m_pHLModule, pProps->sig);
  206. // TODO: BUG: This will lower patch constant function sigs twice if
  207. // used by two hull shaders!
  208. sigLower.Run();
  209. }
  210. EntryPropsMap[&F] = std::move(pProps);
  211. }
  212. }
  213. }
  214. std::unordered_set<LoadInst *> UpdateCounterSet;
  215. GenerateDxilOperations(M, UpdateCounterSet);
  216. GenerateDxilCBufferHandles();
  217. MarkUpdateCounter(UpdateCounterSet);
  218. std::unordered_map<CallInst *, Type*> HandleToResTypeMap;
  219. LowerHLCreateHandle(HandleToResTypeMap);
  220. // LowerHLCreateHandle() should have translated HLCreateHandle to CreateHandleForLib.
  221. // Clean up HLCreateHandle functions.
  222. for (auto It = M.begin(); It != M.end();) {
  223. Function &F = *(It++);
  224. if (!F.isDeclaration()) {
  225. if (hlsl::GetHLOpcodeGroupByName(&F) ==
  226. HLOpcodeGroup::HLCreateHandle) {
  227. if (F.user_empty()) {
  228. F.eraseFromParent();
  229. } else {
  230. dxilutil::EmitErrorOnFunction(M.getContext(), &F, "Fail to lower createHandle.");
  231. }
  232. }
  233. }
  234. }
  235. // Translate precise on allocas into function call to keep the information after mem2reg.
  236. // The function calls will be removed after propagate precise attribute.
  237. TranslatePreciseAttribute();
  238. // High-level metadata should now be turned into low-level metadata.
  239. const bool SkipInit = true;
  240. hlsl::DxilModule &DxilMod = M.GetOrCreateDxilModule(SkipInit);
  241. DxilFunctionProps *pProps = nullptr;
  242. if (!SM->IsLib()) {
  243. pProps = &EntryPropsMap.begin()->second->props;
  244. }
  245. InitDxilModuleFromHLModule(*m_pHLModule, DxilMod, m_HasDbgInfo);
  246. DxilMod.ResetEntryPropsMap(std::move(EntryPropsMap));
  247. if (!SM->IsLib()) {
  248. DxilMod.SetShaderProperties(pProps);
  249. }
  250. HLModule::ClearHLMetadata(M);
  251. M.ResetHLModule();
  252. if (SM->IsSM62Plus() && DxilMod.GetUseMinPrecision()) {
  253. TranslateMinPrecisionRawBuffer(DxilMod, HandleToResTypeMap);
  254. }
  255. // We now have a DXIL representation - record this.
  256. SetPauseResumePasses(M, "hlsl-dxilemit", "hlsl-dxilload");
  257. (void)NotOptimized; // Dummy out unused member to silence warnings
  258. return true;
  259. }
  260. private:
  261. void MarkUpdateCounter(std::unordered_set<LoadInst *> &UpdateCounterSet);
  262. // Generate DXIL cbuffer handles.
  263. void
  264. GenerateDxilCBufferHandles();
  265. // change built-in funtion into DXIL operations
  266. void GenerateDxilOperations(Module &M,
  267. std::unordered_set<LoadInst *> &UpdateCounterSet);
  268. void LowerHLCreateHandle(
  269. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap);
  270. // Translate precise attribute into HL function call.
  271. void TranslatePreciseAttribute();
  272. // Translate RawBufferLoad/RawBufferStore
  273. // For DXIL >= 1.2, if min precision is enabled, currently generation pass is
  274. // producing i16/f16 return type for min precisions. For rawBuffer, we will
  275. // change this so that min precisions are returning its actual scalar type
  276. // (i32/f32) and will be truncated to their corresponding types after loading
  277. // / before storing.
  278. void TranslateMinPrecisionRawBuffer(
  279. DxilModule &DM,
  280. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap);
  281. // Input module is not optimized.
  282. bool NotOptimized;
  283. };
  284. }
  285. namespace {
  286. void TranslateHLCreateHandle(Function *F, hlsl::OP &hlslOP) {
  287. Value *opArg = hlslOP.GetU32Const((unsigned)DXIL::OpCode::CreateHandleForLib);
  288. for (auto U = F->user_begin(); U != F->user_end();) {
  289. Value *user = *(U++);
  290. if (!isa<Instruction>(user))
  291. continue;
  292. // must be call inst
  293. CallInst *CI = cast<CallInst>(user);
  294. Value *res = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  295. Value *newHandle = nullptr;
  296. IRBuilder<> Builder(CI);
  297. // Res could be ld/phi/select. Will be removed in
  298. // DxilLowerCreateHandleForLib.
  299. Function *createHandle =
  300. hlslOP.GetOpFunc(DXIL::OpCode::CreateHandleForLib, res->getType());
  301. newHandle = Builder.CreateCall(createHandle, {opArg, res});
  302. CI->replaceAllUsesWith(newHandle);
  303. if (res->user_empty()) {
  304. if (Instruction *I = dyn_cast<Instruction>(res))
  305. I->eraseFromParent();
  306. }
  307. CI->eraseFromParent();
  308. }
  309. }
  310. void TranslateHLAnnotateHandle(
  311. Function *F, hlsl::OP &hlslOP,
  312. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap) {
  313. Value *opArg = hlslOP.GetU32Const((unsigned)DXIL::OpCode::AnnotateHandle);
  314. for (auto U = F->user_begin(); U != F->user_end();) {
  315. Value *user = *(U++);
  316. if (!isa<Instruction>(user))
  317. continue;
  318. // must be call inst
  319. CallInst *CI = cast<CallInst>(user);
  320. Value *handle =
  321. CI->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx);
  322. Value *RP = CI->getArgOperand(
  323. HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx);
  324. Type *ResTy =
  325. CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceTypeOpIdx)
  326. ->getType();
  327. IRBuilder<> Builder(CI);
  328. // put annotateHandle near the Handle it annotated.
  329. if (Instruction *I = dyn_cast<Instruction>(handle)) {
  330. if (isa<PHINode>(I)) {
  331. Builder.SetInsertPoint(I->getParent()->getFirstInsertionPt());
  332. } else {
  333. Builder.SetInsertPoint(I->getNextNode());
  334. }
  335. } else if (Argument *Arg = dyn_cast<Argument>(handle)) {
  336. Builder.SetInsertPoint(Arg->getParent()->getEntryBlock().getFirstInsertionPt());
  337. }
  338. Function *annotateHandle =
  339. hlslOP.GetOpFunc(DXIL::OpCode::AnnotateHandle, Builder.getVoidTy());
  340. CallInst *newHandle =
  341. Builder.CreateCall(annotateHandle, {opArg, handle, RP});
  342. HandleToResTypeMap[newHandle] = ResTy;
  343. CI->replaceAllUsesWith(newHandle);
  344. CI->eraseFromParent();
  345. }
  346. }
  347. void TranslateHLCastHandleToRes(Function *F, hlsl::OP &hlslOP) {
  348. for (auto U = F->user_begin(); U != F->user_end();) {
  349. Value *User = *(U++);
  350. if (!isa<Instruction>(User))
  351. continue;
  352. // must be call inst
  353. CallInst *CI = cast<CallInst>(User);
  354. IRBuilder<> Builder(CI);
  355. HLCastOpcode opcode = static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI));
  356. switch (opcode) {
  357. case HLCastOpcode::HandleToResCast: {
  358. Value *Handle = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  359. for (auto HandleU = CI->user_begin(); HandleU != CI->user_end();) {
  360. Value *HandleUser = *(HandleU++);
  361. CallInst *HandleCI = dyn_cast<CallInst>(HandleUser);
  362. if (!HandleCI)
  363. continue;
  364. hlsl::HLOpcodeGroup handleGroup =
  365. hlsl::GetHLOpcodeGroup(HandleCI->getCalledFunction());
  366. if (handleGroup == HLOpcodeGroup::HLCreateHandle) {
  367. HandleCI->replaceAllUsesWith(Handle);
  368. HandleCI->eraseFromParent();
  369. }
  370. }
  371. if (CI->user_empty()) {
  372. CI->eraseFromParent();
  373. }
  374. } break;
  375. }
  376. }
  377. }
  378. } // namespace
  379. void DxilGenerationPass::LowerHLCreateHandle(
  380. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap) {
  381. Module *M = m_pHLModule->GetModule();
  382. hlsl::OP &hlslOP = *m_pHLModule->GetOP();
  383. // Lower cast handle to res used by hl.createhandle.
  384. for (iplist<Function>::iterator F : M->getFunctionList()) {
  385. if (F->user_empty())
  386. continue;
  387. hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
  388. if (group == HLOpcodeGroup::HLCast) {
  389. TranslateHLCastHandleToRes(F, hlslOP);
  390. }
  391. }
  392. // generate dxil operation
  393. for (iplist<Function>::iterator F : M->getFunctionList()) {
  394. if (F->user_empty())
  395. continue;
  396. hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
  397. switch (group) {
  398. default:
  399. break;
  400. case HLOpcodeGroup::HLCreateHandle:
  401. TranslateHLCreateHandle(F, hlslOP);
  402. break;
  403. case HLOpcodeGroup::HLAnnotateHandle:
  404. TranslateHLAnnotateHandle(F, hlslOP, HandleToResTypeMap);
  405. break;
  406. }
  407. }
  408. }
  409. static void
  410. MarkUavUpdateCounter(Value* LoadOrGEP,
  411. DxilResource &res,
  412. std::unordered_set<LoadInst *> &UpdateCounterSet) {
  413. if (LoadInst *ldInst = dyn_cast<LoadInst>(LoadOrGEP)) {
  414. if (UpdateCounterSet.count(ldInst)) {
  415. DXASSERT_NOMSG(res.GetClass() == DXIL::ResourceClass::UAV);
  416. res.SetHasCounter(true);
  417. }
  418. } else {
  419. DXASSERT(dyn_cast<GEPOperator>(LoadOrGEP) != nullptr,
  420. "else AddOpcodeParamForIntrinsic in CodeGen did not patch uses "
  421. "to only have ld/st refer to temp object");
  422. GEPOperator *GEP = cast<GEPOperator>(LoadOrGEP);
  423. for (auto GEPU : GEP->users()) {
  424. MarkUavUpdateCounter(GEPU, res, UpdateCounterSet);
  425. }
  426. }
  427. }
  428. static void
  429. MarkUavUpdateCounter(DxilResource &res,
  430. std::unordered_set<LoadInst *> &UpdateCounterSet) {
  431. Value *V = res.GetGlobalSymbol();
  432. for (auto U = V->user_begin(), E = V->user_end(); U != E;) {
  433. User *user = *(U++);
  434. // Skip unused user.
  435. if (user->user_empty())
  436. continue;
  437. MarkUavUpdateCounter(user, res, UpdateCounterSet);
  438. }
  439. }
  440. void DxilGenerationPass::MarkUpdateCounter(
  441. std::unordered_set<LoadInst *> &UpdateCounterSet) {
  442. for (size_t i = 0; i < m_pHLModule->GetUAVs().size(); i++) {
  443. HLResource &UAV = m_pHLModule->GetUAV(i);
  444. MarkUavUpdateCounter(UAV, UpdateCounterSet);
  445. }
  446. }
  447. void DxilGenerationPass::GenerateDxilCBufferHandles() {
  448. // For CBuffer, handle are mapped to HLCreateHandle.
  449. OP *hlslOP = m_pHLModule->GetOP();
  450. Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandleForLib);
  451. LLVMContext &Ctx = hlslOP->GetCtx();
  452. Value *zeroIdx = hlslOP->GetU32Const(0);
  453. for (size_t i = 0; i < m_pHLModule->GetCBuffers().size(); i++) {
  454. DxilCBuffer &CB = m_pHLModule->GetCBuffer(i);
  455. GlobalVariable *GV = dyn_cast<GlobalVariable>(CB.GetGlobalSymbol());
  456. if (GV == nullptr)
  457. continue;
  458. // Remove GEP created in HLObjectOperationLowerHelper::UniformCbPtr.
  459. GV->removeDeadConstantUsers();
  460. std::string handleName = std::string(GV->getName());
  461. DIVariable *DIV = nullptr;
  462. DILocation *DL = nullptr;
  463. if (m_HasDbgInfo) {
  464. DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder();
  465. DIV = dxilutil::FindGlobalVariableDebugInfo(GV, Finder);
  466. if (DIV)
  467. // TODO: how to get col?
  468. DL = DILocation::get(Ctx, DIV->getLine(), 1,
  469. DIV->getScope());
  470. }
  471. if (CB.GetRangeSize() == 1 &&
  472. !GV->getType()->getElementType()->isArrayTy()) {
  473. Function *createHandle =
  474. hlslOP->GetOpFunc(OP::OpCode::CreateHandleForLib,
  475. GV->getType()->getElementType());
  476. for (auto U = GV->user_begin(); U != GV->user_end(); ) {
  477. // Must HLCreateHandle.
  478. CallInst *CI = cast<CallInst>(*(U++));
  479. // Put createHandle to entry block.
  480. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(CI));
  481. Value *V = Builder.CreateLoad(GV);
  482. CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
  483. if (m_HasDbgInfo) {
  484. // TODO: add debug info.
  485. //handle->setDebugLoc(DL);
  486. (void)(DL);
  487. }
  488. CI->replaceAllUsesWith(handle);
  489. CI->eraseFromParent();
  490. }
  491. } else {
  492. PointerType *Ty = GV->getType();
  493. Type *EltTy = Ty->getElementType()->getArrayElementType()->getPointerTo(
  494. Ty->getAddressSpace());
  495. Function *createHandle = hlslOP->GetOpFunc(
  496. OP::OpCode::CreateHandleForLib, EltTy->getPointerElementType());
  497. for (auto U = GV->user_begin(); U != GV->user_end();) {
  498. // Must HLCreateHandle.
  499. CallInst *CI = cast<CallInst>(*(U++));
  500. IRBuilder<> Builder(CI);
  501. Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
  502. if (isa<ConstantInt>(CBIndex)) {
  503. // Put createHandle to entry block for const index.
  504. Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(CI));
  505. }
  506. // Add GEP for cbv array use.
  507. Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});
  508. if (DxilMDHelper::IsMarkedNonUniform(CI)) {
  509. DxilMDHelper::MarkNonUniform(cast<Instruction>(GEP));
  510. }
  511. Value *V = Builder.CreateLoad(GEP);
  512. CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
  513. CI->replaceAllUsesWith(handle);
  514. CI->eraseFromParent();
  515. }
  516. }
  517. }
  518. }
  519. void DxilGenerationPass::GenerateDxilOperations(
  520. Module &M, std::unordered_set<LoadInst *> &UpdateCounterSet) {
  521. // remove all functions except entry function
  522. Function *entry = m_pHLModule->GetEntryFunction();
  523. const ShaderModel *pSM = m_pHLModule->GetShaderModel();
  524. Function *patchConstantFunc = nullptr;
  525. if (pSM->IsHS()) {
  526. DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(entry);
  527. patchConstantFunc = funcProps.ShaderProps.HS.patchConstantFunc;
  528. }
  529. if (!pSM->IsLib()) {
  530. for (auto F = M.begin(); F != M.end();) {
  531. Function *func = F++;
  532. if (func->isDeclaration())
  533. continue;
  534. if (func == entry)
  535. continue;
  536. if (func == patchConstantFunc)
  537. continue;
  538. if (func->user_empty())
  539. func->eraseFromParent();
  540. }
  541. }
  542. TranslateBuiltinOperations(*m_pHLModule, m_extensionsCodegenHelper,
  543. UpdateCounterSet);
  544. // Remove unused HL Operation functions.
  545. std::vector<Function *> deadList;
  546. for (iplist<Function>::iterator F : M.getFunctionList()) {
  547. hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(F);
  548. if (group != HLOpcodeGroup::NotHL || F->isIntrinsic())
  549. if (F->user_empty())
  550. deadList.emplace_back(F);
  551. }
  552. for (Function *F : deadList)
  553. F->eraseFromParent();
  554. }
  555. static void TranslatePreciseAttributeOnFunction(Function &F, Module &M) {
  556. BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
  557. // Find allocas that has precise attribute, by looking at all instructions in
  558. // the entry node
  559. for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
  560. Instruction *Inst = (I++);
  561. if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) {
  562. if (HLModule::HasPreciseAttributeWithMetadata(AI)) {
  563. HLModule::MarkPreciseAttributeOnPtrWithFunctionCall(AI, M);
  564. }
  565. } else {
  566. DXASSERT(!HLModule::HasPreciseAttributeWithMetadata(Inst), "Only alloca can has precise metadata.");
  567. }
  568. }
  569. FastMathFlags FMF;
  570. FMF.setUnsafeAlgebra();
  571. // Set fast math for all FPMathOperators.
  572. // Already set FastMath in options. But that only enable things like fadd.
  573. // Every inst which type is float can be cast to FPMathOperator.
  574. for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
  575. BasicBlock *BB = BBI;
  576. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
  577. if (dyn_cast<FPMathOperator>(I)) {
  578. // Set precise fast math on those instructions that support it.
  579. if (DxilModule::PreservesFastMathFlags(I))
  580. I->copyFastMathFlags(FMF);
  581. }
  582. }
  583. }
  584. }
  585. void DxilGenerationPass::TranslatePreciseAttribute() {
  586. bool bIEEEStrict = m_pHLModule->GetHLOptions().bIEEEStrict;
  587. if (bIEEEStrict) {
  588. // mark precise on dxil operations.
  589. Module &M = *m_pHLModule->GetModule();
  590. for (Function &F : M) {
  591. if (!hlsl::OP::IsDxilOpFunc(&F))
  592. continue;
  593. if (!F.getReturnType()->isFPOrFPVectorTy())
  594. continue;
  595. for (User *U : F.users()) {
  596. Instruction *I = dyn_cast<Instruction>(U);
  597. if (!I)
  598. continue;
  599. IRBuilder<> B(I);
  600. HLModule::MarkPreciseAttributeOnValWithFunctionCall(I, B, M);
  601. }
  602. }
  603. return;
  604. }
  605. Module &M = *m_pHLModule->GetModule();
  606. // TODO: If not inline every function, for function has call site with precise
  607. // argument and call site without precise argument, need to clone the function
  608. // to propagate the precise for the precise call site.
  609. // This should be done at CGMSHLSLRuntime::FinishCodeGen.
  610. if (m_pHLModule->GetShaderModel()->IsLib()) {
  611. // TODO: If all functions have been inlined, and unreferenced functions removed,
  612. // it should make sense to run on all funciton bodies,
  613. // even when not processing a library.
  614. for (Function &F : M.functions()) {
  615. if (!F.isDeclaration())
  616. TranslatePreciseAttributeOnFunction(F, M);
  617. }
  618. } else {
  619. Function *EntryFn = m_pHLModule->GetEntryFunction();
  620. TranslatePreciseAttributeOnFunction(*EntryFn, M);
  621. if (m_pHLModule->GetShaderModel()->IsHS()) {
  622. DxilFunctionProps &EntryQual = m_pHLModule->GetDxilFunctionProps(EntryFn);
  623. Function *patchConstantFunc = EntryQual.ShaderProps.HS.patchConstantFunc;
  624. TranslatePreciseAttributeOnFunction(*patchConstantFunc, M);
  625. }
  626. }
  627. }
  628. namespace {
  629. void ReplaceMinPrecisionRawBufferLoadByType(Function *F, Type *FromTy,
  630. Type *ToTy, OP *Op,
  631. const DataLayout &DL) {
  632. Function *newFunction = Op->GetOpFunc(DXIL::OpCode::RawBufferLoad, ToTy);
  633. for (auto FUser = F->user_begin(), FEnd = F->user_end(); FUser != FEnd;) {
  634. User *UserCI = *(FUser++);
  635. if (CallInst *CI = dyn_cast<CallInst>(UserCI)) {
  636. IRBuilder<> CIBuilder(CI);
  637. SmallVector<Value *, 5> newFuncArgs;
  638. // opcode, handle, index, elementOffset, mask
  639. // Compiler is generating correct element offset even for min precision
  640. // types So no need to recalculate here
  641. for (unsigned i = 0; i < 5; ++i) {
  642. newFuncArgs.emplace_back(CI->getArgOperand(i));
  643. }
  644. // new alignment for new type
  645. newFuncArgs.emplace_back(Op->GetI32Const(DL.getTypeAllocSize(ToTy)));
  646. CallInst *newCI = CIBuilder.CreateCall(newFunction, newFuncArgs);
  647. for (auto CIUser = CI->user_begin(), CIEnd = CI->user_end();
  648. CIUser != CIEnd;) {
  649. User *UserEV = *(CIUser++);
  650. if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(UserEV)) {
  651. IRBuilder<> EVBuilder(EV);
  652. ArrayRef<unsigned> Indices = EV->getIndices();
  653. DXASSERT(Indices.size() == 1,
  654. "Otherwise we have wrong extract value.");
  655. Value *newEV = EVBuilder.CreateExtractValue(newCI, Indices);
  656. Value *newTruncV = nullptr;
  657. if (4 == Indices[0]) { // Don't truncate status
  658. newTruncV = newEV;
  659. } else if (FromTy->isHalfTy()) {
  660. newTruncV = EVBuilder.CreateFPTrunc(newEV, FromTy);
  661. } else if (FromTy->isIntegerTy()) {
  662. newTruncV = EVBuilder.CreateTrunc(newEV, FromTy);
  663. } else {
  664. DXASSERT(false, "unexpected type conversion");
  665. }
  666. EV->replaceAllUsesWith(newTruncV);
  667. EV->eraseFromParent();
  668. }
  669. }
  670. CI->eraseFromParent();
  671. }
  672. }
  673. F->eraseFromParent();
  674. }
  675. void ReplaceMinPrecisionRawBufferStoreByType(
  676. Function *F, Type *FromTy, Type *ToTy, OP *Op,
  677. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap,
  678. DxilTypeSystem &typeSys, const DataLayout &DL) {
  679. Function *newFunction = Op->GetOpFunc(DXIL::OpCode::RawBufferStore, ToTy);
  680. // for each function
  681. // add argument 4-7 to its upconverted values
  682. // replace function call
  683. for (auto FuncUser = F->user_begin(), FuncEnd = F->user_end();
  684. FuncUser != FuncEnd;) {
  685. CallInst *CI = dyn_cast<CallInst>(*(FuncUser++));
  686. DXASSERT(CI, "function user must be a call instruction.");
  687. IRBuilder<> CIBuilder(CI);
  688. SmallVector<Value *, 9> Args;
  689. for (unsigned i = 0; i < 4; ++i) {
  690. Args.emplace_back(CI->getArgOperand(i));
  691. }
  692. // values to store should be converted to its higher precision types
  693. if (FromTy->isHalfTy()) {
  694. for (unsigned i = 4; i < 8; ++i) {
  695. Value *NewV = CIBuilder.CreateFPExt(CI->getArgOperand(i),
  696. ToTy);
  697. Args.emplace_back(NewV);
  698. }
  699. } else if (FromTy->isIntegerTy()) {
  700. // This case only applies to typed buffer since Store operation of byte
  701. // address buffer for min precision is handled by implicit conversion on
  702. // intrinsic call. Since we are extending integer, we have to know if we
  703. // should sign ext or zero ext. We can do this by iterating checking the
  704. // size of the element at struct type and comp type at type annotation
  705. CallInst *handleCI = dyn_cast<CallInst>(
  706. CI->getArgOperand(DxilInst_RawBufferStore::arg_uav));
  707. DXASSERT(handleCI,
  708. "otherwise handle was not an argument to buffer store.");
  709. auto resTyIt = HandleToResTypeMap.find(handleCI);
  710. DXASSERT(resTyIt != HandleToResTypeMap.end(),
  711. "otherwise fail to handle for buffer store lost its retTy");
  712. StructType *STy = dyn_cast<StructType>(resTyIt->second);
  713. STy = cast<StructType>(STy->getElementType(0));
  714. DxilStructAnnotation *SAnnot =
  715. typeSys.GetStructAnnotation(STy);
  716. ConstantInt *offsetInt = dyn_cast<ConstantInt>(
  717. CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset));
  718. unsigned offset = offsetInt->getSExtValue();
  719. unsigned currentOffset = 0;
  720. for (DxilStructTypeIterator iter = begin(STy, SAnnot),
  721. ItEnd = end(STy, SAnnot);
  722. iter != ItEnd; ++iter) {
  723. std::pair<Type *, DxilFieldAnnotation *> pair = *iter;
  724. currentOffset += DL.getTypeAllocSize(pair.first);
  725. if (currentOffset > offset) {
  726. if (pair.second->GetCompType().IsUIntTy()) {
  727. for (unsigned i = 4; i < 8; ++i) {
  728. Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy);
  729. Args.emplace_back(NewV);
  730. }
  731. break;
  732. } else if (pair.second->GetCompType().IsIntTy()) {
  733. for (unsigned i = 4; i < 8; ++i) {
  734. Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy);
  735. Args.emplace_back(NewV);
  736. }
  737. break;
  738. } else {
  739. DXASSERT(false, "Invalid comp type");
  740. }
  741. }
  742. }
  743. }
  744. // mask
  745. Args.emplace_back(CI->getArgOperand(8));
  746. // alignment
  747. Args.emplace_back(CIBuilder.getInt32(DL.getTypeAllocSize(ToTy)));
  748. CIBuilder.CreateCall(newFunction, Args);
  749. CI->eraseFromParent();
  750. }
  751. }
  752. } // namespace
  753. void DxilGenerationPass::TranslateMinPrecisionRawBuffer(
  754. DxilModule &DM,
  755. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap) {
  756. hlsl::OP *hlslOP = DM.GetOP();
  757. LLVMContext &Ctx = DM.GetCtx();
  758. Type *I32Ty = Type::getInt32Ty(Ctx);
  759. Type *I16Ty = Type::getInt16Ty(Ctx);
  760. Type *F32Ty = Type::getFloatTy(Ctx);
  761. Type *F16Ty = Type::getHalfTy(Ctx);
  762. const DataLayout &DL = DM.GetModule()->getDataLayout();
  763. DxilTypeSystem &typeSys = DM.GetTypeSystem();
  764. SmallVector<Function *, 2> rawBufLoads;
  765. for (auto it : hlslOP->GetOpFuncList(DXIL::OpCode::RawBufferLoad)) {
  766. Function *F = it.second;
  767. if (!F)
  768. continue;
  769. rawBufLoads.emplace_back(F);
  770. }
  771. for (Function *F : rawBufLoads) {
  772. StructType *RetTy = cast<StructType>(F->getReturnType());
  773. Type *EltTy = RetTy->getElementType(0);
  774. if (EltTy->isHalfTy()) {
  775. ReplaceMinPrecisionRawBufferLoadByType(F, F16Ty, F32Ty, hlslOP, DL);
  776. } else if (EltTy == I16Ty) {
  777. ReplaceMinPrecisionRawBufferLoadByType(F, I16Ty, I32Ty, hlslOP, DL);
  778. }
  779. }
  780. SmallVector<Function *, 2> rawBufStores;
  781. for (auto it : hlslOP->GetOpFuncList(DXIL::OpCode::RawBufferStore)) {
  782. Function *F = it.second;
  783. if (!F)
  784. continue;
  785. rawBufStores.emplace_back(F);
  786. }
  787. for (Function *F : rawBufStores) {
  788. Type *EltTy =
  789. F->getFunctionType()->getParamType(DxilInst_RawBufferStore::arg_value0);
  790. if (EltTy->isHalfTy()) {
  791. ReplaceMinPrecisionRawBufferStoreByType(F, F16Ty, F32Ty, hlslOP,
  792. HandleToResTypeMap, typeSys, DL);
  793. } else if (EltTy == I16Ty) {
  794. ReplaceMinPrecisionRawBufferStoreByType(F, I16Ty, I32Ty, hlslOP,
  795. HandleToResTypeMap, typeSys, DL);
  796. }
  797. }
  798. }
  // Out-of-class definition of the pass's static ID member, initialized to 0.
  // NOTE(review): presumably the pass infrastructure identifies the pass via
  // this member rather than via its value — confirm against the class header.
  799. char DxilGenerationPass::ID = 0;
  800. ModulePass *llvm::createDxilGenerationPass(bool NotOptimized, hlsl::HLSLExtensionsCodegenHelper *extensionsHelper) {
  801. DxilGenerationPass *dxilPass = new DxilGenerationPass(NotOptimized);
  802. dxilPass->SetExtensionsHelper(extensionsHelper);
  803. return dxilPass;
  804. }
  // Pass-initialization macro for DxilGenerationPass, using the argument string
  // "dxilgen" and the description "HLSL DXIL Generation".
  // NOTE(review): the two trailing `false` arguments are presumably the
  // macro's CFG-only and is-analysis flags — confirm against PassSupport.h.
  805. INITIALIZE_PASS(DxilGenerationPass, "dxilgen", "HLSL DXIL Generation", false, false)