DxilGenerationPass.cpp 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // //
  3. // DxilGenerationPass.cpp //
  4. // Copyright (C) Microsoft Corporation. All rights reserved. //
  5. // This file is distributed under the University of Illinois Open Source //
  6. // License. See LICENSE.TXT for details. //
  7. // //
  8. // DxilGenerationPass implementation. //
  9. // //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #include "HLSignatureLower.h"
  12. #include "dxc/DXIL/DxilEntryProps.h"
  13. #include "dxc/DXIL/DxilModule.h"
  14. #include "dxc/DXIL/DxilOperations.h"
  15. #include "dxc/DXIL/DxilInstructions.h"
  16. #include "dxc/DXIL/DxilUtil.h"
  17. #include "dxc/HLSL/DxilGenerationPass.h"
  18. #include "dxc/HLSL/HLSLExtensionsCodegenHelper.h"
  19. #include "dxc/HLSL/HLModule.h"
  20. #include "dxc/HLSL/HLOperationLower.h"
  21. #include "dxc/HLSL/HLOperations.h"
  22. #include "dxc/Support/Global.h"
  23. #include "llvm/Pass.h"
  24. #include "llvm/ADT/STLExtras.h"
  25. #include "llvm/Analysis/AssumptionCache.h"
  26. #include "llvm/IR/DebugInfo.h"
  27. #include "llvm/IR/DebugInfoMetadata.h"
  28. #include "llvm/IR/Function.h"
  29. #include "llvm/IR/Instruction.h"
  30. #include "llvm/IR/Instructions.h"
  31. #include "llvm/IR/IRBuilder.h"
  32. #include "llvm/IR/Operator.h"
  33. #include "llvm/IR/Module.h"
  34. #include "llvm/Support/Casting.h"
  35. #include "llvm/Transforms/Utils/SSAUpdater.h"
  36. #include <unordered_map>
  37. #include <unordered_set>
  38. #include <vector>
  39. using namespace llvm;
  40. using namespace hlsl;
  41. // TODO: use hlsl namespace for the most of this file.
  42. namespace {
  43. void SimplifyGlobalSymbol(GlobalVariable *GV) {
  44. Type *Ty = GV->getType()->getElementType();
  45. if (!Ty->isArrayTy()) {
  46. // Make sure only 1 load of GV in each function.
  47. std::unordered_map<Function *, Instruction *> handleMapOnFunction;
  48. for (User *U : GV->users()) {
  49. if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
  50. Function *F = LI->getParent()->getParent();
  51. auto it = handleMapOnFunction.find(F);
  52. if (it == handleMapOnFunction.end()) {
  53. handleMapOnFunction[F] = LI;
  54. } else {
  55. LI->replaceAllUsesWith(it->second);
  56. }
  57. }
  58. }
  59. for (auto it : handleMapOnFunction) {
  60. Function *F = it.first;
  61. Instruction *I = it.second;
  62. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(F));
  63. Value *headLI = Builder.CreateLoad(GV);
  64. I->replaceAllUsesWith(headLI);
  65. }
  66. }
  67. }
  68. void InitResourceBase(const DxilResourceBase *pSource,
  69. DxilResourceBase *pDest) {
  70. DXASSERT_NOMSG(pSource->GetClass() == pDest->GetClass());
  71. pDest->SetKind(pSource->GetKind());
  72. pDest->SetID(pSource->GetID());
  73. pDest->SetSpaceID(pSource->GetSpaceID());
  74. pDest->SetLowerBound(pSource->GetLowerBound());
  75. pDest->SetRangeSize(pSource->GetRangeSize());
  76. pDest->SetGlobalSymbol(pSource->GetGlobalSymbol());
  77. pDest->SetGlobalName(pSource->GetGlobalName());
  78. pDest->SetHandle(pSource->GetHandle());
  79. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(pSource->GetGlobalSymbol()))
  80. SimplifyGlobalSymbol(GV);
  81. }
  82. void InitResource(const DxilResource *pSource, DxilResource *pDest) {
  83. pDest->SetCompType(pSource->GetCompType());
  84. pDest->SetSamplerFeedbackType(pSource->GetSamplerFeedbackType());
  85. pDest->SetSampleCount(pSource->GetSampleCount());
  86. pDest->SetElementStride(pSource->GetElementStride());
  87. pDest->SetGloballyCoherent(pSource->IsGloballyCoherent());
  88. pDest->SetHasCounter(pSource->HasCounter());
  89. pDest->SetRW(pSource->IsRW());
  90. pDest->SetROV(pSource->IsROV());
  91. InitResourceBase(pSource, pDest);
  92. }
  93. void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
  94. // Subsystems.
  95. unsigned ValMajor, ValMinor;
  96. H.GetValidatorVersion(ValMajor, ValMinor);
  97. M.SetValidatorVersion(ValMajor, ValMinor);
  98. M.SetShaderModel(H.GetShaderModel(), H.GetHLOptions().bUseMinPrecision);
  99. // Entry function.
  100. if (!M.GetShaderModel()->IsLib()) {
  101. Function *EntryFn = H.GetEntryFunction();
  102. M.SetEntryFunction(EntryFn);
  103. M.SetEntryFunctionName(H.GetEntryFunctionName());
  104. }
  105. std::vector<GlobalVariable* > &LLVMUsed = M.GetLLVMUsed();
  106. // Resources
  107. for (auto && C : H.GetCBuffers()) {
  108. auto b = llvm::make_unique<DxilCBuffer>();
  109. InitResourceBase(C.get(), b.get());
  110. b->SetSize(C->GetSize());
  111. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
  112. LLVMUsed.emplace_back(GV);
  113. M.AddCBuffer(std::move(b));
  114. }
  115. for (auto && C : H.GetUAVs()) {
  116. auto b = llvm::make_unique<DxilResource>();
  117. InitResource(C.get(), b.get());
  118. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
  119. LLVMUsed.emplace_back(GV);
  120. M.AddUAV(std::move(b));
  121. }
  122. for (auto && C : H.GetSRVs()) {
  123. auto b = llvm::make_unique<DxilResource>();
  124. InitResource(C.get(), b.get());
  125. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
  126. LLVMUsed.emplace_back(GV);
  127. M.AddSRV(std::move(b));
  128. }
  129. for (auto && C : H.GetSamplers()) {
  130. auto b = llvm::make_unique<DxilSampler>();
  131. InitResourceBase(C.get(), b.get());
  132. b->SetSamplerKind(C->GetSamplerKind());
  133. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(b->GetGlobalSymbol()))
  134. LLVMUsed.emplace_back(GV);
  135. M.AddSampler(std::move(b));
  136. }
  137. // Signatures.
  138. M.ResetSerializedRootSignature(H.GetSerializedRootSignature());
  139. // Subobjects.
  140. M.ResetSubobjects(H.ReleaseSubobjects());
  141. // Shader properties.
  142. //bool m_bDisableOptimizations;
  143. M.SetDisableOptimization(H.GetHLOptions().bDisableOptimizations);
  144. M.SetLegacyResourceReservation(H.GetHLOptions().bLegacyResourceReservation);
  145. //bool m_bDisableMathRefactoring;
  146. //bool m_bEnableDoublePrecision;
  147. //bool m_bEnableDoubleExtensions;
  148. //M.CollectShaderFlags();
  149. //bool m_bForceEarlyDepthStencil;
  150. //bool m_bEnableRawAndStructuredBuffers;
  151. //bool m_bEnableMSAD;
  152. //M.m_ShaderFlags.SetAllResourcesBound(H.GetHLOptions().bAllResourcesBound);
  153. // DXIL type system.
  154. M.ResetTypeSystem(H.ReleaseTypeSystem());
  155. // Dxil OP.
  156. M.ResetOP(H.ReleaseOP());
  157. // Keep llvm used.
  158. M.EmitLLVMUsed();
  159. M.SetAllResourcesBound(H.GetHLOptions().bAllResourcesBound);
  160. M.SetAutoBindingSpace(H.GetAutoBindingSpace());
  161. // Update Validator Version
  162. M.UpgradeToMinValidatorVersion();
  163. }
  164. class DxilGenerationPass : public ModulePass {
  165. HLModule *m_pHLModule;
  166. bool m_HasDbgInfo;
  167. HLSLExtensionsCodegenHelper *m_extensionsCodegenHelper;
  168. public:
  169. static char ID; // Pass identification, replacement for typeid
  170. explicit DxilGenerationPass(bool NoOpt = false)
  171. : ModulePass(ID), m_pHLModule(nullptr), m_extensionsCodegenHelper(nullptr), NotOptimized(NoOpt) {}
  172. const char *getPassName() const override { return "DXIL Generator"; }
  173. void SetExtensionsHelper(HLSLExtensionsCodegenHelper *helper) {
  174. m_extensionsCodegenHelper = helper;
  175. }
  176. bool runOnModule(Module &M) override {
  177. m_pHLModule = &M.GetOrCreateHLModule();
  178. const ShaderModel *SM = m_pHLModule->GetShaderModel();
  179. // Load up debug information, to cross-reference values and the instructions
  180. // used to load them.
  181. m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
  182. // EntrySig for shader functions.
  183. DxilEntryPropsMap EntryPropsMap;
  184. if (!SM->IsLib()) {
  185. Function *EntryFn = m_pHLModule->GetEntryFunction();
  186. if (!m_pHLModule->HasDxilFunctionProps(EntryFn)) {
  187. dxilutil::EmitErrorOnFunction(EntryFn, "Entry function don't have property.");
  188. return false;
  189. }
  190. DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(EntryFn);
  191. std::unique_ptr<DxilEntryProps> pProps =
  192. llvm::make_unique<DxilEntryProps>(
  193. props, m_pHLModule->GetHLOptions().bUseMinPrecision);
  194. HLSignatureLower sigLower(m_pHLModule->GetEntryFunction(), *m_pHLModule,
  195. pProps->sig);
  196. sigLower.Run();
  197. EntryPropsMap[EntryFn] = std::move(pProps);
  198. } else {
  199. for (auto It = M.begin(); It != M.end();) {
  200. Function &F = *(It++);
  201. // Lower signature for each graphics or compute entry function.
  202. if (m_pHLModule->HasDxilFunctionProps(&F)) {
  203. DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(&F);
  204. std::unique_ptr<DxilEntryProps> pProps =
  205. llvm::make_unique<DxilEntryProps>(
  206. props, m_pHLModule->GetHLOptions().bUseMinPrecision);
  207. if (m_pHLModule->IsGraphicsShader(&F) ||
  208. m_pHLModule->IsComputeShader(&F)) {
  209. HLSignatureLower sigLower(&F, *m_pHLModule, pProps->sig);
  210. // TODO: BUG: This will lower patch constant function sigs twice if
  211. // used by two hull shaders!
  212. sigLower.Run();
  213. }
  214. EntryPropsMap[&F] = std::move(pProps);
  215. }
  216. }
  217. }
  218. std::unordered_set<LoadInst *> UpdateCounterSet;
  219. GenerateDxilOperations(M, UpdateCounterSet);
  220. GenerateDxilCBufferHandles();
  221. MarkUpdateCounter(UpdateCounterSet);
  222. std::unordered_map<CallInst *, Type*> HandleToResTypeMap;
  223. LowerHLCreateHandle(HandleToResTypeMap);
  224. // LowerHLCreateHandle() should have translated HLCreateHandle to CreateHandleForLib.
  225. // Clean up HLCreateHandle functions.
  226. for (auto It = M.begin(); It != M.end();) {
  227. Function &F = *(It++);
  228. if (!F.isDeclaration()) {
  229. if (hlsl::GetHLOpcodeGroupByName(&F) ==
  230. HLOpcodeGroup::HLCreateHandle) {
  231. if (F.user_empty()) {
  232. F.eraseFromParent();
  233. } else {
  234. dxilutil::EmitErrorOnFunction(&F, "Fail to lower createHandle.");
  235. }
  236. }
  237. }
  238. }
  239. // Translate precise on allocas into function call to keep the information after mem2reg.
  240. // The function calls will be removed after propagate precise attribute.
  241. TranslatePreciseAttribute();
  242. // High-level metadata should now be turned into low-level metadata.
  243. const bool SkipInit = true;
  244. hlsl::DxilModule &DxilMod = M.GetOrCreateDxilModule(SkipInit);
  245. DxilFunctionProps *pProps = nullptr;
  246. if (!SM->IsLib()) {
  247. pProps = &EntryPropsMap.begin()->second->props;
  248. }
  249. InitDxilModuleFromHLModule(*m_pHLModule, DxilMod, m_HasDbgInfo);
  250. DxilMod.ResetEntryPropsMap(std::move(EntryPropsMap));
  251. if (!SM->IsLib()) {
  252. DxilMod.SetShaderProperties(pProps);
  253. }
  254. HLModule::ClearHLMetadata(M);
  255. M.ResetHLModule();
  256. if (SM->IsSM62Plus() && DxilMod.GetUseMinPrecision()) {
  257. TranslateMinPrecisionRawBuffer(DxilMod, HandleToResTypeMap);
  258. }
  259. // We now have a DXIL representation - record this.
  260. SetPauseResumePasses(M, "hlsl-dxilemit", "hlsl-dxilload");
  261. (void)NotOptimized; // Dummy out unused member to silence warnings
  262. return true;
  263. }
  264. private:
  265. void MarkUpdateCounter(std::unordered_set<LoadInst *> &UpdateCounterSet);
  266. // Generate DXIL cbuffer handles.
  267. void
  268. GenerateDxilCBufferHandles();
  269. // change built-in funtion into DXIL operations
  270. void GenerateDxilOperations(Module &M,
  271. std::unordered_set<LoadInst *> &UpdateCounterSet);
  272. void LowerHLCreateHandle(
  273. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap);
  274. // Translate precise attribute into HL function call.
  275. void TranslatePreciseAttribute();
  276. // Translate RawBufferLoad/RawBufferStore
  277. // For DXIL >= 1.2, if min precision is enabled, currently generation pass is
  278. // producing i16/f16 return type for min precisions. For rawBuffer, we will
  279. // change this so that min precisions are returning its actual scalar type
  280. // (i32/f32) and will be truncated to their corresponding types after loading
  281. // / before storing.
  282. void TranslateMinPrecisionRawBuffer(
  283. DxilModule &DM,
  284. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap);
  285. // Input module is not optimized.
  286. bool NotOptimized;
  287. };
  288. }
  289. namespace {
  290. void TranslateHLCreateHandle(Function *F, hlsl::OP &hlslOP) {
  291. Value *opArg = hlslOP.GetU32Const((unsigned)DXIL::OpCode::CreateHandleForLib);
  292. for (auto U = F->user_begin(); U != F->user_end();) {
  293. Value *user = *(U++);
  294. if (!isa<Instruction>(user))
  295. continue;
  296. // must be call inst
  297. CallInst *CI = cast<CallInst>(user);
  298. Value *res = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  299. Value *newHandle = nullptr;
  300. IRBuilder<> Builder(CI);
  301. // Res could be ld/phi/select. Will be removed in
  302. // DxilLowerCreateHandleForLib.
  303. Function *createHandle =
  304. hlslOP.GetOpFunc(DXIL::OpCode::CreateHandleForLib, res->getType());
  305. newHandle = Builder.CreateCall(createHandle, {opArg, res});
  306. CI->replaceAllUsesWith(newHandle);
  307. if (res->user_empty()) {
  308. if (Instruction *I = dyn_cast<Instruction>(res))
  309. I->eraseFromParent();
  310. }
  311. CI->eraseFromParent();
  312. }
  313. }
  314. void TranslateHLAnnotateHandle(
  315. Function *F, hlsl::OP &hlslOP,
  316. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap) {
  317. Value *opArg = hlslOP.GetU32Const((unsigned)DXIL::OpCode::AnnotateHandle);
  318. for (auto U = F->user_begin(); U != F->user_end();) {
  319. Value *user = *(U++);
  320. if (!isa<Instruction>(user))
  321. continue;
  322. // must be call inst
  323. CallInst *CI = cast<CallInst>(user);
  324. Value *handle =
  325. CI->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx);
  326. Value *RC =
  327. CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceClassOpIdx);
  328. Value *RK =
  329. CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceKindOpIdx);
  330. Value *RP = CI->getArgOperand(
  331. HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx);
  332. Type *ResTy =
  333. CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceTypeOpIdx)
  334. ->getType();
  335. IRBuilder<> Builder(CI);
  336. Function *annotateHandle =
  337. hlslOP.GetOpFunc(DXIL::OpCode::AnnotateHandle, Builder.getVoidTy());
  338. CallInst *newHandle =
  339. Builder.CreateCall(annotateHandle, {opArg, handle, RC, RK, RP});
  340. HandleToResTypeMap[newHandle] = ResTy;
  341. CI->replaceAllUsesWith(newHandle);
  342. CI->eraseFromParent();
  343. }
  344. }
  345. void TranslateHLCastHandleToRes(Function *F, hlsl::OP &hlslOP) {
  346. for (auto U = F->user_begin(); U != F->user_end();) {
  347. Value *User = *(U++);
  348. if (!isa<Instruction>(User))
  349. continue;
  350. // must be call inst
  351. CallInst *CI = cast<CallInst>(User);
  352. IRBuilder<> Builder(CI);
  353. HLCastOpcode opcode = static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI));
  354. switch (opcode) {
  355. case HLCastOpcode::HandleToResCast: {
  356. Value *Handle = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  357. for (auto HandleU = CI->user_begin(); HandleU != CI->user_end();) {
  358. Value *HandleUser = *(HandleU++);
  359. CallInst *HandleCI = dyn_cast<CallInst>(HandleUser);
  360. if (!HandleCI)
  361. continue;
  362. hlsl::HLOpcodeGroup handleGroup =
  363. hlsl::GetHLOpcodeGroup(HandleCI->getCalledFunction());
  364. if (handleGroup == HLOpcodeGroup::HLCreateHandle) {
  365. HandleCI->replaceAllUsesWith(Handle);
  366. HandleCI->eraseFromParent();
  367. }
  368. }
  369. if (CI->user_empty()) {
  370. CI->eraseFromParent();
  371. }
  372. } break;
  373. }
  374. }
  375. }
  376. } // namespace
  377. void DxilGenerationPass::LowerHLCreateHandle(
  378. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap) {
  379. Module *M = m_pHLModule->GetModule();
  380. hlsl::OP &hlslOP = *m_pHLModule->GetOP();
  381. // Lower cast handle to res used by hl.createhandle.
  382. for (iplist<Function>::iterator F : M->getFunctionList()) {
  383. if (F->user_empty())
  384. continue;
  385. hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
  386. if (group == HLOpcodeGroup::HLCast) {
  387. TranslateHLCastHandleToRes(F, hlslOP);
  388. }
  389. }
  390. // generate dxil operation
  391. for (iplist<Function>::iterator F : M->getFunctionList()) {
  392. if (F->user_empty())
  393. continue;
  394. hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
  395. switch (group) {
  396. default:
  397. break;
  398. case HLOpcodeGroup::HLCreateHandle:
  399. TranslateHLCreateHandle(F, hlslOP);
  400. break;
  401. case HLOpcodeGroup::HLAnnotateHandle:
  402. TranslateHLAnnotateHandle(F, hlslOP, HandleToResTypeMap);
  403. break;
  404. }
  405. }
  406. }
  407. static void
  408. MarkUavUpdateCounter(Value* LoadOrGEP,
  409. DxilResource &res,
  410. std::unordered_set<LoadInst *> &UpdateCounterSet) {
  411. if (LoadInst *ldInst = dyn_cast<LoadInst>(LoadOrGEP)) {
  412. if (UpdateCounterSet.count(ldInst)) {
  413. DXASSERT_NOMSG(res.GetClass() == DXIL::ResourceClass::UAV);
  414. res.SetHasCounter(true);
  415. }
  416. } else {
  417. DXASSERT(dyn_cast<GEPOperator>(LoadOrGEP) != nullptr,
  418. "else AddOpcodeParamForIntrinsic in CodeGen did not patch uses "
  419. "to only have ld/st refer to temp object");
  420. GEPOperator *GEP = cast<GEPOperator>(LoadOrGEP);
  421. for (auto GEPU : GEP->users()) {
  422. MarkUavUpdateCounter(GEPU, res, UpdateCounterSet);
  423. }
  424. }
  425. }
  426. static void
  427. MarkUavUpdateCounter(DxilResource &res,
  428. std::unordered_set<LoadInst *> &UpdateCounterSet) {
  429. Value *V = res.GetGlobalSymbol();
  430. for (auto U = V->user_begin(), E = V->user_end(); U != E;) {
  431. User *user = *(U++);
  432. // Skip unused user.
  433. if (user->user_empty())
  434. continue;
  435. MarkUavUpdateCounter(user, res, UpdateCounterSet);
  436. }
  437. }
  438. void DxilGenerationPass::MarkUpdateCounter(
  439. std::unordered_set<LoadInst *> &UpdateCounterSet) {
  440. for (size_t i = 0; i < m_pHLModule->GetUAVs().size(); i++) {
  441. HLResource &UAV = m_pHLModule->GetUAV(i);
  442. MarkUavUpdateCounter(UAV, UpdateCounterSet);
  443. }
  444. }
  445. void DxilGenerationPass::GenerateDxilCBufferHandles() {
  446. // For CBuffer, handle are mapped to HLCreateHandle.
  447. OP *hlslOP = m_pHLModule->GetOP();
  448. Value *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CreateHandleForLib);
  449. LLVMContext &Ctx = hlslOP->GetCtx();
  450. Value *zeroIdx = hlslOP->GetU32Const(0);
  451. for (size_t i = 0; i < m_pHLModule->GetCBuffers().size(); i++) {
  452. DxilCBuffer &CB = m_pHLModule->GetCBuffer(i);
  453. GlobalVariable *GV = dyn_cast<GlobalVariable>(CB.GetGlobalSymbol());
  454. if (GV == nullptr)
  455. continue;
  456. // Remove GEP created in HLObjectOperationLowerHelper::UniformCbPtr.
  457. GV->removeDeadConstantUsers();
  458. std::string handleName = std::string(GV->getName());
  459. DIVariable *DIV = nullptr;
  460. DILocation *DL = nullptr;
  461. if (m_HasDbgInfo) {
  462. DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder();
  463. DIV = dxilutil::FindGlobalVariableDebugInfo(GV, Finder);
  464. if (DIV)
  465. // TODO: how to get col?
  466. DL = DILocation::get(Ctx, DIV->getLine(), 1,
  467. DIV->getScope());
  468. }
  469. if (CB.GetRangeSize() == 1) {
  470. Function *createHandle =
  471. hlslOP->GetOpFunc(OP::OpCode::CreateHandleForLib,
  472. GV->getType()->getElementType());
  473. for (auto U = GV->user_begin(); U != GV->user_end(); ) {
  474. // Must HLCreateHandle.
  475. CallInst *CI = cast<CallInst>(*(U++));
  476. // Put createHandle to entry block.
  477. IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(CI));
  478. Value *V = Builder.CreateLoad(GV);
  479. CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
  480. if (m_HasDbgInfo) {
  481. // TODO: add debug info.
  482. //handle->setDebugLoc(DL);
  483. (void)(DL);
  484. }
  485. CI->replaceAllUsesWith(handle);
  486. CI->eraseFromParent();
  487. }
  488. } else {
  489. PointerType *Ty = GV->getType();
  490. Type *EltTy = Ty->getElementType()->getArrayElementType()->getPointerTo(
  491. Ty->getAddressSpace());
  492. Function *createHandle = hlslOP->GetOpFunc(
  493. OP::OpCode::CreateHandleForLib, EltTy->getPointerElementType());
  494. for (auto U = GV->user_begin(); U != GV->user_end();) {
  495. // Must HLCreateHandle.
  496. CallInst *CI = cast<CallInst>(*(U++));
  497. IRBuilder<> Builder(CI);
  498. Value *CBIndex = CI->getArgOperand(HLOperandIndex::kCreateHandleIndexOpIdx);
  499. if (isa<ConstantInt>(CBIndex)) {
  500. // Put createHandle to entry block for const index.
  501. Builder.SetInsertPoint(dxilutil::FirstNonAllocaInsertionPt(CI));
  502. }
  503. // Add GEP for cbv array use.
  504. Value *GEP = Builder.CreateGEP(GV, {zeroIdx, CBIndex});
  505. Value *V = Builder.CreateLoad(GEP);
  506. CallInst *handle = Builder.CreateCall(createHandle, {opArg, V}, handleName);
  507. CI->replaceAllUsesWith(handle);
  508. CI->eraseFromParent();
  509. }
  510. }
  511. }
  512. }
  513. void DxilGenerationPass::GenerateDxilOperations(
  514. Module &M, std::unordered_set<LoadInst *> &UpdateCounterSet) {
  515. // remove all functions except entry function
  516. Function *entry = m_pHLModule->GetEntryFunction();
  517. const ShaderModel *pSM = m_pHLModule->GetShaderModel();
  518. Function *patchConstantFunc = nullptr;
  519. if (pSM->IsHS()) {
  520. DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(entry);
  521. patchConstantFunc = funcProps.ShaderProps.HS.patchConstantFunc;
  522. }
  523. if (!pSM->IsLib()) {
  524. for (auto F = M.begin(); F != M.end();) {
  525. Function *func = F++;
  526. if (func->isDeclaration())
  527. continue;
  528. if (func == entry)
  529. continue;
  530. if (func == patchConstantFunc)
  531. continue;
  532. if (func->user_empty())
  533. func->eraseFromParent();
  534. }
  535. }
  536. TranslateBuiltinOperations(*m_pHLModule, m_extensionsCodegenHelper,
  537. UpdateCounterSet);
  538. // Remove unused HL Operation functions.
  539. std::vector<Function *> deadList;
  540. for (iplist<Function>::iterator F : M.getFunctionList()) {
  541. hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(F);
  542. if (group != HLOpcodeGroup::NotHL || F->isIntrinsic())
  543. if (F->user_empty())
  544. deadList.emplace_back(F);
  545. }
  546. for (Function *F : deadList)
  547. F->eraseFromParent();
  548. }
  549. static void TranslatePreciseAttributeOnFunction(Function &F, Module &M) {
  550. BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
  551. // Find allocas that has precise attribute, by looking at all instructions in
  552. // the entry node
  553. for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
  554. Instruction *Inst = (I++);
  555. if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) {
  556. if (HLModule::HasPreciseAttributeWithMetadata(AI)) {
  557. HLModule::MarkPreciseAttributeOnPtrWithFunctionCall(AI, M);
  558. }
  559. } else {
  560. DXASSERT(!HLModule::HasPreciseAttributeWithMetadata(Inst), "Only alloca can has precise metadata.");
  561. }
  562. }
  563. FastMathFlags FMF;
  564. FMF.setUnsafeAlgebra();
  565. // Set fast math for all FPMathOperators.
  566. // Already set FastMath in options. But that only enable things like fadd.
  567. // Every inst which type is float can be cast to FPMathOperator.
  568. for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
  569. BasicBlock *BB = BBI;
  570. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
  571. if (dyn_cast<FPMathOperator>(I)) {
  572. // Set precise fast math on those instructions that support it.
  573. if (DxilModule::PreservesFastMathFlags(I))
  574. I->copyFastMathFlags(FMF);
  575. }
  576. }
  577. }
  578. }
  579. void DxilGenerationPass::TranslatePreciseAttribute() {
  580. bool bIEEEStrict = m_pHLModule->GetHLOptions().bIEEEStrict;
  581. if (bIEEEStrict) {
  582. // mark precise on dxil operations.
  583. Module &M = *m_pHLModule->GetModule();
  584. for (Function &F : M) {
  585. if (!hlsl::OP::IsDxilOpFunc(&F))
  586. continue;
  587. if (!F.getReturnType()->isFPOrFPVectorTy())
  588. continue;
  589. for (User *U : F.users()) {
  590. Instruction *I = dyn_cast<Instruction>(U);
  591. if (!I)
  592. continue;
  593. IRBuilder<> B(I);
  594. HLModule::MarkPreciseAttributeOnValWithFunctionCall(I, B, M);
  595. }
  596. }
  597. return;
  598. }
  599. Module &M = *m_pHLModule->GetModule();
  600. // TODO: If not inline every function, for function has call site with precise
  601. // argument and call site without precise argument, need to clone the function
  602. // to propagate the precise for the precise call site.
  603. // This should be done at CGMSHLSLRuntime::FinishCodeGen.
  604. if (m_pHLModule->GetShaderModel()->IsLib()) {
  605. // TODO: If all functions have been inlined, and unreferenced functions removed,
  606. // it should make sense to run on all funciton bodies,
  607. // even when not processing a library.
  608. for (Function &F : M.functions()) {
  609. if (!F.isDeclaration())
  610. TranslatePreciseAttributeOnFunction(F, M);
  611. }
  612. } else {
  613. Function *EntryFn = m_pHLModule->GetEntryFunction();
  614. TranslatePreciseAttributeOnFunction(*EntryFn, M);
  615. if (m_pHLModule->GetShaderModel()->IsHS()) {
  616. DxilFunctionProps &EntryQual = m_pHLModule->GetDxilFunctionProps(EntryFn);
  617. Function *patchConstantFunc = EntryQual.ShaderProps.HS.patchConstantFunc;
  618. TranslatePreciseAttributeOnFunction(*patchConstantFunc, M);
  619. }
  620. }
  621. }
  622. namespace {
  623. void ReplaceMinPrecisionRawBufferLoadByType(Function *F, Type *FromTy,
  624. Type *ToTy, OP *Op,
  625. const DataLayout &DL) {
  626. Function *newFunction = Op->GetOpFunc(DXIL::OpCode::RawBufferLoad, ToTy);
  627. for (auto FUser = F->user_begin(), FEnd = F->user_end(); FUser != FEnd;) {
  628. User *UserCI = *(FUser++);
  629. if (CallInst *CI = dyn_cast<CallInst>(UserCI)) {
  630. IRBuilder<> CIBuilder(CI);
  631. SmallVector<Value *, 5> newFuncArgs;
  632. // opcode, handle, index, elementOffset, mask
  633. // Compiler is generating correct element offset even for min precision
  634. // types So no need to recalculate here
  635. for (unsigned i = 0; i < 5; ++i) {
  636. newFuncArgs.emplace_back(CI->getArgOperand(i));
  637. }
  638. // new alignment for new type
  639. newFuncArgs.emplace_back(Op->GetI32Const(DL.getTypeAllocSize(ToTy)));
  640. CallInst *newCI = CIBuilder.CreateCall(newFunction, newFuncArgs);
  641. for (auto CIUser = CI->user_begin(), CIEnd = CI->user_end();
  642. CIUser != CIEnd;) {
  643. User *UserEV = *(CIUser++);
  644. if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(UserEV)) {
  645. IRBuilder<> EVBuilder(EV);
  646. ArrayRef<unsigned> Indices = EV->getIndices();
  647. DXASSERT(Indices.size() == 1,
  648. "Otherwise we have wrong extract value.");
  649. Value *newEV = EVBuilder.CreateExtractValue(newCI, Indices);
  650. Value *newTruncV = nullptr;
  651. if (4 == Indices[0]) { // Don't truncate status
  652. newTruncV = newEV;
  653. } else if (FromTy->isHalfTy()) {
  654. newTruncV = EVBuilder.CreateFPTrunc(newEV, FromTy);
  655. } else if (FromTy->isIntegerTy()) {
  656. newTruncV = EVBuilder.CreateTrunc(newEV, FromTy);
  657. } else {
  658. DXASSERT(false, "unexpected type conversion");
  659. }
  660. EV->replaceAllUsesWith(newTruncV);
  661. EV->eraseFromParent();
  662. }
  663. }
  664. CI->eraseFromParent();
  665. }
  666. }
  667. F->eraseFromParent();
  668. }
  669. void ReplaceMinPrecisionRawBufferStoreByType(
  670. Function *F, Type *FromTy, Type *ToTy, OP *Op,
  671. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap,
  672. DxilTypeSystem &typeSys, const DataLayout &DL) {
  673. Function *newFunction = Op->GetOpFunc(DXIL::OpCode::RawBufferStore, ToTy);
  674. // for each function
  675. // add argument 4-7 to its upconverted values
  676. // replace function call
  677. for (auto FuncUser = F->user_begin(), FuncEnd = F->user_end();
  678. FuncUser != FuncEnd;) {
  679. CallInst *CI = dyn_cast<CallInst>(*(FuncUser++));
  680. DXASSERT(CI, "function user must be a call instruction.");
  681. IRBuilder<> CIBuilder(CI);
  682. SmallVector<Value *, 9> Args;
  683. for (unsigned i = 0; i < 4; ++i) {
  684. Args.emplace_back(CI->getArgOperand(i));
  685. }
  686. // values to store should be converted to its higher precision types
  687. if (FromTy->isHalfTy()) {
  688. for (unsigned i = 4; i < 8; ++i) {
  689. Value *NewV = CIBuilder.CreateFPExt(CI->getArgOperand(i),
  690. ToTy);
  691. Args.emplace_back(NewV);
  692. }
  693. } else if (FromTy->isIntegerTy()) {
  694. // This case only applies to typed buffer since Store operation of byte
  695. // address buffer for min precision is handled by implicit conversion on
  696. // intrinsic call. Since we are extending integer, we have to know if we
  697. // should sign ext or zero ext. We can do this by iterating checking the
  698. // size of the element at struct type and comp type at type annotation
  699. CallInst *handleCI = dyn_cast<CallInst>(
  700. CI->getArgOperand(DxilInst_RawBufferStore::arg_uav));
  701. DXASSERT(handleCI,
  702. "otherwise handle was not an argument to buffer store.");
  703. auto resTyIt = HandleToResTypeMap.find(handleCI);
  704. DXASSERT(resTyIt != HandleToResTypeMap.end(),
  705. "otherwise fail to handle for buffer store lost its retTy");
  706. StructType *STy = dyn_cast<StructType>(resTyIt->second);
  707. STy = cast<StructType>(STy->getElementType(0));
  708. DxilStructAnnotation *SAnnot =
  709. typeSys.GetStructAnnotation(STy);
  710. ConstantInt *offsetInt = dyn_cast<ConstantInt>(
  711. CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset));
  712. unsigned offset = offsetInt->getSExtValue();
  713. unsigned currentOffset = 0;
  714. for (DxilStructTypeIterator iter = begin(STy, SAnnot),
  715. ItEnd = end(STy, SAnnot);
  716. iter != ItEnd; ++iter) {
  717. std::pair<Type *, DxilFieldAnnotation *> pair = *iter;
  718. currentOffset += DL.getTypeAllocSize(pair.first);
  719. if (currentOffset > offset) {
  720. if (pair.second->GetCompType().IsUIntTy()) {
  721. for (unsigned i = 4; i < 8; ++i) {
  722. Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy);
  723. Args.emplace_back(NewV);
  724. }
  725. break;
  726. } else if (pair.second->GetCompType().IsIntTy()) {
  727. for (unsigned i = 4; i < 8; ++i) {
  728. Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy);
  729. Args.emplace_back(NewV);
  730. }
  731. break;
  732. } else {
  733. DXASSERT(false, "Invalid comp type");
  734. }
  735. }
  736. }
  737. }
  738. // mask
  739. Args.emplace_back(CI->getArgOperand(8));
  740. // alignment
  741. Args.emplace_back(CIBuilder.getInt32(DL.getTypeAllocSize(ToTy)));
  742. CIBuilder.CreateCall(newFunction, Args);
  743. CI->eraseFromParent();
  744. }
  745. }
  746. } // namespace
  747. void DxilGenerationPass::TranslateMinPrecisionRawBuffer(
  748. DxilModule &DM,
  749. std::unordered_map<CallInst *, Type *> &HandleToResTypeMap) {
  750. hlsl::OP *hlslOP = DM.GetOP();
  751. LLVMContext &Ctx = DM.GetCtx();
  752. Type *I32Ty = Type::getInt32Ty(Ctx);
  753. Type *I16Ty = Type::getInt16Ty(Ctx);
  754. Type *F32Ty = Type::getFloatTy(Ctx);
  755. Type *F16Ty = Type::getHalfTy(Ctx);
  756. const DataLayout &DL = DM.GetModule()->getDataLayout();
  757. DxilTypeSystem &typeSys = DM.GetTypeSystem();
  758. SmallVector<Function *, 2> rawBufLoads;
  759. for (auto it : hlslOP->GetOpFuncList(DXIL::OpCode::RawBufferLoad)) {
  760. Function *F = it.second;
  761. if (!F)
  762. continue;
  763. rawBufLoads.emplace_back(F);
  764. }
  765. for (Function *F : rawBufLoads) {
  766. StructType *RetTy = cast<StructType>(F->getReturnType());
  767. Type *EltTy = RetTy->getElementType(0);
  768. if (EltTy->isHalfTy()) {
  769. ReplaceMinPrecisionRawBufferLoadByType(F, F16Ty, F32Ty, hlslOP, DL);
  770. } else if (EltTy == I16Ty) {
  771. ReplaceMinPrecisionRawBufferLoadByType(F, I16Ty, I32Ty, hlslOP, DL);
  772. }
  773. }
  774. SmallVector<Function *, 2> rawBufStores;
  775. for (auto it : hlslOP->GetOpFuncList(DXIL::OpCode::RawBufferStore)) {
  776. Function *F = it.second;
  777. if (!F)
  778. continue;
  779. rawBufStores.emplace_back(F);
  780. }
  781. for (Function *F : rawBufStores) {
  782. Type *EltTy =
  783. F->getFunctionType()->getParamType(DxilInst_RawBufferStore::arg_value0);
  784. if (EltTy->isHalfTy()) {
  785. ReplaceMinPrecisionRawBufferStoreByType(F, F16Ty, F32Ty, hlslOP,
  786. HandleToResTypeMap, typeSys, DL);
  787. } else if (EltTy == I16Ty) {
  788. ReplaceMinPrecisionRawBufferStoreByType(F, I16Ty, I32Ty, hlslOP,
  789. HandleToResTypeMap, typeSys, DL);
  790. }
  791. }
  792. }
  793. char DxilGenerationPass::ID = 0;
  794. ModulePass *llvm::createDxilGenerationPass(bool NotOptimized, hlsl::HLSLExtensionsCodegenHelper *extensionsHelper) {
  795. DxilGenerationPass *dxilPass = new DxilGenerationPass(NotOptimized);
  796. dxilPass->SetExtensionsHelper(extensionsHelper);
  797. return dxilPass;
  798. }
  799. INITIALIZE_PASS(DxilGenerationPass, "dxilgen", "HLSL DXIL Generation", false, false)