DxilConditionalMem2Reg.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. //===- DxilConditionalMem2Reg.cpp - Mem2Reg that selectively promotes Allocas ----===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #include "llvm/Pass.h"
  10. #include "llvm/Analysis/AssumptionCache.h"
  11. #include "llvm/Transforms/Scalar.h"
  12. #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  13. #include "llvm/IR/Instructions.h"
  14. #include "llvm/IR/IntrinsicInst.h"
  15. #include "llvm/IR/Dominators.h"
  16. #include "llvm/IR/Module.h"
  17. #include "llvm/IR/IRBuilder.h"
  18. #include "llvm/Support/raw_ostream.h"
  19. #include "llvm/Support/Debug.h"
  20. #include "llvm/IR/LegacyPassManager.h"
  21. #include "llvm/IR/DebugInfo.h"
  22. #include "llvm/IR/DIBuilder.h"
  23. #include "dxc/DXIL/DxilUtil.h"
  24. #include "dxc/HLSL/HLModule.h"
  25. #include "llvm/Analysis/DxilValueCache.h"
  26. #include "llvm/Analysis/ValueTracking.h"
  27. using namespace llvm;
  28. using namespace hlsl;
  29. static bool ContainsFloatingPointType(Type *Ty) {
  30. if (Ty->isFloatingPointTy()) {
  31. return true;
  32. }
  33. else if (Ty->isArrayTy()) {
  34. return ContainsFloatingPointType(Ty->getArrayElementType());
  35. }
  36. else if (Ty->isVectorTy()) {
  37. return ContainsFloatingPointType(Ty->getVectorElementType());
  38. }
  39. else if (Ty->isStructTy()) {
  40. for (unsigned i = 0, NumStructElms = Ty->getStructNumElements(); i < NumStructElms; i++) {
  41. if (ContainsFloatingPointType(Ty->getStructElementType(i)))
  42. return true;
  43. }
  44. }
  45. return false;
  46. }
  47. static bool Mem2Reg(Function &F, DominatorTree &DT, AssumptionCache &AC) {
  48. BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
  49. bool Changed = false;
  50. std::vector<AllocaInst*> Allocas;
  51. while (1) {
  52. Allocas.clear();
  53. // Find allocas that are safe to promote, by looking at all instructions in
  54. // the entry node
  55. for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
  56. if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
  57. if (isAllocaPromotable(AI) &&
  58. (!HLModule::HasPreciseAttributeWithMetadata(AI) || !ContainsFloatingPointType(AI->getAllocatedType())))
  59. Allocas.push_back(AI);
  60. if (Allocas.empty()) break;
  61. PromoteMemToReg(Allocas, DT, nullptr, &AC);
  62. Changed = true;
  63. }
  64. return Changed;
  65. }
  66. //
  67. // Special Mem2Reg pass that conditionally promotes or transforms Alloca's.
  68. //
  69. // Anything marked 'dx.precise', will not be promoted because precise markers
  70. // are not propagated to the dxil operations yet and will be lost if alloca
  71. // is removed right now.
  72. //
  73. // Precise Allocas of vectors get scalarized here. It's important we do that
  74. // before Scalarizer pass because promoting the allocas later than that will
  75. // produce vector phi's (disallowed by the validator), which need another
  76. // Scalarizer pass to clean up.
  77. //
class DxilConditionalMem2Reg : public FunctionPass {
public:
  static char ID;

  // Function overrides that resolve options when used for DxOpt
  void applyOptions(PassOptions O) override {
    GetPassOptionBool(O, "NoOpt", &NoOpt, false);
  }
  void dumpConfig(raw_ostream &OS) override {
    FunctionPass::dumpConfig(OS);
    OS << ",NoOpt=" << NoOpt;
  }

  // Set via the constructor or the "NoOpt" pass option; surfaced in
  // dumpConfig. (Not read elsewhere in this file.)
  bool NoOpt = false;

  explicit DxilConditionalMem2Reg(bool NoOpt=false) : FunctionPass(ID), NoOpt(NoOpt)
  {
    initializeDxilConditionalMem2RegPass(*PassRegistry::getPassRegistry());
  }

  // Requires dominator tree + assumption cache (both consumed by
  // PromoteMemToReg); CFG shape is never changed by this pass.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.setPreservesCFG();
  }

  // Collect and remove all instructions that use AI, but
  // give up (returning false, deleting nothing) if there is anything
  // other than store, bitcast, memcpy-as-dest, or GEP — i.e. anything
  // that could observe the alloca's contents.
  static bool TryRemoveUnusedAlloca(AllocaInst *AI) {
    // WorkList doubles as the visitation order: users are always appended
    // after the instruction they use, so erasing in reverse order below
    // deletes users before their definitions.
    std::vector<Instruction *> WorkList;
    WorkList.push_back(AI);
    for (unsigned i = 0; i < WorkList.size(); i++) {
      Instruction *I = WorkList[i];
      for (User *U : I->users()) {
        Instruction *UI = cast<Instruction>(U);
        unsigned Opcode = UI->getOpcode();
        if (Opcode == Instruction::BitCast ||
            Opcode == Instruction::GetElementPtr ||
            Opcode == Instruction::Store)
        {
          // NOTE(review): a user reached through two distinct worklist
          // entries (e.g. a store whose value and pointer operands are
          // both derived from AI) would be pushed twice and erased twice;
          // confirm that shape cannot occur here.
          WorkList.push_back(UI);
        }
        else if (MemCpyInst *MC = dyn_cast<MemCpyInst>(UI)) {
          if (MC->getSource() == I) { // MC reads from our alloca
            return false;
          }
          WorkList.push_back(UI);
        }
        else { // Load? PHINode? Assume read.
          return false;
        }
      }
    }
    // Remove all instructions (reverse order: users before defs).
    for (auto It = WorkList.rbegin(), E = WorkList.rend(); It != E; It++) {
      Instruction *I = *It;
      I->eraseFromParent();
    }
    return true;
  }

  // Runs TryRemoveUnusedAlloca on every alloca in the entry block.
  // Returns true if any alloca (and its write-only use tree) was removed.
  static bool RemoveAllUnusedAllocas(Function &F) {
    // Snapshot the allocas first, since removal mutates the block.
    std::vector<AllocaInst *> Allocas;
    BasicBlock &EntryBB = *F.begin();
    for (auto It = EntryBB.begin(), E = EntryBB.end(); It != E;) {
      Instruction &I = *(It++);
      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
        Allocas.push_back(AI);
      }
    }
    bool Changed = false;
    for (AllocaInst *AI : Allocas) {
      Changed |= TryRemoveUnusedAlloca(AI);
    }
    return Changed;
  }

  //
  // Turns all allocas of vector types that are marked with 'dx.precise'
  // and turn them into scalars. For example:
  //
  //    x = alloca <f32 x 4> !dx.precise
  //
  // becomes:
  //
  //    x1 = alloca f32 !dx.precise
  //    x2 = alloca f32 !dx.precise
  //    x3 = alloca f32 !dx.precise
  //    x4 = alloca f32 !dx.precise
  //
  // This function also replaces all stores and loads but leaves everything
  // else alone by generating insertelement and extractelement as appropriate.
  //
  static bool ScalarizePreciseVectorAlloca(Function &F) {
    BasicBlock *Entry = &*F.begin();
    bool Changed = false;
    for (auto it = Entry->begin(); it != Entry->end();) {
      Instruction *I = &*(it++);
      AllocaInst *AI = dyn_cast<AllocaInst>(I);
      if (!AI || !AI->getAllocatedType()->isVectorTy()) continue;
      if (!HLModule::HasPreciseAttributeWithMetadata(AI)) continue;

      IRBuilder<> B(AI);
      VectorType *VTy = cast<VectorType>(AI->getAllocatedType());
      Type *ScalarTy = VTy->getVectorElementType();
      const unsigned VectorSize = VTy->getVectorNumElements();

      // One scalar alloca per vector lane; metadata (including the
      // dx.precise marker) is copied so each lane stays precise.
      SmallVector<AllocaInst *, 32> Elements;
      for (unsigned i = 0; i < VectorSize; i++) {
        AllocaInst *Elem = B.CreateAlloca(ScalarTy);
        hlsl::DxilMDHelper::CopyMetadata(*Elem, *AI);
        Elements.push_back(Elem);
      }

      for (auto it = AI->user_begin(); it != AI->user_end();) {
        User *U = *(it++);
        if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
          // Rebuild the full vector from the scalar slots with a chain of
          // insertelements, then retire the vector load.
          B.SetInsertPoint(LI);
          Value *Vec = UndefValue::get(VTy);
          for (unsigned i = 0; i < VectorSize; i++) {
            LoadInst *Elem = B.CreateLoad(Elements[i]);
            hlsl::DxilMDHelper::CopyMetadata(*Elem, *LI);
            Vec = B.CreateInsertElement(Vec, Elem, i);
          }
          LI->replaceAllUsesWith(Vec);
          LI->eraseFromParent();
        }
        else if (StoreInst *Store = dyn_cast<StoreInst>(U)) {
          // Split the stored vector into one extractelement + store per lane.
          B.SetInsertPoint(Store);
          Value *Vec = Store->getValueOperand();
          for (unsigned i = 0; i < VectorSize; i++) {
            Value *Elem = B.CreateExtractElement(Vec, i);
            StoreInst *ElemStore = B.CreateStore(Elem, Elements[i]);
            hlsl::DxilMDHelper::CopyMetadata(*ElemStore, *Store);
          }
          Store->eraseFromParent();
        }
        else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
          // Bitcasts are only expected from lifetime intrinsics; those
          // markers are simply discarded along with the cast.
          DXASSERT(onlyUsedByLifetimeMarkers(BCI),
            "expected bitcast to only be used by lifetime intrinsics");
          for (auto BCIU = BCI->user_begin(), BCIE = BCI->user_end(); BCIU != BCIE;) {
            IntrinsicInst *II = cast<IntrinsicInst>(*(BCIU++));
            II->eraseFromParent();
          }
          BCI->eraseFromParent();
        }
        else {
          llvm_unreachable("Cannot handle non-store/load on precise vector allocas");
        }
      }

      AI->eraseFromParent();
      Changed = true;
    }
    return Changed;
  }

  struct StoreInfo {
    Value *V;        // a store instruction (or an intermediate value while walking)
    unsigned Offset; // accumulated offset from the base pointer, in bits
  };

  // Walks the users of V (a pointer or something derived from one) and
  // records every StoreInst reached, together with its constant bit offset
  // from the base. Returns false — leaving Stores partially filled — if it
  // meets a GEP it cannot reason about (non-constant index, non-zero first
  // index, or an element type that is neither array nor vector).
  static bool FindAllStores(Module &M, Value *V, SmallVectorImpl<StoreInfo> *Stores) {
    SmallVector<StoreInfo, 8> Worklist;
    std::set<Value *> Seen;
    auto Add = [&](Value *V, unsigned OffsetInBits) {
      if (Seen.insert(V).second)
        Worklist.push_back({ V, OffsetInBits });
    };
    Add(V, 0);
    const DataLayout &DL = M.getDataLayout();
    while (Worklist.size()) {
      auto Info = Worklist.pop_back_val();
      auto *Elem = Info.V;
      if (auto GEP = dyn_cast<GEPOperator>(Elem)) {
        // Only the simple two-index "[0, i]" form is handled; anything
        // else is silently skipped (not treated as failure).
        if (GEP->getNumIndices() != 2)
          continue;
        unsigned ElemSize = 0;
        Type *GEPPtrType = GEP->getPointerOperand()->getType();
        Type *PtrElemType = GEPPtrType->getPointerElementType();
        if (ArrayType *ArrayTy = dyn_cast<ArrayType>(PtrElemType)) {
          ElemSize = DL.getTypeAllocSizeInBits(ArrayTy->getElementType());
        }
        else if (VectorType *VectorTy = dyn_cast<VectorType>(PtrElemType)) {
          ElemSize = DL.getTypeAllocSizeInBits(VectorTy->getElementType());
        }
        else {
          return false;
        }
        unsigned OffsetInBits = 0;
        // Operand 0 is the pointer itself; indices start at operand 1.
        // The first index must be the constant 0; the second selects the
        // element and determines the bit offset.
        for (unsigned i = 0; i < GEP->getNumIndices(); i++) {
          auto IdxOp = dyn_cast<ConstantInt>(GEP->getOperand(i+1));
          if (!IdxOp) {
            return false;
          }
          uint64_t Idx = IdxOp->getLimitedValue();
          if (i == 0) {
            if (Idx != 0)
              return false;
          }
          else {
            OffsetInBits = Idx * ElemSize;
          }
        }
        for (User *U : Elem->users())
          Add(U, Info.Offset + OffsetInBits);
      }
      else if (auto *Store = dyn_cast<StoreInst>(Elem)) {
        Stores->push_back({ Store, Info.Offset });
      }
    }
    return true;
  }

  // Function to rewrite debug info for output argument.
  // Sometimes, normal local variables that get returned from functions get rewritten as
  // a pointer argument.
  //
  // Right now, we generally have a single dbg.declare for the Argument, but as we lower
  // it to storeOutput, the dbg.declare and the Argument both get removed, leaving no
  // debug info for the local variable.
  //
  // Solution here is to rewrite the dbg.declare as dbg.value's by finding all the stores
  // and writing a dbg.value immediately before the store. Fairly conservative at the moment
  // about what cases to rewrite (only scalars and vectors, and arrays of scalars and vectors).
  //
  bool RewriteOutputArgsDebugInfo(Function &F) {
    bool Changed = false;
    Module *M = F.getParent();
    DIBuilder DIB(*M);

    SmallVector<StoreInfo, 4> Stores;
    LLVMContext &Ctx = F.getContext();
    for (Argument &Arg : F.args()) {
      if (!Arg.getType()->isPointerTy())
        continue;
      // Conservative type filter: scalars, vectors, and arrays thereof.
      Type *Ty = Arg.getType()->getPointerElementType();
      bool IsSimpleType =
        Ty->isSingleValueType() ||
        Ty->isVectorTy() ||
        (Ty->isArrayTy() && (Ty->getArrayElementType()->isVectorTy() || Ty->getArrayElementType()->isSingleValueType()));
      if (!IsSimpleType)
        continue;

      // Collect every store (with its bit offset); bail on this argument
      // entirely if any user tree cannot be fully analyzed.
      Stores.clear();
      for (User *U : Arg.users()) {
        if (!FindAllStores(*M, U, &Stores)) {
          Stores.clear();
          break;
        }
      }
      if (Stores.empty())
        continue;

      // Find the argument's dbg.declare — but only if it is the argument
      // metadata's sole user, to avoid ambiguity.
      DbgDeclareInst *Declare = nullptr;
      if (auto *L = LocalAsMetadata::getIfExists(&Arg)) {
        if (auto *DINode = MetadataAsValue::getIfExists(Ctx, L)) {
          if (!DINode->user_empty() && std::next(DINode->user_begin()) == DINode->user_end()) {
            Declare = dyn_cast<DbgDeclareInst>(*DINode->user_begin());
          }
        }
      }

      if (Declare) {
        DITypeIdentifierMap EmptyMap;
        DILocalVariable *Var = Declare->getVariable();
        DIExpression *Expr = Declare->getExpression();
        DIType *VarTy = Var->getType().resolve(EmptyMap);
        uint64_t VarSize = VarTy->getSizeInBits();
        uint64_t Offset = 0;
        // Honor a pre-existing bit_piece offset on the declare.
        if (Expr->isBitPiece())
          Offset = Expr->getBitPieceOffset();

        for (auto &Info : Stores) {
          auto *Store = cast<StoreInst>(Info.V);
          auto Val = Store->getValueOperand();
          auto Loc = Store->getDebugLoc();
          auto &M = *F.getParent();
          unsigned ValSize = M.getDataLayout().getTypeAllocSizeInBits(Val->getType());
          DIExpression *NewExpr = nullptr;
          // Emit a bit_piece expression when the store covers only part of
          // the variable (or the declare itself was already a piece);
          // otherwise an empty expression describes the whole variable.
          if (Offset || VarSize > ValSize) {
            uint64_t Elems[] = { dwarf::DW_OP_bit_piece, Offset + Info.Offset, ValSize };
            NewExpr = DIExpression::get(Ctx, Elems);
          }
          else {
            NewExpr = DIExpression::get(Ctx, {});
          }
          // Only emit a dbg.value when the store's scope belongs to the
          // same subprogram as the variable (avoids cross-function attrib).
          if (Loc->getScope()->getSubprogram() == Var->getScope()->getSubprogram())
            DIB.insertDbgValueIntrinsic(Val, 0, Var, NewExpr, Loc, Store);
        }

        Declare->eraseFromParent();
        Changed = true;
      }
    }
    return Changed;
  }

  // Pipeline: fix up output-arg debug info first (it inspects stores that
  // the later steps may delete), then drop write-only allocas, scalarize
  // precise vector allocas, and finally run the conditional mem2reg.
  bool runOnFunction(Function &F) override {
    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    AssumptionCache *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    bool Changed = false;
    Changed |= RewriteOutputArgsDebugInfo(F);
    Changed |= RemoveAllUnusedAllocas(F);
    Changed |= ScalarizePreciseVectorAlloca(F);
    Changed |= Mem2Reg(F, *DT, *AC);
    return Changed;
  }
};
char DxilConditionalMem2Reg::ID;

// Factory used by the pass-pipeline setup; NoOpt is forwarded to the pass
// (it can also be set later through the "NoOpt" option in applyOptions).
Pass *llvm::createDxilConditionalMem2RegPass(bool NoOpt) {
  return new DxilConditionalMem2Reg(NoOpt);
}

// Pass registration: declares the required analyses so the legacy pass
// manager schedules DominatorTree and AssumptionCache before this pass.
INITIALIZE_PASS_BEGIN(DxilConditionalMem2Reg, "dxil-cond-mem2reg", "Dxil Conditional Mem2Reg", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(DxilConditionalMem2Reg, "dxil-cond-mem2reg", "Dxil Conditional Mem2Reg", false, false)