//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable. This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
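///
/// For example (illustrative), a bitcast or GEP ConstantExpr whose base is a
/// constant GlobalVariable is accepted, while the same address computed by
/// ordinary instructions is not, since those instructions cannot be rewritten
/// here.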
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::AddrSpaceCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  }
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca. Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses. If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
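///
/// For example (illustrative IR, names invented), the alloca below is only
/// written by a memcpy from the constant global @G, so its uses can be
/// rewritten to use @G directly:
///   %buf = alloca [16 x i8]
///   %p = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 0
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p,
///       i8* bitcast ([16 x i8]* @G to i8*), i64 16, i32 1, i1 false)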
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  // We track lifetime intrinsics as we encounter them. If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
  ValuesToInspect.push_back(std::make_pair(V, false));
  while (!ValuesToInspect.empty()) {
    auto ValuePair = ValuesToInspect.pop_back_val();
    const bool IsOffset = ValuePair.second;
    for (auto &U : ValuePair.first->uses()) {
      Instruction *I = cast<Instruction>(U.getUser());

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads, they are always ok.
        if (!LI->isSimple()) return false;
        continue;
      }

      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
        // If uses of the bitcast are ok, we are ok.
        ValuesToInspect.push_back(std::make_pair(I, IsOffset));
        continue;
      }
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // If the GEP has all zero indices, it doesn't offset the pointer. If it
        // doesn't, it does.
        ValuesToInspect.push_back(
            std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
        continue;
      }

      if (auto CS = CallSite(I)) {
        // If this is the function being called then we treat it like a load and
        // ignore it.
        if (CS.isCallee(&U))
          continue;

        // Inalloca arguments are clobbered by the call.
        unsigned ArgNo = CS.getArgumentNo(&U);
        if (CS.isInAllocaArgument(ArgNo))
          return false;

        // If this is a readonly/readnone call site, then we know it is just a
        // load (but one that potentially returns the value itself), so we can
        // ignore it if we know that the value isn't captured.
        if (CS.onlyReadsMemory() &&
            (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
          continue;

        // If this is being passed as a byval argument, the caller is making a
        // copy, so it is only a read of the alloca.
        if (CS.isByValArgument(ArgNo))
          continue;
      }

      // Lifetime intrinsics can be handled by the caller.
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          assert(II->use_empty() && "Lifetime markers have no result to use!");
          ToDelete.push_back(II);
          continue;
        }
      }

      // If this isn't our memcpy/memmove, reject it as something we can't
      // handle.
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
      if (!MI)
        return false;

      // If the transfer is using the alloca as a source of the transfer, then
      // ignore it since it is a load (unless the transfer is volatile).
      if (U.getOperandNo() == 1) {
        if (MI->isVolatile()) return false;
        continue;
      }

      // If we already have seen a copy, reject the second one.
      if (TheCopy) return false;

      // If the pointer has been offset from the start of the alloca, we can't
      // safely handle this.
      if (IsOffset) return false;

      // If the memintrinsic isn't using the alloca as the dest, reject it.
      if (U.getOperandNo() != 0) return false;

      // If the source of the memcpy/move is not a constant global, reject it.
      if (!pointsToConstantGlobal(MI->getSource()))
        return false;

      // Otherwise, the transform is safe. Remember the copy instruction.
      TheCopy = MI;
    }
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return the memcpy/memmove if the specified
/// alloca is only modified by a copy from a constant global, and null
/// otherwise. If we can prove this, we can replace any uses of the alloca with
/// uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = nullptr;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return nullptr;
}

static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
  // Check for array size of 1 (scalar allocation).
  if (!AI.isArrayAllocation()) {
    // i32 1 is the canonical array size for scalar allocations.
    if (AI.getArraySize()->getType()->isIntegerTy(32))
      return nullptr;

    // Canonicalize it.
    Value *V = IC.Builder->getInt32(1);
    AI.setOperand(0, V);
    return &AI;
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
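  // For example (illustrative IR):
  //   %a = alloca i32, i32 4
  // becomes roughly
  //   %a = alloca [4 x i32]
  //   %a.sub = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 0
  // and all users of the original alloca are rewritten to use %a.sub.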
  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
    Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
    AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
    New->setAlignment(AI.getAlignment());

    // Scan to the end of the allocation instructions, to skip over a block of
    // allocas if possible...also skip interleaved debug info
    //
    BasicBlock::iterator It = New;
    while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
      ++It;

    // Now that It is pointing to the first non-allocation-inst in the block,
    // insert our getelementptr instruction...
    //
    Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
    Value *NullIdx = Constant::getNullValue(IdxTy);
    Value *Idx[2] = {NullIdx, NullIdx};
    Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
    IC.InsertNewInstBefore(GEP, *It);

    // Now make everything use the getelementptr instead of the original
    // allocation.
    return IC.ReplaceInstUsesWith(AI, GEP);
  }

  if (isa<UndefValue>(AI.getArraySize()))
    return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));

  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
  if (AI.getArraySize()->getType() != IntPtrTy) {
    Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
    AI.setOperand(0, V);
    return &AI;
  }

  return nullptr;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  if (auto *I = simplifyAllocaArraySize(*this, AI))
    return I;

  if (AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all allocas of zero byte objects to the entry block and merge them
    // together. Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block. There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
              DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation. If this is the case, we can change all users to use
    // the constant global instead. This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(
          Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast
          = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}

/// \brief Helper to combine a load to a new type.
///
/// This just does the work of combining a load to a new type. It handles
/// metadata, etc., and returns the new instruction. The \c NewTy should be the
/// loaded *value* type. This will convert it to a pointer, cast the operand to
/// that pointer type, load it, etc.
///
/// Note that this will create all of the instructions with whatever insert
/// point the \c InstCombiner currently is using.
static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI,
                                      Type *NewTy, const Twine &Suffix = "") {
  Value *Ptr = LI.getPointerOperand();
  unsigned AS = LI.getPointerAddressSpace();
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  LI.getAllMetadata(MD);

  LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
      IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
      LI.getAlignment(), LI.getName() + Suffix);
  MDBuilder MDB(NewLoad->getContext());
  for (const auto &MDPair : MD) {
    unsigned ID = MDPair.first;
    MDNode *N = MDPair.second;
    // Note, essentially every kind of metadata should be preserved here! This
    // routine is supposed to clone a load instruction changing *only its type*.
    // The only metadata it makes sense to drop is metadata which is invalidated
    // when the pointer type changes. This should essentially never be the case
    // in LLVM, but we explicitly switch over only known metadata to be
    // conservatively correct. If you are adding metadata to LLVM which pertains
    // to loads, you almost certainly want to add it here.
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_prof:
    case LLVMContext::MD_fpmath:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_invariant_load:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_nontemporal:
    case LLVMContext::MD_mem_parallel_loop_access:
      // All of these directly apply.
      NewLoad->setMetadata(ID, N);
      break;

    case LLVMContext::MD_nonnull:
      // This only directly applies if the new type is also a pointer.
      if (NewTy->isPointerTy()) {
        NewLoad->setMetadata(ID, N);
        break;
      }
      // If it's integral now, translate it to !range metadata.
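      // The range emitted below is [NullInt+1, NullInt), a wrapping range that
      // excludes only the null value itself (typically [1, 0)).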
      if (NewTy->isIntegerTy()) {
        auto *ITy = cast<IntegerType>(NewTy);
        auto *NullInt = ConstantExpr::getPtrToInt(
            ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
        auto *NonNullInt =
            ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
        NewLoad->setMetadata(LLVMContext::MD_range,
                             MDB.createRange(NonNullInt, NullInt));
      }
      break;

    case LLVMContext::MD_range:
      // FIXME: It would be nice to propagate this in some way, but the type
      // conversions make it hard. If the new type is a pointer, we could
      // translate it to !nonnull metadata.
      break;
    }
  }
  return NewLoad;
}

/// \brief Combine a store to a new type.
///
/// Returns the newly created store instruction.
static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI,
                                         Value *V) {
  Value *Ptr = SI.getPointerOperand();
  unsigned AS = SI.getPointerAddressSpace();
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  SI.getAllMetadata(MD);

  StoreInst *NewStore = IC.Builder->CreateAlignedStore(
      V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
      SI.getAlignment());
  for (const auto &MDPair : MD) {
    unsigned ID = MDPair.first;
    MDNode *N = MDPair.second;
    // Note, essentially every kind of metadata should be preserved here! This
    // routine is supposed to clone a store instruction changing *only its
    // type*. The only metadata it makes sense to drop is metadata which is
    // invalidated when the pointer type changes. This should essentially
    // never be the case in LLVM, but we explicitly switch over only known
    // metadata to be conservatively correct. If you are adding metadata to
    // LLVM which pertains to stores, you almost certainly want to add it
    // here.
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_prof:
    case LLVMContext::MD_fpmath:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_nontemporal:
    case LLVMContext::MD_mem_parallel_loop_access:
      // All of these directly apply.
      NewStore->setMetadata(ID, N);
      break;

    case LLVMContext::MD_invariant_load:
    case LLVMContext::MD_nonnull:
    case LLVMContext::MD_range:
      // These don't apply for stores.
      break;
    }
  }

  return NewStore;
}

/// \brief Combine loads to match the type of their uses' value after looking
/// through intervening bitcasts.
///
/// The core idea here is that if the result of a load is used in an operation,
/// we should load the type most conducive to that operation. For example, when
/// loading an integer and converting that immediately to a pointer, we should
/// instead directly load a pointer.
///
/// However, this routine must never change the width of a load or the number of
/// loads as that would introduce a semantic change. This combine is expected to
/// be a semantic no-op which just allows loads to more closely model the types
/// of their consuming operations.
///
/// Currently, we also refuse to change the precise type used for an atomic load
/// or a volatile load. This is debatable, and might be reasonable to change
/// later. However, it is risky in case some backend or other part of LLVM is
/// relying on the exact type loaded to select appropriate atomic operations.
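///
/// For example (illustrative IR), a pattern like
///   %i = load i64, i64* %p
///   %v = inttoptr i64 %i to i8*
/// would be rewritten to load an i8* directly through a bitcast of %p, so the
/// inttoptr disappears. (In this HLSL fork the body below is compiled out, so
/// the example only reflects what the upstream code would do.)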
static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
#if 0 // HLSL Change - bitcast to i32* won't help HLSL.
  // FIXME: We could probably with some care handle both volatile and atomic
  // loads here but it isn't clear that this is important.
  if (!LI.isSimple())
    return nullptr;
  if (LI.use_empty())
    return nullptr;

  Type *Ty = LI.getType();
  const DataLayout &DL = IC.getDataLayout();

  // Try to canonicalize loads which are only ever stored to operate over
  // integers instead of any other type. We only do this when the loaded type
  // is sized and has a size exactly the same as its store size and the store
  // size is a legal integer type.
  if (!Ty->isIntegerTy() && Ty->isSized() &&
      DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
      DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
    if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
          auto *SI = dyn_cast<StoreInst>(U);
          return SI && SI->getPointerOperand() != &LI;
        })) {
      LoadInst *NewLoad = combineLoadToNewType(
          IC, LI,
          Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
      // Replace all the stores with stores of the newly loaded value.
      for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
        auto *SI = cast<StoreInst>(*UI++);
        IC.Builder->SetInsertPoint(SI);
        combineStoreToNewValue(IC, *SI, NewLoad);
        IC.EraseInstFromFunction(*SI);
      }
      assert(LI.use_empty() && "Failed to remove all users of the load!");
      // Return the old load so the combiner can delete it safely.
      return &LI;
    }
  }

  // Fold away bit casts of the loaded value by loading the desired type.
  // We can do this for BitCastInsts as well as casts from and to pointer types,
  // as long as those are noops (i.e., the source or dest type have the same
  // bitwidth as the target's pointers).
  if (LI.hasOneUse())
    if (auto *CI = dyn_cast<CastInst>(LI.user_back())) {
      if (CI->isNoopCast(DL)) {
        LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
        CI->replaceAllUsesWith(NewLoad);
        IC.EraseInstFromFunction(*CI);
        return &LI;
      }
    }

  // FIXME: We should also canonicalize loads of vectors when their elements are
  // cast to other types.
  return nullptr;
#else
  return nullptr;
#endif // HLSL Change - bitcast to i32* won't help HLSL.
}

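/// Unpack a load of a single-element aggregate into a load of the element.
///
/// For example (illustrative IR), a load of type {i32} is rewritten to load
/// the i32 member directly and then re-wrap it with insertvalue, which later
/// combines can usually simplify further.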
static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // loads here but it isn't clear that this is important.
  if (!LI.isSimple())
    return nullptr;

  Type *T = LI.getType();
  if (!T->isAggregateType())
    return nullptr;

  assert(LI.getAlignment() && "Alignment must be set at this point");

  if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct only has one element, we unpack.
    if (ST->getNumElements() == 1) {
      LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
                                               ".unpack");
      return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
          UndefValue::get(T), NewLoad, 0, LI.getName()));
    }
  }

  if (auto *AT = dyn_cast<ArrayType>(T)) {
    // If the array only has one element, we unpack.
    if (AT->getNumElements() == 1) {
      LoadInst *NewLoad = combineLoadToNewType(IC, LI, AT->getElementType(),
                                               ".unpack");
      return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
          UndefValue::get(T), NewLoad, 0, LI.getName()));
    }
  }

  return nullptr;
}

// If we can determine that all possible objects pointed to by the provided
// pointer value are, not only dereferenceable, but also definitively less than
// or equal to the provided maximum size, then return true. Otherwise, return
// false (constant global values and allocas are examples of objects whose
// size we can determine).
//
// FIXME: This should probably live in ValueTracking (or similar).
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
                                     const DataLayout &DL) {
  SmallPtrSet<Value *, 4> Visited;
  SmallVector<Value *, 4> Worklist(1, V);

  do {
    Value *P = Worklist.pop_back_val();
    P = P->stripPointerCasts();

    if (!Visited.insert(P).second)
      continue;

    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    if (PHINode *PN = dyn_cast<PHINode>(P)) {
      for (Value *IncValue : PN->incoming_values())
        Worklist.push_back(IncValue);
      continue;
    }

    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
      if (GA->mayBeOverridden())
        return false;
      Worklist.push_back(GA->getAliasee());
      continue;
    }

    // If we know how big this object is, and it is less than MaxSize, continue
    // searching. Otherwise, return false.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
      if (!AI->getAllocatedType()->isSized())
        return false;

      ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
      if (!CS)
        return false;

      uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
      // Make sure that, even if the multiplication below would wrap as a
      // uint64_t, we still do the right thing.
      if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
        return false;
      continue;
    }

    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
      if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
        return false;

      uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
      if (InitSize > MaxSize)
        return false;
      continue;
    }

    return false;
  } while (!Worklist.empty());

  return true;
}

// If we're indexing into an object of a known size, and the outer index is
// not a constant, but having any value but zero would lead to undefined
// behavior, replace it with zero.
//
// For example, if we have:
// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
// ...
// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
// ... = load i32* %arrayidx, align 4
// Then we know that we can replace %x in the GEP with i64 0.
//
// FIXME: We could fold any GEP index to zero that would cause UB if it were
// not zero. Currently, we only handle the first such index. Also, we could
// search through non-zero constant indices if we kept track of the offsets
// those indices implied.
static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
                                     Instruction *MemI, unsigned &Idx) {
  if (GEPI->getNumOperands() < 2)
    return false;

  // Find the first non-zero index of a GEP. If all indices are zero, return
  // one past the last index.
  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
    unsigned I = 1;
    for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
      Value *V = GEPI->getOperand(I);
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
        if (CI->isZero())
          continue;

      break;
    }

    return I;
  };

  // Skip through initial 'zero' indices, and find the corresponding pointer
  // type. See if the next index is not a constant.
  Idx = FirstNZIdx(GEPI);
  if (Idx == GEPI->getNumOperands())
    return false;
  if (isa<Constant>(GEPI->getOperand(Idx)))
    return false;

  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
  Type *AllocTy = GetElementPtrInst::getIndexedType(
      cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType())
          ->getElementType(),
      Ops);
  if (!AllocTy || !AllocTy->isSized())
    return false;
  const DataLayout &DL = IC.getDataLayout();
  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);

  // If there are more indices after the one we might replace with a zero, make
  // sure they're all non-negative. If any of them are negative, the overall
  // address being computed might be before the base address determined by the
  // first non-zero index.
  auto IsAllNonNegative = [&]() {
    for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
      bool KnownNonNegative, KnownNegative;
      IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
                        KnownNegative, 0, MemI);
      if (KnownNonNegative)
        continue;
      return false;
    }

    return true;
  };

  // FIXME: If the GEP is not inbounds, and there are extra indices after the
  // one we'll replace, those could cause the address computation to wrap
  // (rendering the IsAllNonNegative() check below insufficient). We can do
  // better, ignoring zero indices (and other indices we can prove small
  // enough not to wrap).
  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
    return false;

  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
  // also known to be dereferenceable.
  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
         IsAllNonNegative();
}

// If we're indexing into an object with a variable index for the memory
// access, but the object has only one element, we can assume that the index
// will always be zero. If we replace the GEP, return it.
template <typename T>
static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
                                          T &MemI) {
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
    unsigned Idx;
    if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
      Instruction *NewGEPI = GEPI->clone();
      NewGEPI->setOperand(Idx,
          ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
      NewGEPI->insertBefore(GEPI);
      MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
      return NewGEPI;
    }
  }

  return nullptr;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Try to canonicalize the loaded type.
  if (Instruction *Res = combineLoadToOperationType(*this, LI))
    return Res;

  // Attempt to improve the alignment.
  unsigned KnownAlign = getOrEnforceKnownAlignment(
      Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
  unsigned LoadAlign = LI.getAlignment();
  unsigned EffectiveLoadAlign =
      LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());

  if (KnownAlign > EffectiveLoadAlign)
    LI.setAlignment(KnownAlign);
  else if (LoadAlign == 0)
    LI.setAlignment(EffectiveLoadAlign);

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
    Worklist.Add(NewGEPI);
    return &LI;
  }

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
    return Res;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
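  // For example (illustrative IR), in
  //   store i32 %v, i32* %p
  //   %x = add i32 %a, %b
  //   %w = load i32, i32* %p
  // the load can simply be replaced by %v.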
  BasicBlock::iterator BBI = &LI;
  AAMDNodes AATags;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,
                                                     6, AA, &AATags)) {
    if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
      unsigned KnownIDs[] = {
        LLVMContext::MD_tbaa,
        LLVMContext::MD_alias_scope,
        LLVMContext::MD_noalias,
        LLVMContext::MD_range,
        LLVMContext::MD_invariant_load,
        LLVMContext::MD_nonnull,
      };
      combineMetadata(NLI, &LI, KnownIDs);
    }

    return ReplaceInstUsesWith(
        LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
                                            LI.getName() + ".cast"));
  }

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable. We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable. We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap! Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
          LI.getPointerAddressSpace() == 0) {
        LI.setOperand(0, SI->getOperand(2));
        return &LI;
      }

      // load (select (cond, P, null)) -> load P
      if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
          LI.getPointerAddressSpace() == 0) {
        LI.setOperand(0, SI->getOperand(1));
        return &LI;
      }
    }
  }

  return nullptr;
}

/// \brief Combine stores to match the type of value being stored.
///
/// The core idea here is that the memory does not have any intrinsic type and
/// where we can we should match the type of a store to the type of value being
/// stored.
///
/// However, this routine must never change the width of a store or the number of
/// stores as that would introduce a semantic change. This combine is expected to
/// be a semantic no-op which just allows stores to more closely model the types
/// of their incoming values.
///
/// Currently, we also refuse to change the precise type used for an atomic or
/// volatile store. This is debatable, and might be reasonable to change later.
/// However, it is risky in case some backend or other part of LLVM is relying
/// on the exact type stored to select appropriate atomic operations.
///
/// \returns true if the store was successfully combined away. This indicates
/// the caller must erase the store instruction. We have to let the caller erase
/// the store instruction as otherwise there is no way to signal whether it was
/// combined or not: IC.EraseInstFromFunction returns a null pointer.
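///
/// For example (illustrative IR), a pattern like
///   %i = bitcast float %f to i32
///   store i32 %i, i32* %p
/// is rewritten to store %f directly through a float* bitcast of %p, so the
/// value-level bitcast disappears.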
static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // stores here but it isn't clear that this is important.
  if (!SI.isSimple())
    return false;

  Value *V = SI.getValueOperand();

  // Fold away bit casts of the stored value by storing the original type.
  if (auto *BC = dyn_cast<BitCastInst>(V)) {
    V = BC->getOperand(0);
    combineStoreToNewValue(IC, SI, V);
    return true;
  }

  // FIXME: We should also canonicalize stores of vectors when their elements
  // are cast to other types.
  return false;
}

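/// Unpack a store of a single-element aggregate into a store of the element.
///
/// For example (illustrative IR), a store of a {i32} value is rewritten to
/// extractvalue the i32 member and store it directly.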
static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // stores here but it isn't clear that this is important.
  if (!SI.isSimple())
    return false;

  Value *V = SI.getValueOperand();
  Type *T = V->getType();

  if (!T->isAggregateType())
    return false;

  if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct only has one element, we unpack.
    if (ST->getNumElements() == 1) {
      V = IC.Builder->CreateExtractValue(V, 0);
      combineStoreToNewValue(IC, SI, V);
      return true;
    }
  }

  if (auto *AT = dyn_cast<ArrayType>(T)) {
    // If the array only has one element, we unpack.
    if (AT->getNumElements() == 1) {
      V = IC.Builder->CreateExtractValue(V, 0);
      combineStoreToNewValue(IC, SI, V);
      return true;
    }
  }

  return false;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Try to canonicalize the stored type.
  if (combineStoreToValueType(*this, SI))
    return EraseInstFromFunction(SI);

  // Attempt to improve the alignment.
  unsigned KnownAlign = getOrEnforceKnownAlignment(
      Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
  unsigned StoreAlign = SI.getAlignment();
  unsigned EffectiveStoreAlign =
      StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());

  if (KnownAlign > EffectiveStoreAlign)
    SI.setAlignment(KnownAlign);
  else if (StoreAlign == 0)
    SI.setAlignment(EffectiveStoreAlign);

  // Try to canonicalize the stored type.
  if (unpackStoreToAggregate(*this, SI))
    return EraseInstFromFunction(SI);

  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
    Worklist.Add(NewGEPI);
    return &SI;
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return nullptr;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
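  // For example (illustrative IR), in
  //   store i32 %a, i32* %p
  //   %t = or i32 %x, %y
  //   store i32 %b, i32* %p
  // the first store is dead and can be removed.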
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop. However, if the loaded value is the
    // value we're storing and it was loaded from the pointer we're storing to,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location. Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return nullptr;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return nullptr;  // xform done!

  return nullptr;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges. If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop)
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case: there is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle. See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had AA tags, merge them.
  AAMDNodes AATags;
  SI.getAAMetadata(AATags);
  if (AATags) {
    OtherStore->getAAMetadata(AATags, /* Merge = */ true);
    NewSI->setAAMetadata(AATags);
  }

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}