//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "dxc/DXIL/DxilOperations.h" // HLSL Change - avoid unpack for dxil types.

using namespace llvm;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable. This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::AddrSpaceCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  }
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca. Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses. If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
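///
/// A hypothetical example of the pattern this looks for (schematic IR, not
/// taken from the original source):
///   %A = alloca [8 x float]
///   %p = bitcast [8 x float]* %A to i8*
///   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p,
///       i8* bitcast ([8 x float]* @cst to i8*), i32 32, i32 4, i1 false)
///   ; ... only loads of %A afterwards ...
/// If @cst is a constant global, every use of %A can be rewritten to use @cst
/// directly and the memcpy deleted.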
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  // We track lifetime intrinsics as we encounter them. If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.
  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
  ValuesToInspect.push_back(std::make_pair(V, false));
  while (!ValuesToInspect.empty()) {
    auto ValuePair = ValuesToInspect.pop_back_val();
    const bool IsOffset = ValuePair.second;
    for (auto &U : ValuePair.first->uses()) {
      Instruction *I = cast<Instruction>(U.getUser());
      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads, they are always ok.
        if (!LI->isSimple()) return false;
        continue;
      }
      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
        // If uses of the bitcast are ok, we are ok.
        ValuesToInspect.push_back(std::make_pair(I, IsOffset));
        continue;
      }
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // If the GEP has all zero indices, it doesn't offset the pointer. If it
        // doesn't, it does.
        ValuesToInspect.push_back(
            std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
        continue;
      }
      if (auto CS = CallSite(I)) {
        // If this is the function being called then we treat it like a load and
        // ignore it.
        if (CS.isCallee(&U))
          continue;
        // Inalloca arguments are clobbered by the call.
        unsigned ArgNo = CS.getArgumentNo(&U);
        if (CS.isInAllocaArgument(ArgNo))
          return false;
        // If this is a readonly/readnone call site, then we know it is just a
        // load (but one that potentially returns the value itself), so we can
        // ignore it if we know that the value isn't captured.
        if (CS.onlyReadsMemory() &&
            (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
          continue;
        // If this is being passed as a byval argument, the caller is making a
        // copy, so it is only a read of the alloca.
        if (CS.isByValArgument(ArgNo))
          continue;
      }
      // Lifetime intrinsics can be handled by the caller.
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          assert(II->use_empty() && "Lifetime markers have no result to use!");
          ToDelete.push_back(II);
          continue;
        }
      }
      // If this isn't our memcpy/memmove, reject it as something we can't
      // handle.
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
      if (!MI)
        return false;
      // If the transfer is using the alloca as a source of the transfer, then
      // ignore it since it is a load (unless the transfer is volatile).
      if (U.getOperandNo() == 1) {
        if (MI->isVolatile()) return false;
        continue;
      }
      // If we already have seen a copy, reject the second one.
      if (TheCopy) return false;
      // If the pointer has been offset from the start of the alloca, we can't
      // safely handle this.
      if (IsOffset) return false;
      // If the memintrinsic isn't using the alloca as the dest, reject it.
      if (U.getOperandNo() != 0) return false;
      // If the source of the memcpy/move is not a constant global, reject it.
      if (!pointsToConstantGlobal(MI->getSource()))
        return false;
      // Otherwise, the transform is safe. Remember the copy instruction.
      TheCopy = MI;
    }
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global. If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = nullptr;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return nullptr;
}

static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
  // Check for array size of 1 (scalar allocation).
  if (!AI.isArrayAllocation()) {
    // i32 1 is the canonical array size for scalar allocations.
    if (AI.getArraySize()->getType()->isIntegerTy(32))
      return nullptr;
    // Canonicalize it.
    Value *V = IC.Builder->getInt32(1);
    AI.setOperand(0, V);
    return &AI;
  }
  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
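  // For example (schematic IR, purely for illustration):
  //   %buf = alloca i32, i32 4
  // becomes
  //   %buf1 = alloca [4 x i32]
  //   %buf.sub = getelementptr inbounds [4 x i32]* %buf1, i32 0, i32 0
  // and all users of the original alloca are pointed at %buf.sub.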
  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
    Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
    AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
    New->setAlignment(AI.getAlignment());
    // Scan to the end of the allocation instructions, to skip over a block of
    // allocas if possible...also skip interleaved debug info
    //
    BasicBlock::iterator It = New;
    while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
      ++It;
    // Now that I is pointing to the first non-allocation-inst in the block,
    // insert our getelementptr instruction...
    //
    Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
    Value *NullIdx = Constant::getNullValue(IdxTy);
    Value *Idx[2] = {NullIdx, NullIdx};
    Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
    IC.InsertNewInstBefore(GEP, *It);
    // Now make everything use the getelementptr instead of the original
    // allocation.
    return IC.ReplaceInstUsesWith(AI, GEP);
  }
  if (isa<UndefValue>(AI.getArraySize()))
    return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
  if (AI.getArraySize()->getType() != IntPtrTy) {
    Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
    AI.setOperand(0, V);
    return &AI;
  }
  return nullptr;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  if (auto *I = simplifyAllocaArraySize(*this, AI))
    return I;
  if (AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
    // Move all alloca's of zero byte objects to the entry block and merge them
    // together. Note that we only do this for alloca's, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }
      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block. There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }
        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
              DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }
  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation. If this is the case, we can change all users to use
    // the constant global instead. This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(
          Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast
          = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }
  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}

/// \brief Helper to combine a load to a new type.
///
/// This just does the work of combining a load to a new type. It handles
/// metadata, etc., and returns the new instruction. The \c NewTy should be the
/// loaded *value* type. This will convert it to a pointer, cast the operand to
/// that pointer type, load it, etc.
///
/// Note that this will create all of the instructions with whatever insert
/// point the \c InstCombiner currently is using.
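///
/// Schematically (hypothetical IR, shown only to illustrate the shape of the
/// rewrite): combining
///   %v = load i32* %p
/// to the new type float produces
///   %p.cast = bitcast i32* %p to float*
///   %v1 = load float* %p.cast
/// with the original load's metadata copied over where it still applies.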
static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
                                      const Twine &Suffix = "") {
  Value *Ptr = LI.getPointerOperand();
  unsigned AS = LI.getPointerAddressSpace();
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  LI.getAllMetadata(MD);
  LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
      IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
      LI.getAlignment(), LI.getName() + Suffix);
  MDBuilder MDB(NewLoad->getContext());
  for (const auto &MDPair : MD) {
    unsigned ID = MDPair.first;
    MDNode *N = MDPair.second;
    // Note, essentially every kind of metadata should be preserved here! This
    // routine is supposed to clone a load instruction changing *only its type*.
    // The only metadata it makes sense to drop is metadata which is invalidated
    // when the pointer type changes. This should essentially never be the case
    // in LLVM, but we explicitly switch over only known metadata to be
    // conservatively correct. If you are adding metadata to LLVM which pertains
    // to loads, you almost certainly want to add it here.
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_prof:
    case LLVMContext::MD_fpmath:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_invariant_load:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_nontemporal:
    case LLVMContext::MD_mem_parallel_loop_access:
      // All of these directly apply.
      NewLoad->setMetadata(ID, N);
      break;
    case LLVMContext::MD_nonnull:
      // This only directly applies if the new type is also a pointer.
      if (NewTy->isPointerTy()) {
        NewLoad->setMetadata(ID, N);
        break;
      }
      // If it's integral now, translate it to !range metadata.
      if (NewTy->isIntegerTy()) {
        auto *ITy = cast<IntegerType>(NewTy);
        auto *NullInt = ConstantExpr::getPtrToInt(
            ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
        auto *NonNullInt =
            ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
        NewLoad->setMetadata(LLVMContext::MD_range,
                             MDB.createRange(NonNullInt, NullInt));
      }
      break;
    case LLVMContext::MD_range:
      // FIXME: It would be nice to propagate this in some way, but the type
      // conversions make it hard. If the new type is a pointer, we could
      // translate it to !nonnull metadata.
      break;
    }
  }
  return NewLoad;
}

/// \brief Combine a store to a new type.
///
/// Returns the newly created store instruction.
static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
  Value *Ptr = SI.getPointerOperand();
  unsigned AS = SI.getPointerAddressSpace();
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  SI.getAllMetadata(MD);
  StoreInst *NewStore = IC.Builder->CreateAlignedStore(
      V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
      SI.getAlignment());
  for (const auto &MDPair : MD) {
    unsigned ID = MDPair.first;
    MDNode *N = MDPair.second;
    // Note, essentially every kind of metadata should be preserved here! This
    // routine is supposed to clone a store instruction changing *only its
    // type*. The only metadata it makes sense to drop is metadata which is
    // invalidated when the pointer type changes. This should essentially
    // never be the case in LLVM, but we explicitly switch over only known
    // metadata to be conservatively correct. If you are adding metadata to
    // LLVM which pertains to stores, you almost certainly want to add it
    // here.
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_prof:
    case LLVMContext::MD_fpmath:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_nontemporal:
    case LLVMContext::MD_mem_parallel_loop_access:
      // All of these directly apply.
      NewStore->setMetadata(ID, N);
      break;
    case LLVMContext::MD_invariant_load:
    case LLVMContext::MD_nonnull:
    case LLVMContext::MD_range:
      // These don't apply for stores.
      break;
    }
  }
  return NewStore;
}

/// \brief Combine loads to match the type of their uses' value after looking
/// through intervening bitcasts.
///
/// The core idea here is that if the result of a load is used in an operation,
/// we should load the type most conducive to that operation. For example, when
/// loading an integer and converting that immediately to a pointer, we should
/// instead directly load a pointer.
///
/// However, this routine must never change the width of a load or the number of
/// loads as that would introduce a semantic change. This combine is expected to
/// be a semantic no-op which just allows loads to more closely model the types
/// of their consuming operations.
///
/// Currently, we also refuse to change the precise type used for an atomic load
/// or a volatile load. This is debatable, and might be reasonable to change
/// later. However, it is risky in case some backend or other part of LLVM is
/// relying on the exact type loaded to select appropriate atomic operations.
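///
/// For reference, the transform the upstream code below would perform (it is
/// disabled for HLSL) looks schematically like turning this hypothetical IR
///   %i = load i64* %p
///   %q = inttoptr i64 %i to i8*
/// into
///   %p.cast = bitcast i64* %p to i8**
///   %q = load i8** %p.cast
/// whenever the cast is a no-op for the target's pointer width.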
static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
#if 0 // HLSL Change - bitcast to i32* won't help HLSL.
  // FIXME: We could probably with some care handle both volatile and atomic
  // loads here but it isn't clear that this is important.
  if (!LI.isSimple())
    return nullptr;
  if (LI.use_empty())
    return nullptr;
  Type *Ty = LI.getType();
  const DataLayout &DL = IC.getDataLayout();
  // Try to canonicalize loads which are only ever stored to operate over
  // integers instead of any other type. We only do this when the loaded type
  // is sized and has a size exactly the same as its store size and the store
  // size is a legal integer type.
  if (!Ty->isIntegerTy() && Ty->isSized() &&
      DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
      DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
    if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
          auto *SI = dyn_cast<StoreInst>(U);
          return SI && SI->getPointerOperand() != &LI;
        })) {
      LoadInst *NewLoad = combineLoadToNewType(
          IC, LI,
          Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
      // Replace all the stores with stores of the newly loaded value.
      for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
        auto *SI = cast<StoreInst>(*UI++);
        IC.Builder->SetInsertPoint(SI);
        combineStoreToNewValue(IC, *SI, NewLoad);
        IC.EraseInstFromFunction(*SI);
      }
      assert(LI.use_empty() && "Failed to remove all users of the load!");
      // Return the old load so the combiner can delete it safely.
      return &LI;
    }
  }
  // Fold away bit casts of the loaded value by loading the desired type.
  // We can do this for BitCastInsts as well as casts from and to pointer types,
  // as long as those are noops (i.e., the source or dest type have the same
  // bitwidth as the target's pointers).
  if (LI.hasOneUse())
    if (auto* CI = dyn_cast<CastInst>(LI.user_back())) {
      if (CI->isNoopCast(DL)) {
        LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
        CI->replaceAllUsesWith(NewLoad);
        IC.EraseInstFromFunction(*CI);
        return &LI;
      }
    }
  // FIXME: We should also canonicalize loads of vectors when their elements are
  // cast to other types.
  return nullptr;
#else
  return nullptr;
#endif // HLSL Change - bitcast to i32* won't help HLSL.
}
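
// Unpack a load of a single-element aggregate into a load of the element plus
// an insertvalue. A rough sketch of the intent (hypothetical IR; note that the
// struct case is suppressed below by an HLSL change):
//   %agg = load {float}* %p
// becomes
//   %agg.unpack = load float* %p.cast
//   %agg = insertvalue {float} undef, float %agg.unpack, 0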
static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // loads here but it isn't clear that this is important.
  if (!LI.isSimple())
    return nullptr;
  Type *T = LI.getType();
  if (!T->isAggregateType())
    return nullptr;
  assert(LI.getAlignment() && "Alignment must be set at this point");
  if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct only has one element, we unpack.
    if (ST->getNumElements() == 1
        && false // HLSL Change - avoid unpack dxil types.
        ) {
      LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
                                               ".unpack");
      return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
          UndefValue::get(T), NewLoad, 0, LI.getName()));
    }
  }
  if (auto *AT = dyn_cast<ArrayType>(T)) {
    // If the array only has one element, we unpack.
    if (AT->getNumElements() == 1) {
      LoadInst *NewLoad = combineLoadToNewType(IC, LI, AT->getElementType(),
                                               ".unpack");
      return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
          UndefValue::get(T), NewLoad, 0, LI.getName()));
    }
  }
  return nullptr;
}

// If we can determine that all possible objects pointed to by the provided
// pointer value are, not only dereferenceable, but also definitively less than
// or equal to the provided maximum size, then return true. Otherwise, return
// false (constant global values and allocas fall into this category).
//
// FIXME: This should probably live in ValueTracking (or similar).
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
                                     const DataLayout &DL) {
  SmallPtrSet<Value *, 4> Visited;
  SmallVector<Value *, 4> Worklist(1, V);
  do {
    Value *P = Worklist.pop_back_val();
    P = P->stripPointerCasts();
    if (!Visited.insert(P).second)
      continue;
    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }
    if (PHINode *PN = dyn_cast<PHINode>(P)) {
      for (Value *IncValue : PN->incoming_values())
        Worklist.push_back(IncValue);
      continue;
    }
    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
      if (GA->mayBeOverridden())
        return false;
      Worklist.push_back(GA->getAliasee());
      continue;
    }
    // If we know how big this object is, and it is less than MaxSize, continue
    // searching. Otherwise, return false.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
      if (!AI->getAllocatedType()->isSized())
        return false;
      ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
      if (!CS)
        return false;
      uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
      // Make sure that, even if the multiplication below would wrap as an
      // uint64_t, we still do the right thing.
      if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
        return false;
      continue;
    }
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
      if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
        return false;
      uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
      if (InitSize > MaxSize)
        return false;
      continue;
    }
    return false;
  } while (!Worklist.empty());
  return true;
}

// If we're indexing into an object of a known size, and the outer index is
// not a constant, but having any value but zero would lead to undefined
// behavior, replace it with zero.
//
// For example, if we have:
// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
// ...
// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
// ... = load i32* %arrayidx, align 4
// Then we know that we can replace %x in the GEP with i64 0.
//
// FIXME: We could fold any GEP index to zero that would cause UB if it were
// not zero. Currently, we only handle the first such index. We could also
// search through non-zero constant indices if we kept track of the offsets
// those indices implied.
static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
                                     Instruction *MemI, unsigned &Idx) {
  if (GEPI->getNumOperands() < 2)
    return false;
  // Find the first non-zero index of a GEP. If all indices are zero, return
  // one past the last index.
  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
    unsigned I = 1;
    for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
      Value *V = GEPI->getOperand(I);
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
        if (CI->isZero())
          continue;
      break;
    }
    return I;
  };
  // Skip through initial 'zero' indices, and find the corresponding pointer
  // type. See if the next index is not a constant.
  Idx = FirstNZIdx(GEPI);
  if (Idx == GEPI->getNumOperands())
    return false;
  if (isa<Constant>(GEPI->getOperand(Idx)))
    return false;
  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
  Type *AllocTy = GetElementPtrInst::getIndexedType(
      cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType())
          ->getElementType(),
      Ops);
  if (!AllocTy || !AllocTy->isSized())
    return false;
  const DataLayout &DL = IC.getDataLayout();
  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
  // If there are more indices after the one we might replace with a zero, make
  // sure they're all non-negative. If any of them are negative, the overall
  // address being computed might be before the base address determined by the
  // first non-zero index.
  auto IsAllNonNegative = [&]() {
    for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
      bool KnownNonNegative, KnownNegative;
      IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
                        KnownNegative, 0, MemI);
      if (KnownNonNegative)
        continue;
      return false;
    }
    return true;
  };
  // FIXME: If the GEP is not inbounds, and there are extra indices after the
  // one we'll replace, those could cause the address computation to wrap
  // (rendering the IsAllNonNegative() check below insufficient). We can do
  // better, ignoring zero indices (and other indices we can prove small
  // enough not to wrap).
  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
    return false;
  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
  // also known to be dereferenceable.
  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
         IsAllNonNegative();
}

// If we're indexing into an object with a variable index for the memory
// access, but the object has only one element, we can assume that the index
// will always be zero. If we replace the GEP, return it.
template <typename T>
static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
                                          T &MemI) {
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
    unsigned Idx;
    if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
      Instruction *NewGEPI = GEPI->clone();
      NewGEPI->setOperand(Idx,
          ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
      NewGEPI->insertBefore(GEPI);
      MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
      return NewGEPI;
    }
  }
  return nullptr;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);
  // Try to canonicalize the loaded type.
  if (Instruction *Res = combineLoadToOperationType(*this, LI))
    return Res;
  // Attempt to improve the alignment.
  unsigned KnownAlign = getOrEnforceKnownAlignment(
      Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
  unsigned LoadAlign = LI.getAlignment();
  unsigned EffectiveLoadAlign =
      LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
  if (KnownAlign > EffectiveLoadAlign)
    LI.setAlignment(KnownAlign);
  else if (LoadAlign == 0)
    LI.setAlignment(EffectiveLoadAlign);
  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
    Worklist.Add(NewGEPI);
    return &LI;
  }
  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;
  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
    return Res;
  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  AAMDNodes AATags;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,
                                                     6, AA, &AATags)) {
    if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
      unsigned KnownIDs[] = {
        LLVMContext::MD_tbaa,
        LLVMContext::MD_alias_scope,
        LLVMContext::MD_noalias,
        LLVMContext::MD_range,
        LLVMContext::MD_invariant_load,
        LLVMContext::MD_nonnull,
      };
      combineMetadata(NLI, &LI, KnownIDs);
    };
    return ReplaceInstUsesWith(
        LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
                                            LI.getName() + ".cast"));
  }
  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable. We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }
  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable. We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }
  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap! Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }
      // load (select (cond, null, P)) -> load P
      if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
          LI.getPointerAddressSpace() == 0) {
        LI.setOperand(0, SI->getOperand(2));
        return &LI;
      }
      // load (select (cond, P, null)) -> load P
      if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
          LI.getPointerAddressSpace() == 0) {
        LI.setOperand(0, SI->getOperand(1));
        return &LI;
      }
    }
  }
  return nullptr;
}

/// \brief Combine stores to match the type of value being stored.
///
/// The core idea here is that the memory does not have any intrinsic type and
/// where we can we should match the type of a store to the type of value being
/// stored.
///
/// However, this routine must never change the width of a store or the number of
/// stores as that would introduce a semantic change. This combine is expected to
/// be a semantic no-op which just allows stores to more closely model the types
/// of their incoming values.
///
/// Currently, we also refuse to change the precise type used for an atomic or
/// volatile store. This is debatable, and might be reasonable to change later.
/// However, it is risky in case some backend or other part of LLVM is relying
/// on the exact type stored to select appropriate atomic operations.
///
/// \returns true if the store was successfully combined away. This indicates
/// the caller must erase the store instruction. We have to let the caller erase
/// the store instruction as otherwise there is no way to signal whether it was
/// combined or not: IC.EraseInstFromFunction returns a null pointer.
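///
/// Schematically (hypothetical IR, for illustration only): a store of a
/// bitcast value such as
///   %b = bitcast float %v to i32
///   store i32 %b, i32* %p
/// is rewritten to store the original value through a retyped pointer:
///   %p.cast = bitcast i32* %p to float*
///   store float %v, float* %p.cast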
static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // stores here but it isn't clear that this is important.
  if (!SI.isSimple())
    return false;
  Value *V = SI.getValueOperand();
  // Fold away bit casts of the stored value by storing the original type.
  if (auto *BC = dyn_cast<BitCastInst>(V)) {
    V = BC->getOperand(0);
    combineStoreToNewValue(IC, SI, V);
    return true;
  }
  // FIXME: We should also canonicalize stores of vectors when their elements
  // are cast to other types.
  return false;
}
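
// Unpack a store of a single-element aggregate into a store of the element
// itself. A rough sketch of the intent (hypothetical IR; as above, the struct
// case is suppressed by an HLSL change):
//   store {float} %agg, {float}* %p
// becomes
//   %elt = extractvalue {float} %agg, 0
//   store float %elt, float* %p.cast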
static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // stores here but it isn't clear that this is important.
  if (!SI.isSimple())
    return false;
  Value *V = SI.getValueOperand();
  Type *T = V->getType();
  if (!T->isAggregateType())
    return false;
  if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct only has one element, we unpack.
    if (ST->getNumElements() == 1
        && false // HLSL Change - avoid unpack dxil types.
        ) {
      V = IC.Builder->CreateExtractValue(V, 0);
      combineStoreToNewValue(IC, SI, V);
      return true;
    }
  }
  if (auto *AT = dyn_cast<ArrayType>(T)) {
    // If the array only has one element, we unpack.
    if (AT->getNumElements() == 1) {
      V = IC.Builder->CreateExtractValue(V, 0);
      combineStoreToNewValue(IC, SI, V);
      return true;
    }
  }
  return false;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;
  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;
  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);
  // Try to canonicalize the stored type.
  if (combineStoreToValueType(*this, SI))
    return EraseInstFromFunction(SI);
  // Attempt to improve the alignment.
  unsigned KnownAlign = getOrEnforceKnownAlignment(
      Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
  unsigned StoreAlign = SI.getAlignment();
  unsigned EffectiveStoreAlign =
      StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
  if (KnownAlign > EffectiveStoreAlign)
    SI.setAlignment(KnownAlign);
  else if (StoreAlign == 0)
    SI.setAlignment(EffectiveStoreAlign);
  // Try to canonicalize the stored type.
  if (unpackStoreToAggregate(*this, SI))
    return EraseInstFromFunction(SI);
  // Replace GEP indices if possible.
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
    Worklist.Add(NewGEPI);
    return &SI;
  }
  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return nullptr;
  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }
  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }
    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }
    // If this is a load, we have to stop. However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);
      // Otherwise, this is a load from some other location. Stores before it
      // may not be dead.
      break;
    }
    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }
  // store X, null -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U); // Dropped a use.
    }
    return nullptr; // Do not modify these!
  }
  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);
  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return nullptr; // xform done!
  return nullptr;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
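/// Schematically (hypothetical IR for the first case): stores in the two
/// predecessors of %merge
///   then:  store i32 %v1, i32* %P
///          br label %merge
///   else:  store i32 %v2, i32* %P
///          br label %merge
/// become a single store fed by a phi in the successor:
///   merge: %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
///          store i32 %storemerge, i32* %P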
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();
  // Check to see if the successor block has exactly two incoming edges. If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;
  if (P != StoreBB)
    OtherBB = P;
  if (++PI == pred_end(DestBB))
    return false;
  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;
  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop)
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;
  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;
  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case. There is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;
    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle. See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }
    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }
  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }
  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());
  // If the two stores had AA tags, merge them.
  AAMDNodes AATags;
  SI.getAAMetadata(AATags);
  if (AATags) {
    OtherStore->getAAMetadata(AATags, /* Merge = */ true);
    NewSI->setAAMetadata(AATags);
  }
  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}