  1. //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements an analysis that determines, for a given memory
  11. // operation, what preceding memory operations it depends on. It builds on
  12. // alias analysis information, and tries to provide a lazy, caching interface to
  13. // a common kind of alias information query.
  14. //
  15. //===----------------------------------------------------------------------===//
  16. #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/ADT/Statistic.h"
  19. #include "llvm/Analysis/AliasAnalysis.h"
  20. #include "llvm/Analysis/AssumptionCache.h"
  21. #include "llvm/Analysis/InstructionSimplify.h"
  22. #include "llvm/Analysis/MemoryBuiltins.h"
  23. #include "llvm/Analysis/PHITransAddr.h"
  24. #include "llvm/Analysis/ValueTracking.h"
  25. #include "llvm/IR/DataLayout.h"
  26. #include "llvm/IR/Dominators.h"
  27. #include "llvm/IR/Function.h"
  28. #include "llvm/IR/Instructions.h"
  29. #include "llvm/IR/IntrinsicInst.h"
  30. #include "llvm/IR/LLVMContext.h"
  31. #include "llvm/IR/PredIteratorCache.h"
  32. #include "llvm/Support/Debug.h"
  33. using namespace llvm;
  34. #define DEBUG_TYPE "memdep"
  35. STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
  36. STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
  37. STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
  38. STATISTIC(NumCacheNonLocalPtr,
  39. "Number of fully cached non-local ptr responses");
  40. STATISTIC(NumCacheDirtyNonLocalPtr,
  41. "Number of cached, but dirty, non-local ptr responses");
  42. STATISTIC(NumUncacheNonLocalPtr,
  43. "Number of uncached non-local ptr responses");
  44. STATISTIC(NumCacheCompleteNonLocalPtr,
  45. "Number of block queries that were completely cached");
  46. // Limit for the number of instructions to scan in a block.
  47. static const unsigned int BlockScanLimit = 500;
  48. // Limit on the number of memdep results to process.
  49. static const unsigned int NumResultsLimit = 100;
  50. char MemoryDependenceAnalysis::ID = 0;
  51. // Register this pass...
  52. INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
  53. "Memory Dependence Analysis", false, true)
  54. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  55. INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
  56. INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
  57. "Memory Dependence Analysis", false, true)
  58. MemoryDependenceAnalysis::MemoryDependenceAnalysis()
  59. : FunctionPass(ID) {
  60. initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
  61. }
  62. MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
  63. }
  64. /// Clean up memory in between runs
  65. void MemoryDependenceAnalysis::releaseMemory() {
  66. LocalDeps.clear();
  67. NonLocalDeps.clear();
  68. NonLocalPointerDeps.clear();
  69. ReverseLocalDeps.clear();
  70. ReverseNonLocalDeps.clear();
  71. ReverseNonLocalPtrDeps.clear();
  72. PredCache.clear();
  73. }
  74. /// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
  75. ///
  76. void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  77. AU.setPreservesAll();
  78. AU.addRequired<AssumptionCacheTracker>();
  79. AU.addRequiredTransitive<AliasAnalysis>();
  80. }
  81. bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
  82. AA = &getAnalysis<AliasAnalysis>();
  83. AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  84. DominatorTreeWrapperPass *DTWP =
  85. getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  86. DT = DTWP ? &DTWP->getDomTree() : nullptr;
  87. return false;
  88. }
  89. /// RemoveFromReverseMap - This is a helper function that removes Val from
  90. /// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
  91. template <typename KeyTy>
  92. static void RemoveFromReverseMap(DenseMap<Instruction*,
  93. SmallPtrSet<KeyTy, 4> > &ReverseMap,
  94. Instruction *Inst, KeyTy Val) {
  95. typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
  96. InstIt = ReverseMap.find(Inst);
  97. assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
  98. bool Found = InstIt->second.erase(Val);
  99. assert(Found && "Invalid reverse map!"); (void)Found;
  100. if (InstIt->second.empty())
  101. ReverseMap.erase(InstIt);
  102. }
  103. /// GetLocation - If the given instruction references a specific memory
  104. /// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
  105. /// Return a ModRefInfo value describing the general behavior of the
  106. /// instruction.
  107. static AliasAnalysis::ModRefResult
  108. GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {
  109. if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
  110. if (LI->isUnordered()) {
  111. Loc = MemoryLocation::get(LI);
  112. return AliasAnalysis::Ref;
  113. }
  114. if (LI->getOrdering() == Monotonic) {
  115. Loc = MemoryLocation::get(LI);
  116. return AliasAnalysis::ModRef;
  117. }
  118. Loc = MemoryLocation();
  119. return AliasAnalysis::ModRef;
  120. }
  121. if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
  122. if (SI->isUnordered()) {
  123. Loc = MemoryLocation::get(SI);
  124. return AliasAnalysis::Mod;
  125. }
  126. if (SI->getOrdering() == Monotonic) {
  127. Loc = MemoryLocation::get(SI);
  128. return AliasAnalysis::ModRef;
  129. }
  130. Loc = MemoryLocation();
  131. return AliasAnalysis::ModRef;
  132. }
  133. if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
  134. Loc = MemoryLocation::get(V);
  135. return AliasAnalysis::ModRef;
  136. }
  137. if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
  138. // calls to free() deallocate the entire structure
  139. Loc = MemoryLocation(CI->getArgOperand(0));
  140. return AliasAnalysis::Mod;
  141. }
  142. if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
  143. AAMDNodes AAInfo;
  144. switch (II->getIntrinsicID()) {
  145. case Intrinsic::lifetime_start:
  146. case Intrinsic::lifetime_end:
  147. case Intrinsic::invariant_start:
  148. II->getAAMetadata(AAInfo);
  149. Loc = MemoryLocation(
  150. II->getArgOperand(1),
  151. cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);
  152. // These intrinsics don't really modify the memory, but returning Mod
  153. // will allow them to be handled conservatively.
  154. return AliasAnalysis::Mod;
  155. case Intrinsic::invariant_end:
  156. II->getAAMetadata(AAInfo);
  157. Loc = MemoryLocation(
  158. II->getArgOperand(2),
  159. cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);
  160. // These intrinsics don't really modify the memory, but returning Mod
  161. // will allow them to be handled conservatively.
  162. return AliasAnalysis::Mod;
  163. default:
  164. break;
  165. }
  166. }
  167. // Otherwise, just do the coarse-grained thing that always works.
  168. if (Inst->mayWriteToMemory())
  169. return AliasAnalysis::ModRef;
  170. if (Inst->mayReadFromMemory())
  171. return AliasAnalysis::Ref;
  172. return AliasAnalysis::NoModRef;
  173. }
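// Editor's illustration (not part of the original source): for an ordinary
// (unordered) `%v = load i32, i32* %p`, GetLocation fills Loc with %p and a
// 4-byte size and returns Ref; for an unordered `store i32 0, i32* %p` it
// fills in the same location and returns Mod. Instructions with no
// identifiable location (e.g. most calls) leave Loc.Ptr null and fall through
// to the coarse mayWriteToMemory()/mayReadFromMemory() checks above.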
  174. /// getCallSiteDependencyFrom - Private helper for finding the local
  175. /// dependencies of a call site.
  176. MemDepResult MemoryDependenceAnalysis::
  177. getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
  178. BasicBlock::iterator ScanIt, BasicBlock *BB) {
  179. unsigned Limit = BlockScanLimit;
  180. // Walk backwards through the block, looking for dependencies
  181. while (ScanIt != BB->begin()) {
  182. // HLSL Change - Begin
  183. // Skip debug info
  184. if (isa<DbgInfoIntrinsic>(*std::prev(ScanIt))) {
  185. ScanIt--; continue;
  186. }
  187. // HLSL Change - End
  188. // Limit the amount of scanning we do so we don't end up with quadratic
  189. // running time on extreme testcases.
  190. --Limit;
  191. if (!Limit)
  192. return MemDepResult::getUnknown();
  193. Instruction *Inst = --ScanIt;
  194. // If this inst is a memory op, get the pointer it accessed
  195. MemoryLocation Loc;
  196. AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
  197. if (Loc.Ptr) {
  198. // A simple instruction.
  199. if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
  200. return MemDepResult::getClobber(Inst);
  201. continue;
  202. }
  203. if (auto InstCS = CallSite(Inst)) {
  204. // Debug intrinsics don't cause dependences.
  205. if (isa<DbgInfoIntrinsic>(Inst)) continue;
  206. // If these two calls do not interfere, look past it.
  207. switch (AA->getModRefInfo(CS, InstCS)) {
  208. case AliasAnalysis::NoModRef:
  209. // If the two calls are the same, return InstCS as a Def, so that
  210. // CS can be found redundant and eliminated.
  211. if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
  212. CS.getInstruction()->isIdenticalToWhenDefined(Inst))
  213. return MemDepResult::getDef(Inst);
  214. // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
  215. // keep scanning.
  216. continue;
  217. default:
  218. return MemDepResult::getClobber(Inst);
  219. }
  220. }
  221. // If we could not obtain a pointer for the instruction and the instruction
  222. // touches memory, then assume that this is a dependency.
  223. if (MR != AliasAnalysis::NoModRef)
  224. return MemDepResult::getClobber(Inst);
  225. }
  226. // No dependence found. If this is the entry block of the function, it is
  227. // unknown, otherwise it is non-local.
  228. if (BB != &BB->getParent()->getEntryBlock())
  229. return MemDepResult::getNonLocal();
  230. return MemDepResult::getNonFuncLocal();
  231. }
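// Editor's note (illustrative, not from the original source): if CS is a
// read-only call such as `%a = call i32 @f(i32 %x)` and the backwards scan
// reaches an identical earlier `call i32 @f(i32 %x)` that neither modifies
// memory nor interferes with CS, the loop above returns getDef() on the
// earlier call, allowing a client to treat the second call as redundant.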
  232. /// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
  233. /// would fully overlap MemLoc if done as a wider legal integer load.
  234. ///
  235. /// MemLocBase, MemLocOffset are lazily computed here the first time the
  236. /// base/offs of memloc is needed.
  237. static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
  238. const Value *&MemLocBase,
  239. int64_t &MemLocOffs,
  240. const LoadInst *LI) {
  241. const DataLayout &DL = LI->getModule()->getDataLayout();
  242. // If we haven't already computed the base/offset of MemLoc, do so now.
  243. if (!MemLocBase)
  244. MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
  245. unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
  246. MemLocBase, MemLocOffs, MemLoc.Size, LI);
  247. return Size != 0;
  248. }
  249. /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
  250. /// looks at a memory location for a load (specified by MemLocBase, Offs,
  251. /// and Size) and compares it against a load. If the specified load could
  252. /// be safely widened to a larger integer load that is 1) still efficient,
  253. /// 2) safe for the target, and 3) would provide the specified memory
  254. /// location value, then this function returns the size in bytes of the
  255. /// load width to use. If not, this returns zero.
  256. unsigned MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
  257. const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
  258. const LoadInst *LI) {
  259. // We can only extend simple integer loads.
  260. if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
  261. // Load widening is hostile to ThreadSanitizer: it may cause false positives
  262. // or make the reports more cryptic (access sizes are wrong).
  263. if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
  264. return 0;
  265. const DataLayout &DL = LI->getModule()->getDataLayout();
  266. // Get the base of this load.
  267. int64_t LIOffs = 0;
  268. const Value *LIBase =
  269. GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
  270. // If the two pointers are not based on the same pointer, we can't tell that
  271. // they are related.
  272. if (LIBase != MemLocBase) return 0;
  273. // Okay, the two values are based on the same pointer, but returned as
  274. // no-alias. This happens when we have things like two byte loads at "P+1"
  275. // and "P+3". Check to see if increasing the size of the "LI" load up to its
  276. // alignment (or the largest native integer type) will allow us to load all
  277. // the bits required by MemLoc.
  278. // If MemLoc is before LI, then no widening of LI will help us out.
  279. if (MemLocOffs < LIOffs) return 0;
  280. // Get the alignment of the load in bytes. We assume that it is safe to load
  281. // any legal integer up to this size without a problem. For example, if we're
  282. // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
  283. // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
  284. // to i16.
  285. unsigned LoadAlign = LI->getAlignment();
  286. int64_t MemLocEnd = MemLocOffs+MemLocSize;
  287. // If no amount of rounding up will let MemLoc fit into LI, then bail out.
  288. if (LIOffs+LoadAlign < MemLocEnd) return 0;
  289. // This is the size of the load to try. Start with the next larger power of
  290. // two.
  291. unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
  292. NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
  293. while (1) {
  294. // If this load size is bigger than our known alignment or would not fit
  295. // into a native integer register, then we fail.
  296. if (NewLoadByteSize > LoadAlign ||
  297. !DL.fitsInLegalInteger(NewLoadByteSize*8))
  298. return 0;
  299. if (LIOffs + NewLoadByteSize > MemLocEnd &&
  300. LI->getParent()->getParent()->hasFnAttribute(
  301. Attribute::SanitizeAddress))
  302. // We will be reading past the location accessed by the original program.
  303. // While this is safe in a regular build, Address Safety analysis tools
  304. // may start reporting false warnings. So, don't do widening.
  305. return 0;
  306. // If a load of this width would include all of MemLoc, then we succeed.
  307. if (LIOffs+NewLoadByteSize >= MemLocEnd)
  308. return NewLoadByteSize;
  309. NewLoadByteSize <<= 1;
  310. }
  311. }
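// Worked example (editor's sketch; assumes i32 is a legal integer type and no
// sanitizers are enabled): let LI be `load i16, i16* %p, align 4` with
// LIOffs == 0, and let MemLoc be a 1-byte access at offset 3 from the same
// base (MemLocOffs == 3, so MemLocEnd == 4). The starting width is 2 bytes,
// NextPowerOf2 bumps it to 4, and since LIOffs + 4 >= MemLocEnd the function
// returns 4: widening LI to an i32 load would cover the queried byte.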
  312. static bool isVolatile(Instruction *Inst) {
  313. if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
  314. return LI->isVolatile();
  315. else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
  316. return SI->isVolatile();
  317. else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
  318. return AI->isVolatile();
  319. return false;
  320. }
  321. /// getPointerDependencyFrom - Return the instruction on which a memory
  322. /// location depends. If isLoad is true, this routine ignores may-aliases with
  323. /// read-only operations. If isLoad is false, this routine ignores may-aliases
  324. /// with reads from read-only locations. If possible, pass the query
  325. /// instruction as well; this function may take advantage of the metadata
  326. /// annotated to the query instruction to refine the result.
  327. MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
  328. const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
  329. BasicBlock *BB, Instruction *QueryInst, unsigned Limit) {
  330. const Value *MemLocBase = nullptr;
  331. int64_t MemLocOffset = 0;
  332. bool isInvariantLoad = false;
  333. unsigned DefaultLimit = BlockScanLimit;
  334. if (Limit == 0)
  335. Limit = DefaultLimit;
  336. // We must be careful with atomic accesses, as they may allow another thread
  337. // to touch this location, clobbering it. We are conservative: if the
  338. // QueryInst is not a simple (non-atomic) memory access, we automatically
  339. // return getClobber.
  340. // If it is simple, we know based on the results of
  341. // "Compiler testing via a theory of sound optimisations in the C11/C++11
  342. // memory model" in PLDI 2013, that a non-atomic location can only be
  343. // clobbered between a pair of a release and an acquire action, with no
  344. // access to the location in between.
  345. // Here is an example for giving the general intuition behind this rule.
  346. // In the following code:
  347. // store x 0;
  348. // release action; [1]
  349. // acquire action; [4]
  350. // %val = load x;
  351. // It is unsafe to replace %val by 0 because another thread may be running:
  352. // acquire action; [2]
  353. // store x 42;
  354. // release action; [3]
  355. // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
  356. // being 42. A key property of this program however is that if either
  357. // 1 or 4 were missing, there would be a race between the store of 42 and
  358. // either the store of 0 or the load (making the whole program racy).
  359. // The paper mentioned above shows that the same property is respected
  360. // by every program that can detect any optimisation of that kind: either
  361. // it is racy (undefined) or there is a release followed by an acquire
  362. // between the pair of accesses under consideration.
  363. // If the load is invariant, we "know" that it doesn't alias *any* write. We
  364. // do want to respect mustalias results since defs are useful for value
  365. // forwarding, but any mayalias write can be assumed to be noalias.
  366. // Arguably, this logic should be pushed inside AliasAnalysis itself.
  367. if (isLoad && QueryInst) {
  368. LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
  369. if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
  370. isInvariantLoad = true;
  371. }
  372. const DataLayout &DL = BB->getModule()->getDataLayout();
  373. // Walk backwards through the basic block, looking for dependencies.
  374. while (ScanIt != BB->begin()) {
  375. Instruction *Inst = --ScanIt;
  376. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
  377. // Debug intrinsics don't (and can't) cause dependencies.
  378. if (isa<DbgInfoIntrinsic>(II)) continue;
  379. // Limit the amount of scanning we do so we don't end up with quadratic
  380. // running time on extreme testcases.
  381. --Limit;
  382. if (!Limit)
  383. return MemDepResult::getUnknown();
  384. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
  385. // If we reach a lifetime begin or end marker, then the query ends here
  386. // because the value is undefined.
  387. if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
  388. // FIXME: This only considers queries directly on the invariant-tagged
  389. // pointer, not on query pointers that are indexed off of them. It'd
  390. // be nice to handle that at some point (the right approach is to use
  391. // GetPointerBaseWithConstantOffset).
  392. if (AA->isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
  393. return MemDepResult::getDef(II);
  394. continue;
  395. }
  396. }
  397. // Values depend on loads if the pointers are must aliased. This means that
  398. // a load depends on another must aliased load from the same value.
  399. // One exception is atomic loads: a value can depend on an atomic load that it
  400. // does not alias with when this atomic load indicates that another thread may
  401. // be accessing the location.
  402. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
  403. // While volatile accesses cannot be eliminated, they do not have to clobber
  404. // non-aliasing locations, as normal accesses, for example, can be safely
  405. // reordered with volatile accesses.
  406. if (LI->isVolatile()) {
  407. if (!QueryInst)
  408. // Original QueryInst *may* be volatile
  409. return MemDepResult::getClobber(LI);
  410. if (isVolatile(QueryInst))
  411. // Ordering required if QueryInst is itself volatile
  412. return MemDepResult::getClobber(LI);
  413. // Otherwise, volatile doesn't imply any special ordering
  414. }
  415. // Atomic loads have complications involved.
  416. // A Monotonic (or higher) load is OK if the query inst is itself not atomic.
  417. // FIXME: This is overly conservative.
  418. if (LI->isAtomic() && LI->getOrdering() > Unordered) {
  419. if (!QueryInst)
  420. return MemDepResult::getClobber(LI);
  421. if (LI->getOrdering() != Monotonic)
  422. return MemDepResult::getClobber(LI);
  423. if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
  424. if (!QueryLI->isSimple())
  425. return MemDepResult::getClobber(LI);
  426. } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
  427. if (!QuerySI->isSimple())
  428. return MemDepResult::getClobber(LI);
  429. } else if (QueryInst->mayReadOrWriteMemory()) {
  430. return MemDepResult::getClobber(LI);
  431. }
  432. }
  433. MemoryLocation LoadLoc = MemoryLocation::get(LI);
  434. // If we found a pointer, check if it could be the same as our pointer.
  435. AliasResult R = AA->alias(LoadLoc, MemLoc);
  436. if (isLoad) {
  437. if (R == NoAlias) {
  438. // If this is an over-aligned integer load (for example,
  439. // "load i8* %P, align 4") see if it would obviously overlap with the
  440. // queried location if widened to a larger load (e.g. if the queried
  441. // location is 1 byte at P+1). If so, return it as a load/load
  442. // clobber result, allowing the client to decide to widen the load if
  443. // it wants to.
  444. if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
  445. if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
  446. isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
  447. MemLocOffset, LI))
  448. return MemDepResult::getClobber(Inst);
  449. }
  450. continue;
  451. }
  452. // Must aliased loads are defs of each other.
  453. if (R == MustAlias)
  454. return MemDepResult::getDef(Inst);
  455. #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
  456. // in terms of clobbering loads, but since it does this by looking
  457. // at the clobbering load directly, it doesn't know about any
  458. // phi translation that may have happened along the way.
  459. // If we have a partial alias, then return this as a clobber for the
  460. // client to handle.
  461. if (R == PartialAlias)
  462. return MemDepResult::getClobber(Inst);
  463. #endif
  464. // Loads that merely may-alias do not create a dependence between each
  465. // other; keep scanning.
  466. continue;
  467. }
  468. // Stores don't depend on non-aliasing accesses.
  469. if (R == NoAlias)
  470. continue;
  471. // Stores don't alias loads from read-only memory.
  472. if (AA->pointsToConstantMemory(LoadLoc))
  473. continue;
  474. // Stores depend on may/must aliased loads.
  475. return MemDepResult::getDef(Inst);
  476. }
  477. if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
  478. // Atomic stores have complications involved.
  479. // A Monotonic store is OK if the query inst is itself not atomic.
  480. // FIXME: This is overly conservative.
  481. if (!SI->isUnordered()) {
  482. if (!QueryInst)
  483. return MemDepResult::getClobber(SI);
  484. if (SI->getOrdering() != Monotonic)
  485. return MemDepResult::getClobber(SI);
  486. if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
  487. if (!QueryLI->isSimple())
  488. return MemDepResult::getClobber(SI);
  489. } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
  490. if (!QuerySI->isSimple())
  491. return MemDepResult::getClobber(SI);
  492. } else if (QueryInst->mayReadOrWriteMemory()) {
  493. return MemDepResult::getClobber(SI);
  494. }
  495. }
  496. // FIXME: this is overly conservative.
  497. // While volatile accesses cannot be eliminated, they do not have to clobber
  498. // non-aliasing locations, as normal accesses can for example be reordered
  499. // with volatile accesses.
  500. if (SI->isVolatile())
  501. return MemDepResult::getClobber(SI);
  502. // If alias analysis can tell that this store is guaranteed to not modify
  503. // the query pointer, ignore it. Use getModRefInfo to handle cases where
  504. // the query pointer points to constant memory etc.
  505. if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
  506. continue;
  507. // Ok, this store might clobber the query pointer. Check to see if it is
  508. // a must alias: in this case, we want to return this as a def.
  509. MemoryLocation StoreLoc = MemoryLocation::get(SI);
  510. // If we found a pointer, check if it could be the same as our pointer.
  511. AliasResult R = AA->alias(StoreLoc, MemLoc);
  512. if (R == NoAlias)
  513. continue;
  514. if (R == MustAlias)
  515. return MemDepResult::getDef(Inst);
  516. if (isInvariantLoad)
  517. continue;
  518. return MemDepResult::getClobber(Inst);
  519. }
  520. // If this is an allocation, and if we know that the accessed pointer is to
  521. // the allocation, return Def. This means that there is no dependence and
  522. // the access can be optimized based on that. For example, a load could
  523. // turn into undef.
  524. // Note: Only determine this to be a malloc if Inst is the malloc call, not
  525. // a subsequent bitcast of the malloc call result. There can be stores to
  526. // the malloced memory between the malloc call and its bitcast uses, and we
  527. // need to continue scanning until the malloc call.
  528. const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
  529. if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
  530. const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
  531. if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
  532. return MemDepResult::getDef(Inst);
  533. if (isInvariantLoad)
  534. continue;
  535. // Be conservative if the accessed pointer may alias the allocation.
  536. if (AA->alias(Inst, AccessPtr) != NoAlias)
  537. return MemDepResult::getClobber(Inst);
  538. // If the allocation is not aliased and does not read memory (like
  539. // strdup), it is safe to ignore.
  540. if (isa<AllocaInst>(Inst) ||
  541. isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI))
  542. continue;
  543. }
  544. if (isInvariantLoad)
  545. continue;
  546. // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
  547. AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
  548. // If necessary, perform additional analysis.
  549. if (MR == AliasAnalysis::ModRef)
  550. MR = AA->callCapturesBefore(Inst, MemLoc, DT);
  551. switch (MR) {
  552. case AliasAnalysis::NoModRef:
  553. // If the call has no effect on the queried pointer, just ignore it.
  554. continue;
  555. case AliasAnalysis::Mod:
  556. return MemDepResult::getClobber(Inst);
  557. case AliasAnalysis::Ref:
  558. // If the call is known to never store to the pointer, and if this is a
  559. // load query, we can safely ignore it (scan past it).
  560. if (isLoad)
  561. continue;
  562. default:
  563. // Otherwise, there is a potential dependence. Return a clobber.
  564. return MemDepResult::getClobber(Inst);
  565. }
  566. }
  567. // No dependence found. If this is the entry block of the function, it is
  568. // unknown, otherwise it is non-local.
  569. if (BB != &BB->getParent()->getEntryBlock())
  570. return MemDepResult::getNonLocal();
  571. return MemDepResult::getNonFuncLocal();
  572. }
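// Editor's sketch of a typical result (illustrative): querying the load in
//   store i32 7, i32* %p
//   ...
//   %v = load i32, i32* %p
// scans backwards, reaches the store, sees a MustAlias location, and returns
// getDef(store); a client such as GVN can then forward the stored value to
// the load. A may-aliasing, non-invariant case would instead produce
// getClobber(store).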
  573. /// getDependency - Return the instruction on which a memory operation
  574. /// depends.
  575. MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst, unsigned ScanLimit) {
  576. Instruction *ScanPos = QueryInst;
  577. // Check for a cached result
  578. MemDepResult &LocalCache = LocalDeps[QueryInst];
  579. // If the cached entry is non-dirty, just return it. Note that this depends
  580. // on MemDepResult's default constructing to 'dirty'.
  581. if (!LocalCache.isDirty())
  582. return LocalCache;
  583. // Otherwise, if we have a dirty entry, we know we can start the scan at that
  584. // instruction, which may save us some work.
  585. if (Instruction *Inst = LocalCache.getInst()) {
  586. ScanPos = Inst;
  587. RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
  588. }
  589. BasicBlock *QueryParent = QueryInst->getParent();
  590. // Do the scan.
  591. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
  592. // No dependence found. If this is the entry block of the function, it is
  593. // unknown, otherwise it is non-local.
  594. if (QueryParent != &QueryParent->getParent()->getEntryBlock())
  595. LocalCache = MemDepResult::getNonLocal();
  596. else
  597. LocalCache = MemDepResult::getNonFuncLocal();
  598. } else {
  599. MemoryLocation MemLoc;
  600. AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
  601. if (MemLoc.Ptr) {
  602. // If we can do a pointer scan, make it happen.
  603. bool isLoad = !(MR & AliasAnalysis::Mod);
  604. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
  605. isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
  606. LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
  607. QueryParent, QueryInst, ScanLimit);
  608. } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
  609. CallSite QueryCS(QueryInst);
  610. bool isReadOnly = AA->onlyReadsMemory(QueryCS);
  611. LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
  612. QueryParent);
  613. } else
  614. // Non-memory instruction.
  615. LocalCache = MemDepResult::getUnknown();
  616. }
  617. // Remember the result!
  618. if (Instruction *I = LocalCache.getInst())
  619. ReverseLocalDeps[I].insert(QueryInst);
  620. return LocalCache;
  621. }
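// Usage sketch (editor's illustration; assumes a pass that has required this
// analysis in its getAnalysisUsage):
//   MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
//   MemDepResult Dep = MD.getDependency(Load);
//   if (Dep.isDef()) {
//     if (auto *DepStore = dyn_cast<StoreInst>(Dep.getInst())) {
//       // forward DepStore's stored value to Load
//     }
//   } else if (Dep.isNonLocal()) {
//     // fall back to the non-local queries below
//   }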
  622. #ifndef NDEBUG
  623. /// AssertSorted - This method is used when -debug is specified to verify that
  624. /// cache arrays are properly kept sorted.
  625. static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
  626. int Count = -1) {
  627. if (Count == -1) Count = Cache.size();
  628. if (Count == 0) return;
  629. for (unsigned i = 1; i != unsigned(Count); ++i)
  630. assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
  631. }
  632. #endif
  633. /// getNonLocalCallDependency - Perform a full dependency query for the
  634. /// specified call, returning the set of blocks that the value is
  635. /// potentially live across. The returned set of results will include a
  636. /// "NonLocal" result for all blocks where the value is live across.
  637. ///
  638. /// This method assumes the instruction returns a "NonLocal" dependency
  639. /// within its own block.
  640. ///
  641. /// This returns a reference to an internal data structure that may be
  642. /// invalidated on the next non-local query or when an instruction is
  643. /// removed. Clients must copy this data if they want it around longer than
  644. /// that.
  645. const MemoryDependenceAnalysis::NonLocalDepInfo &
  646. MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
  647. assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
  648. "getNonLocalCallDependency should only be used on calls with non-local deps!");
  649. PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
  650. NonLocalDepInfo &Cache = CacheP.first;
  651. /// DirtyBlocks - This is the set of blocks that need to be recomputed. In
  652. /// the cached case, this can happen due to instructions being deleted etc. In
  653. /// the uncached case, this starts out as the set of predecessors we care
  654. /// about.
  655. SmallVector<BasicBlock*, 32> DirtyBlocks;
  656. if (!Cache.empty()) {
  657. // Okay, we have a cache entry. If we know it is not dirty, just return it
  658. // with no computation.
  659. if (!CacheP.second) {
  660. ++NumCacheNonLocal;
  661. return Cache;
  662. }
  663. // If we already have a partially computed set of results, scan them to
  664. // determine what is dirty, seeding our initial DirtyBlocks worklist.
  665. for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
  666. I != E; ++I)
  667. if (I->getResult().isDirty())
  668. DirtyBlocks.push_back(I->getBB());
  669. // Sort the cache so that we can do fast binary search lookups below.
  670. std::sort(Cache.begin(), Cache.end());
  671. ++NumCacheDirtyNonLocal;
  672. //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
  673. // << Cache.size() << " cached: " << *QueryInst;
  674. } else {
  675. // Seed DirtyBlocks with each of the preds of QueryInst's block.
  676. BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
  677. for (BasicBlock *Pred : PredCache.get(QueryBB))
  678. DirtyBlocks.push_back(Pred);
  679. ++NumUncacheNonLocal;
  680. }
  681. // isReadonlyCall - If this is a read-only call, we can be more aggressive.
  682. bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
  683. SmallPtrSet<BasicBlock*, 64> Visited;
  684. unsigned NumSortedEntries = Cache.size();
  685. DEBUG(AssertSorted(Cache));
  686. // Iterate while we still have blocks to update.
  687. while (!DirtyBlocks.empty()) {
  688. BasicBlock *DirtyBB = DirtyBlocks.back();
  689. DirtyBlocks.pop_back();
  690. // Already processed this block?
  691. if (!Visited.insert(DirtyBB).second)
  692. continue;
  693. // Do a binary search to see if we already have an entry for this block in
  694. // the cache set. If so, find it.
  695. DEBUG(AssertSorted(Cache, NumSortedEntries));
  696. NonLocalDepInfo::iterator Entry =
  697. std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
  698. NonLocalDepEntry(DirtyBB));
  699. if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
  700. --Entry;
  701. NonLocalDepEntry *ExistingResult = nullptr;
  702. if (Entry != Cache.begin()+NumSortedEntries &&
  703. Entry->getBB() == DirtyBB) {
  704. // If we already have an entry, and if it isn't already dirty, the block
  705. // is done.
  706. if (!Entry->getResult().isDirty())
  707. continue;
  708. // Otherwise, remember this slot so we can update the value.
  709. ExistingResult = &*Entry;
  710. }
  711. // If the dirty entry has a pointer, start scanning from it so we don't have
  712. // to rescan the entire block.
  713. BasicBlock::iterator ScanPos = DirtyBB->end();
  714. if (ExistingResult) {
  715. if (Instruction *Inst = ExistingResult->getResult().getInst()) {
  716. ScanPos = Inst;
  717. // We're removing QueryInst's use of Inst.
  718. RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
  719. QueryCS.getInstruction());
  720. }
  721. }
  722. // Find out if this block has a local dependency for QueryInst.
  723. MemDepResult Dep;
  724. if (ScanPos != DirtyBB->begin()) {
  725. Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
  726. } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
  727. // No dependence found. If this is the entry block of the function, it is
  728. // unknown, otherwise it is non-local.
  729. Dep = MemDepResult::getNonLocal();
  730. } else {
  731. Dep = MemDepResult::getNonFuncLocal();
  732. }
  733. // If we had a dirty entry for the block, update it. Otherwise, just add
  734. // a new entry.
  735. if (ExistingResult)
  736. ExistingResult->setResult(Dep);
  737. else
  738. Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
  739. // If the block has a dependency (i.e. it isn't completely transparent to
  740. // the value), remember the association!
  741. if (!Dep.isNonLocal()) {
  742. // Keep the ReverseNonLocalDeps map up to date so we can efficiently
  743. // update this when we remove instructions.
  744. if (Instruction *Inst = Dep.getInst())
  745. ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
  746. } else {
  747. // If the block *is* completely transparent to the load, we need to check
  748. // the predecessors of this block. Add them to our worklist.
  749. for (BasicBlock *Pred : PredCache.get(DirtyBB))
  750. DirtyBlocks.push_back(Pred);
  751. }
  752. }
  753. return Cache;
  754. }
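// Editor's note (illustrative): for a read-only call in a block with
// predecessors A and B, the returned NonLocalDepInfo might hold a Def entry
// for A (an identical call found there) and a NonLocal entry for B (B is
// transparent to the call, so B's own predecessors were scanned in turn).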
  755. /// getNonLocalPointerDependency - Perform a full dependency query for an
  756. /// access to the specified (non-volatile) memory location, returning the
  757. /// set of instructions that either define or clobber the value.
  758. ///
  759. /// This method assumes the pointer has a "NonLocal" dependency within its
  760. /// own block.
  761. ///
  762. void MemoryDependenceAnalysis::
  763. getNonLocalPointerDependency(Instruction *QueryInst,
  764. SmallVectorImpl<NonLocalDepResult> &Result) {
  765. const MemoryLocation Loc = MemoryLocation::get(QueryInst);
  766. bool isLoad = isa<LoadInst>(QueryInst);
  767. BasicBlock *FromBB = QueryInst->getParent();
  768. assert(FromBB);
  769. assert(Loc.Ptr->getType()->isPointerTy() &&
  770. "Can't get pointer deps of a non-pointer!");
  771. Result.clear();
  772. // This routine does not expect to deal with volatile instructions.
  773. // Doing so would require piping through the QueryInst all the way through.
  774. // TODO: volatiles can't be elided, but they can be reordered with other
  775. // non-volatile accesses.
  776. // We currently give up on any instruction which is ordered, but we do handle
  777. // atomic instructions which are unordered.
  778. // TODO: Handle ordered instructions
  779. auto isOrdered = [](Instruction *Inst) {
  780. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
  781. return !LI->isUnordered();
  782. } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
  783. return !SI->isUnordered();
  784. }
  785. return false;
  786. };
  787. if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
  788. Result.push_back(NonLocalDepResult(FromBB,
  789. MemDepResult::getUnknown(),
  790. const_cast<Value *>(Loc.Ptr)));
  791. return;
  792. }
  793. const DataLayout &DL = FromBB->getModule()->getDataLayout();
  794. PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC);
  795. // This is the set of blocks we've inspected, and the pointer we consider in
  796. // each block. Because of critical edges, we currently bail out if querying
  797. // a block with multiple different pointers. This can happen during PHI
  798. // translation.
  799. DenseMap<BasicBlock*, Value*> Visited;
  800. if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
  801. Result, Visited, true))
  802. return;
  803. Result.clear();
  804. Result.push_back(NonLocalDepResult(FromBB,
  805. MemDepResult::getUnknown(),
  806. const_cast<Value *>(Loc.Ptr)));
  807. }
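// Usage sketch (editor's illustration; NonLocalDepResult accessors as used by
// in-tree clients such as GVN):
//   SmallVector<NonLocalDepResult, 16> Deps;
//   MD.getNonLocalPointerDependency(Load, Deps);
//   for (const NonLocalDepResult &R : Deps) {
//     if (R.getResult().isDef()) {
//       // value available in R.getBB() via R.getResult().getInst()
//     }
//   }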
  808. /// GetNonLocalInfoForBlock - Compute the memdep value for BB with
  809. /// Pointer/PointeeSize using either cached information in Cache or by doing a
  810. /// lookup (which may use dirty cache info if available). If we do a lookup,
  811. /// add the result to the cache.
  812. MemDepResult MemoryDependenceAnalysis::GetNonLocalInfoForBlock(
  813. Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
  814. BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
  815. // Do a binary search to see if we already have an entry for this block in
  816. // the cache set. If so, find it.
  817. NonLocalDepInfo::iterator Entry =
  818. std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
  819. NonLocalDepEntry(BB));
  820. if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
  821. --Entry;
  822. NonLocalDepEntry *ExistingResult = nullptr;
  823. if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
  824. ExistingResult = &*Entry;
  825. // If we have a cached entry, and it is non-dirty, use it as the value for
  826. // this dependency.
  827. if (ExistingResult && !ExistingResult->getResult().isDirty()) {
  828. ++NumCacheNonLocalPtr;
  829. return ExistingResult->getResult();
  830. }
  831. // Otherwise, we have to scan for the value. If we have a dirty cache
  832. // entry, start scanning from its position, otherwise we scan from the end
  833. // of the block.
  834. BasicBlock::iterator ScanPos = BB->end();
  835. if (ExistingResult && ExistingResult->getResult().getInst()) {
  836. assert(ExistingResult->getResult().getInst()->getParent() == BB &&
  837. "Instruction invalidated?");
  838. ++NumCacheDirtyNonLocalPtr;
  839. ScanPos = ExistingResult->getResult().getInst();
  840. // Eliminating the dirty entry from 'Cache', so update the reverse info.
  841. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  842. RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
  843. } else {
  844. ++NumUncacheNonLocalPtr;
  845. }
  846. // Scan the block for the dependency.
  847. MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
  848. QueryInst);
  849. // If we had a dirty entry for the block, update it. Otherwise, just add
  850. // a new entry.
  851. if (ExistingResult)
  852. ExistingResult->setResult(Dep);
  853. else
  854. Cache->push_back(NonLocalDepEntry(BB, Dep));
  855. // If the block has a dependency (i.e. it isn't completely transparent to
  856. // the value), remember the reverse association because we just added it
  857. // to Cache!
  858. if (!Dep.isDef() && !Dep.isClobber())
  859. return Dep;
  860. // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  861. // update MemDep when we remove instructions.
  862. Instruction *Inst = Dep.getInst();
  863. assert(Inst && "Didn't depend on anything?");
  864. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  865. ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
  866. return Dep;
  867. }
  868. /// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain
  869. /// number of elements in the array that are already properly ordered. This is
  870. /// optimized for the case when only a few entries are added.
  871. static void
  872. SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
  873. unsigned NumSortedEntries) {
  874. switch (Cache.size() - NumSortedEntries) {
  875. case 0:
  876. // done, no new entries.
  877. break;
  878. case 2: {
  879. // Two new entries, insert the last one into place.
  880. NonLocalDepEntry Val = Cache.back();
  881. Cache.pop_back();
  882. MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
  883. std::upper_bound(Cache.begin(), Cache.end()-1, Val);
  884. Cache.insert(Entry, Val);
  885. // FALL THROUGH.
  886. }
  887. case 1:
  888. // One new entry, Just insert the new value at the appropriate position.
  889. if (Cache.size() != 1) {
  890. NonLocalDepEntry Val = Cache.back();
  891. Cache.pop_back();
  892. MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
  893. std::upper_bound(Cache.begin(), Cache.end(), Val);
  894. Cache.insert(Entry, Val);
  895. }
  896. break;
  897. default:
  898. // Added many values, do a full scale sort.
  899. std::sort(Cache.begin(), Cache.end());
  900. break;
  901. }
  902. }
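// Editor's note (illustrative): if NumSortedEntries == 4 and Cache.size() == 5,
// only the single trailing entry is moved into place with one upper_bound plus
// insert; only when three or more entries were appended does the code fall
// back to a full std::sort of the cache.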
  903. /// getNonLocalPointerDepFromBB - Perform a dependency query based on
  904. /// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
  905. /// results to the results vector and keep track of which blocks are visited in
  906. /// 'Visited'.
  907. ///
  908. /// This has special behavior for the first block queries (when SkipFirstBlock
  909. /// is true). In this special case, it ignores the contents of the specified
  910. /// block and starts returning dependence info for its predecessors.
  911. ///
  912. /// This function returns false on success, or true to indicate that it could
  913. /// not compute dependence information for some reason. This should be treated
  914. /// as a clobber dependence on the first instruction in the predecessor block.
  915. bool MemoryDependenceAnalysis::getNonLocalPointerDepFromBB(
  916. Instruction *QueryInst, const PHITransAddr &Pointer,
  917. const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
  918. SmallVectorImpl<NonLocalDepResult> &Result,
  919. DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
  920. // Look up the cached info for Pointer.
  921. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
  922. // Set up a temporary NLPI value. If the map doesn't yet have an entry for
  923. // CacheKey, this value will be inserted as the associated value. Otherwise,
  924. // it'll be ignored, and we'll have to check to see if the cached size and
  925. // aa tags are consistent with the current query.
  926. NonLocalPointerInfo InitialNLPI;
  927. InitialNLPI.Size = Loc.Size;
  928. InitialNLPI.AATags = Loc.AATags;
  929. // Get the NLPI for CacheKey, inserting one into the map if it doesn't
  930. // already have one.
  931. std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
  932. NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
  933. NonLocalPointerInfo *CacheInfo = &Pair.first->second;
  934. // If we already have a cache entry for this CacheKey, we may need to do some
  935. // work to reconcile the cache entry and the current query.
  936. if (!Pair.second) {
  937. if (CacheInfo->Size < Loc.Size) {
  938. // The query's Size is greater than the cached one. Throw out the
  939. // cached data and proceed with the query at the greater size.
  940. CacheInfo->Pair = BBSkipFirstBlockPair();
  941. CacheInfo->Size = Loc.Size;
  942. for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
  943. DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
  944. if (Instruction *Inst = DI->getResult().getInst())
  945. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
  946. CacheInfo->NonLocalDeps.clear();
  947. } else if (CacheInfo->Size > Loc.Size) {
  948. // This query's Size is less than the cached one. Conservatively restart
  949. // the query using the greater size.
  950. return getNonLocalPointerDepFromBB(QueryInst, Pointer,
  951. Loc.getWithNewSize(CacheInfo->Size),
  952. isLoad, StartBB, Result, Visited,
  953. SkipFirstBlock);
  954. }
  955. // If the query's AATags are inconsistent with the cached one,
  956. // conservatively throw out the cached data and restart the query with
  957. // no tag if needed.
  958. if (CacheInfo->AATags != Loc.AATags) {
  959. if (CacheInfo->AATags) {
  960. CacheInfo->Pair = BBSkipFirstBlockPair();
  961. CacheInfo->AATags = AAMDNodes();
  962. for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
  963. DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
  964. if (Instruction *Inst = DI->getResult().getInst())
  965. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
  966. CacheInfo->NonLocalDeps.clear();
  967. }
  968. if (Loc.AATags)
  969. return getNonLocalPointerDepFromBB(QueryInst,
  970. Pointer, Loc.getWithoutAATags(),
  971. isLoad, StartBB, Result, Visited,
  972. SkipFirstBlock);
  973. }
  974. }
  975. NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
  976. // If we have valid cached information for exactly the block we are
  977. // investigating, just return it with no recomputation.
  978. if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
  979. // We have a fully cached result for this query then we can just return the
  980. // cached results and populate the visited set. However, we have to verify
  981. // that we don't already have conflicting results for these blocks. Check
  982. // to ensure that if a block in the results set is in the visited set that
  983. // it was for the same pointer query.
  984. if (!Visited.empty()) {
  985. for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
  986. I != E; ++I) {
  987. DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
  988. if (VI == Visited.end() || VI->second == Pointer.getAddr())
  989. continue;
  990. // We have a pointer mismatch in a block. Just return clobber, saying
  991. // that something was clobbered in this result. We could also do a
  992. // non-fully cached query, but there is little point in doing this.
  993. return true;
  994. }
  995. }
  996. Value *Addr = Pointer.getAddr();
  997. for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
  998. I != E; ++I) {
  999. Visited.insert(std::make_pair(I->getBB(), Addr));
  1000. if (I->getResult().isNonLocal()) {
  1001. continue;
  1002. }
  1003. if (!DT) {
  1004. Result.push_back(NonLocalDepResult(I->getBB(),
  1005. MemDepResult::getUnknown(),
  1006. Addr));
  1007. } else if (DT->isReachableFromEntry(I->getBB())) {
  1008. Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
  1009. }
  1010. }
  1011. ++NumCacheCompleteNonLocalPtr;
  1012. return false;
  1013. }
  1014. // Otherwise, either this is a new block, a block with an invalid cache
  1015. // pointer or one that we're about to invalidate by putting more info into it
  1016. // than its valid cache info. If empty, the result will be valid cache info,
  1017. // otherwise it isn't.
  1018. if (Cache->empty())
  1019. CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
  1020. else
  1021. CacheInfo->Pair = BBSkipFirstBlockPair();
  1022. SmallVector<BasicBlock*, 32> Worklist;
  1023. Worklist.push_back(StartBB);
  1024. // PredList used inside loop.
  1025. SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
  1026. // Keep track of the entries that we know are sorted. Previously cached
  1027. // entries will all be sorted. The entries we add we only sort on demand (we
  1028. // don't insert every element into its sorted position). We know that we
  1029. // won't get any reuse from currently inserted values, because we don't
  1030. // revisit blocks after we insert info for them.
  1031. unsigned NumSortedEntries = Cache->size();
  1032. DEBUG(AssertSorted(*Cache));
  1033. while (!Worklist.empty()) {
  1034. BasicBlock *BB = Worklist.pop_back_val();
  1035. // If we end up processing a large number of blocks, it becomes very
  1036. // expensive and is likely not worth it, so bail out.
  1037. if (Result.size() > NumResultsLimit) {
  1038. Worklist.clear();
  1039. // Sort it now (if needed) so that recursive invocations of
  1040. // getNonLocalPointerDepFromBB and other routines that could reuse the
  1041. // cache value will only see properly sorted cache arrays.
  1042. if (Cache && NumSortedEntries != Cache->size()) {
  1043. SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
  1044. }
  1045. // Since we bail out, the "Cache" set won't contain all of the
  1046. // results for the query. This is ok (we can still use it to accelerate
  1047. // specific block queries) but we can't do the fastpath "return all
  1048. // results from the set". Clear out the indicator for this.
  1049. CacheInfo->Pair = BBSkipFirstBlockPair();
  1050. return true;
  1051. }

    // Skip the first block if we have it.
    if (!SkipFirstBlock) {
      // Analyze the dependency of *Pointer in FromBB. See if we already have
      // been here.
      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");

      // Get the dependency info for Pointer in BB. If we have cached
      // information, we will use it, otherwise we compute it.
      DEBUG(AssertSorted(*Cache, NumSortedEntries));
      MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst,
                                                 Loc, isLoad, BB, Cache,
                                                 NumSortedEntries);

      // If we got a Def or Clobber, add this to the list of results.
      if (!Dep.isNonLocal()) {
        if (!DT) {
          Result.push_back(NonLocalDepResult(BB,
                                             MemDepResult::getUnknown(),
                                             Pointer.getAddr()));
          continue;
        } else if (DT->isReachableFromEntry(BB)) {
          Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
          continue;
        }
      }
    }

    // If 'Pointer' is an instruction defined in this block, then we need to do
    // phi translation to change it into a value live in the predecessor block.
    // If not, we just add the predecessors to the worklist and scan them with
    // the same Pointer.
    if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
      SkipFirstBlock = false;
      SmallVector<BasicBlock*, 16> NewBlocks;
      for (BasicBlock *Pred : PredCache.get(BB)) {
        // Verify that we haven't looked at this block yet.
        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
          InsertRes = Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
        if (InsertRes.second) {
          // First time we've looked at Pred.
          NewBlocks.push_back(Pred);
          continue;
        }

        // If we have seen this block before, but it was with a different
        // pointer, then we have a phi translation failure and we have to treat
        // this as a clobber.
        if (InsertRes.first->second != Pointer.getAddr()) {
          // Make sure to clean up the Visited map before continuing on to
          // PredTranslationFailure.
          for (unsigned i = 0; i < NewBlocks.size(); i++)
            Visited.erase(NewBlocks[i]);
          goto PredTranslationFailure;
        }
      }
      Worklist.append(NewBlocks.begin(), NewBlocks.end());
      continue;
    }

    // We do need to do phi translation. If we know ahead of time that we can't
    // phi translate this value, don't even try.
    if (!Pointer.IsPotentiallyPHITranslatable())
      goto PredTranslationFailure;

    // We may have added values to the cache list before this PHI translation.
    // If so, we haven't done anything to ensure that the cache remains sorted.
    // Sort it now (if needed) so that recursive invocations of
    // getNonLocalPointerDepFromBB and other routines that could reuse the cache
    // value will only see properly sorted cache arrays.
    if (Cache && NumSortedEntries != Cache->size()) {
      SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      NumSortedEntries = Cache->size();
    }
    Cache = nullptr;

    PredList.clear();
    for (BasicBlock *Pred : PredCache.get(BB)) {
      PredList.push_back(std::make_pair(Pred, Pointer));

      // Get the PHI translated pointer in this predecessor. This can fail if
      // not translatable, in which case the getAddr() returns null.
      PHITransAddr &PredPointer = PredList.back().second;
      PredPointer.PHITranslateValue(BB, Pred, DT, /*MustDominate=*/false);
      Value *PredPtrVal = PredPointer.getAddr();

      // Check to see if we have already visited this pred block with another
      // pointer. If so, we can't do this lookup. This failure can occur
      // with PHI translation when a critical edge exists and the PHI node in
      // the successor translates to a pointer value different than the
      // pointer the block was first analyzed with.
      std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
        InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));

      if (!InsertRes.second) {
        // We found the pred; take it off the list of preds to visit.
        PredList.pop_back();

        // If the predecessor was visited with PredPtr, then we already did
        // the analysis and can ignore it.
        if (InsertRes.first->second == PredPtrVal)
          continue;

        // Otherwise, the block was previously analyzed with a different
        // pointer. We can't represent the result of this case, so we just
        // treat this as a phi translation failure.

        // Make sure to clean up the Visited map before continuing on to
        // PredTranslationFailure.
        for (unsigned i = 0, n = PredList.size(); i < n; ++i)
          Visited.erase(PredList[i].first);
        goto PredTranslationFailure;
      }
    }

    // Actually process results here; this needs to be a separate loop to avoid
    // calling getNonLocalPointerDepFromBB for blocks we don't want to return
    // any results for. (getNonLocalPointerDepFromBB will modify our
    // datastructures in ways the code after the PredTranslationFailure label
    // doesn't expect.)
    for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
      BasicBlock *Pred = PredList[i].first;
      PHITransAddr &PredPointer = PredList[i].second;
      Value *PredPtrVal = PredPointer.getAddr();

      bool CanTranslate = true;
      // If PHI translation was unable to find an available pointer in this
      // predecessor, then we have to assume that the pointer is clobbered in
      // that predecessor. We can still do PRE of the load, which would insert
      // a computation of the pointer in this predecessor.
      if (!PredPtrVal)
        CanTranslate = false;

      // FIXME: it is entirely possible that PHI translating will end up with
      // the same value. Consider PHI translating something like:
      // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
      // to recurse here, pedantically speaking.

      // If getNonLocalPointerDepFromBB fails here, that means the cached
      // result conflicted with the Visited list; we have to conservatively
      // assume it is unknown, but this also does not block PRE of the load.
      if (!CanTranslate ||
          getNonLocalPointerDepFromBB(QueryInst, PredPointer,
                                      Loc.getWithNewPtr(PredPtrVal),
                                      isLoad, Pred,
                                      Result, Visited)) {
        // Add the entry to the Result list.
        NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
        Result.push_back(Entry);

        // Since we had a phi translation failure, the cache for CacheKey won't
        // include all of the entries that we need to immediately satisfy future
        // queries. Mark this in NonLocalPointerDeps by setting the
        // BBSkipFirstBlockPair pointer to null. This requires reuse of the
        // cached value to do more work but not miss the phi trans failure.
        NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
        NLPI.Pair = BBSkipFirstBlockPair();
        continue;
      }
    }

    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
    CacheInfo = &NonLocalPointerDeps[CacheKey];
    Cache = &CacheInfo->NonLocalDeps;
    NumSortedEntries = Cache->size();

    // Since we did phi translation, the "Cache" set won't contain all of the
    // results for the query. This is ok (we can still use it to accelerate
    // specific block queries) but we can't do the fastpath "return all
    // results from the set". Clear out the indicator for this.
    CacheInfo->Pair = BBSkipFirstBlockPair();
    SkipFirstBlock = false;
    continue;

  PredTranslationFailure:
    // The following code is "failure"; we can't produce a sane translation
    // for the given block. It assumes that we haven't modified any of
    // our datastructures while processing the current block.

    if (!Cache) {
      // Refresh the CacheInfo/Cache pointer if it got invalidated.
      CacheInfo = &NonLocalPointerDeps[CacheKey];
      Cache = &CacheInfo->NonLocalDeps;
      NumSortedEntries = Cache->size();
    }

    // Since we failed phi translation, the "Cache" set won't contain all of the
    // results for the query. This is ok (we can still use it to accelerate
    // specific block queries) but we can't do the fastpath "return all
    // results from the set". Clear out the indicator for this.
    CacheInfo->Pair = BBSkipFirstBlockPair();

    // If *nothing* works, mark the pointer as unknown.
    //
    // If this is the magic first block, return this as a clobber of the whole
    // incoming value. Since we can't phi translate to one of the predecessors,
    // we have to bail out.
    if (SkipFirstBlock)
      return true;

    for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
      assert(I != Cache->rend() && "Didn't find current block??");
      if (I->getBB() != BB)
        continue;

      assert((I->getResult().isNonLocal() || !DT->isReachableFromEntry(BB)) &&
             "Should only be here with transparent block");
      I->setResult(MemDepResult::getUnknown());
      Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
                                         Pointer.getAddr()));
      break;
    }
  }

  // Okay, we're done now. If we added new values to the cache, re-sort it.
  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
  DEBUG(AssertSorted(*Cache));
  return false;
}
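
// Illustrative sketch, not part of the original file: the comments above rely
// on keeping the per-pointer cache sorted while only appending new entries at
// the end. A minimal version of that idea (assuming a vector whose first
// NumSortedEntries elements are already sorted) sorts just the appended tail
// and merges it back in; SortNonLocalDepInfoCache, called above, is the real
// routine and may differ in detail.
//
//   template <typename T>
//   void sortUnsortedTail(std::vector<T> &Vec, unsigned NumSortedEntries) {
//     // Sort only the newly appended entries, then merge them into the
//     // already-sorted prefix so the whole vector stays sorted.
//     std::sort(Vec.begin() + NumSortedEntries, Vec.end());
//     std::inplace_merge(Vec.begin(), Vec.begin() + NumSortedEntries,
//                        Vec.end());
//   }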

/// RemoveCachedNonLocalPointerDependencies - If P exists in
/// CachedNonLocalPointerInfo, remove it.
void MemoryDependenceAnalysis::
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
  CachedNonLocalPointerInfo::iterator It =
    NonLocalPointerDeps.find(P);
  if (It == NonLocalPointerDeps.end()) return;

  // Remove all of the entries in the BB->val map. This involves removing
  // instructions from the reverse map.
  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;

  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
    Instruction *Target = PInfo[i].getResult().getInst();
    if (!Target) continue;  // Ignore non-local dep results.
    assert(Target->getParent() == PInfo[i].getBB());

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
  }

  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
  NonLocalPointerDeps.erase(It);
}

/// invalidateCachedPointerInfo - This method is used to invalidate cached
/// information about the specified pointer, because it may be too
/// conservative in memdep. This is an optional call that can be used when
/// the client detects an equivalence between the pointer and some other
/// value and replaces the other value with ptr. This can make Ptr available
/// in more places than the cached information currently reflects.
void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
  // If Ptr isn't really a pointer, just ignore it.
  if (!Ptr->getType()->isPointerTy()) return;
  // Flush store info for the pointer.
  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
  // Flush load info for the pointer.
  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
}
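
// Illustrative sketch, not part of the original file: the typical client-side
// pattern for the call above. 'MD', 'Old' and 'NewPtr' are hypothetical names
// standing in for a pass's MemoryDependenceAnalysis handle and two values the
// pass has proven equivalent.
//
//   // The pass rewrote all uses of 'Old' to the equivalent pointer 'NewPtr'.
//   // Cached memdep results keyed on 'NewPtr' may now be too conservative,
//   // so flush the load and store info for it.
//   Old->replaceAllUsesWith(NewPtr);
//   MD->invalidateCachedPointerInfo(NewPtr);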

/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
/// This needs to be done when the CFG changes, e.g., due to splitting
/// critical edges.
void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
  PredCache.clear();
}
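
// Illustrative sketch, not part of the original file: a CFG-mutating client is
// expected to drop the predecessor cache once predecessor lists have changed.
// 'MD', 'TI' and 'SuccNum' are hypothetical names, and the exact edge-splitting
// utility is assumed here.
//
//   // Splitting a critical edge changes predecessor lists, so the cached
//   // predecessor iterators for the affected blocks become stale.
//   SplitCriticalEdge(TI, SuccNum);
//   MD->invalidateCachedPredecessors();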

/// removeInstruction - Remove an instruction from the dependence analysis,
/// updating the dependence of instructions that previously depended on it.
/// This method attempts to keep the cache coherent using the reverse map.
void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
  // Walk through the Non-local dependencies, removing this one as the value
  // for any cached queries.
  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
  if (NLDI != NonLocalDeps.end()) {
    NonLocalDepInfo &BlockMap = NLDI->second.first;
    for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
         DI != DE; ++DI)
      if (Instruction *Inst = DI->getResult().getInst())
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
    NonLocalDeps.erase(NLDI);
  }

  // If we have a cached local dependence query for this instruction, remove it.
  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
  if (LocalDepEntry != LocalDeps.end()) {
    // Remove us from DepInst's reverse set now that the local dep info is gone.
    if (Instruction *Inst = LocalDepEntry->second.getInst())
      RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);

    // Remove this local dependency info.
    LocalDeps.erase(LocalDepEntry);
  }

  // If we have any cached pointer dependencies on this instruction, remove
  // them. If the instruction has non-pointer type, then it can't be a pointer
  // base.
  //
  // Remove it from both the load info and the store info. The instruction
  // can't be in either of these maps if it is non-pointer.
  if (RemInst->getType()->isPointerTy()) {
    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
  }

  // Loop over all of the things that depend on the instruction we're removing.
  SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;

  // If we find RemInst as a clobber or Def in any of the maps for other values,
  // we need to replace its entry with a dirty version of the instruction after
  // it. If RemInst is a terminator, we use a null dirty value.
  //
  // Using a dirty version of the instruction after RemInst saves having to scan
  // the entire block to get to this point.
  MemDepResult NewDirtyVal;
  if (!RemInst->isTerminator())
    NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));

  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseLocalDeps.end()) {
    // RemInst can't be the terminator if it has local stuff depending on it.
    assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
           "Nothing can locally depend on a terminator");

    for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
      assert(InstDependingOnRemInst != RemInst &&
             "Already removed our local dep info");

      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;

      // Make sure to remember that new things depend on NewDirtyVal's
      // instruction.
      assert(NewDirtyVal.getInst() && "There is no way something else can have "
             "a local dep on this if it is a terminator!");
      ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
                                                InstDependingOnRemInst));
    }

    ReverseLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating the
    // 'ReverseDeps' reference.
    while (!ReverseDepsToAdd.empty()) {
      ReverseLocalDeps[ReverseDepsToAdd.back().first]
        .insert(ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }

  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
    for (Instruction *I : ReverseDepIt->second) {
      assert(I != RemInst && "Already removed NonLocalDep info for RemInst");

      PerInstNLInfo &INLD = NonLocalDeps[I];
      // The information is now dirty!
      INLD.second = true;

      for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
           DE = INLD.first.end(); DI != DE; ++DI) {
        if (DI->getResult().getInst() != RemInst) continue;

        // Convert to a dirty entry for the subsequent instruction.
        DI->setResult(NewDirtyVal);

        if (Instruction *NextI = NewDirtyVal.getInst())
          ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
      }
    }

    ReverseNonLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating the
    // set we were iterating over.
    while (!ReverseDepsToAdd.empty()) {
      ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
        .insert(ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }

  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
  // value in the NonLocalPointerDeps info.
  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
    ReverseNonLocalPtrDeps.find(RemInst);
  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
    SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;

    for (ValueIsLoadPair P : ReversePtrDepIt->second) {
      assert(P.getPointer() != RemInst &&
             "Already removed NonLocalPointerDeps info for RemInst");

      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;

      // The cache is not valid for any specific block anymore.
      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();

      // Update any entries for RemInst to use the instruction after it.
      for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
           DI != DE; ++DI) {
        if (DI->getResult().getInst() != RemInst) continue;

        // Convert to a dirty entry for the subsequent instruction.
        DI->setResult(NewDirtyVal);

        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
      }

      // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
      // subsequent value may invalidate the sortedness.
      std::sort(NLPDI.begin(), NLPDI.end());
    }

    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);

    while (!ReversePtrDepsToAdd.empty()) {
      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
        .insert(ReversePtrDepsToAdd.back().second);
      ReversePtrDepsToAdd.pop_back();
    }
  }

  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
  AA->deleteValue(RemInst);
  DEBUG(verifyRemoved(RemInst));
}
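
// Illustrative sketch, not part of the original file: the calling pattern
// removeInstruction is written for. 'MD' and 'DeadInst' are hypothetical names
// for a client's MemoryDependenceAnalysis handle and an instruction it is
// about to delete.
//
//   // Detach the instruction from the dependence caches first (this marks
//   // entries that depended on it as dirty via the reverse maps), and only
//   // then erase it from the IR.
//   MD->removeInstruction(DeadInst);
//   DeadInst->eraseFromParent();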

/// verifyRemoved - Verify that the specified instruction does not occur
/// in our internal data structures. This function verifies by asserting in
/// debug builds.
void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
#ifndef NDEBUG
  for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
       E = LocalDeps.end(); I != E; ++I) {
    assert(I->first != D && "Inst occurs in data structures");
    assert(I->second.getInst() != D &&
           "Inst occurs in data structures");
  }

  for (CachedNonLocalPointerInfo::const_iterator
       I = NonLocalPointerDeps.begin(),
       E = NonLocalPointerDeps.end(); I != E; ++I) {
    assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
    const NonLocalDepInfo &Val = I->second.NonLocalDeps;
    for (NonLocalDepInfo::const_iterator II = Val.begin(), EE = Val.end();
         II != EE; ++II)
      assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
  }

  for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
       E = NonLocalDeps.end(); I != E; ++I) {
    assert(I->first != D && "Inst occurs in data structures");
    const PerInstNLInfo &INLD = I->second;
    for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
         EE = INLD.first.end(); II != EE; ++II)
      assert(II->getResult().getInst() != D && "Inst occurs in data structures");
  }

  for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
       E = ReverseLocalDeps.end(); I != E; ++I) {
    assert(I->first != D && "Inst occurs in data structures");
    for (Instruction *Inst : I->second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
       E = ReverseNonLocalDeps.end();
       I != E; ++I) {
    assert(I->first != D && "Inst occurs in data structures");
    for (Instruction *Inst : I->second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (ReverseNonLocalPtrDepTy::const_iterator
       I = ReverseNonLocalPtrDeps.begin(),
       E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
    assert(I->first != D && "Inst occurs in rev NLPD map");

    for (ValueIsLoadPair P : I->second)
      assert(P != ValueIsLoadPair(D, false) &&
             P != ValueIsLoadPair(D, true) &&
             "Inst occurs in ReverseNonLocalPtrDeps map");
  }
#endif
}