//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs various transformations related to eliminating memcpy
// calls, or transforming sets of stores into memset's.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;

#define DEBUG_TYPE "memcpyopt"

STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");

static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
                                  bool &VariableIdxFound,
                                  const DataLayout &DL) {
  // Skip over the first indices.
  gep_type_iterator GTI = gep_type_begin(GEP);
  for (unsigned i = 1; i != Idx; ++i, ++GTI)
    /*skip along*/;

  // Compute the offset implied by the rest of the indices.
  int64_t Offset = 0;
  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (!OpC)
      return VariableIdxFound = true;
    if (OpC->isZero()) continue;  // No offset.

    // Handle struct indices, which add their field offset to the pointer.
    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
      continue;
    }

    // Otherwise, we have a sequential type like an array or vector.  Multiply
    // the index by the ElementSize.
    uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
    Offset += Size * OpC->getSExtValue();
  }
  return Offset;
}

/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
/// constant offset, and return that constant offset.  For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40].  In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
                            const DataLayout &DL) {
  Ptr1 = Ptr1->stripPointerCasts();
  Ptr2 = Ptr2->stripPointerCasts();

  // Handle the trivial case first.
  if (Ptr1 == Ptr2) {
    Offset = 0;
    return true;
  }

  GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
  GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);

  bool VariableIdxFound = false;

  // If one pointer is a GEP and the other isn't, then see if the GEP is a
  // constant offset from the base, as in "P" and "gep P, 1".
  if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
    Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL);
    return !VariableIdxFound;
  }

  if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
    Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL);
    return !VariableIdxFound;
  }

  // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
  // base.  After that base, they may have some number of common (and
  // potentially variable) indices.  After that they handle some constant
  // offset, which determines their offset from each other.  At this point, we
  // handle no other case.
  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
    return false;

  // Skip any common indices and track the GEP types.
  unsigned Idx = 1;
  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
    if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
      break;

  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL);
  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL);
  if (VariableIdxFound) return false;

  Offset = Offset2 - Offset1;
  return true;
}

/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
///   store 0 -> P+1
///   store 0 -> P+0
///   store 0 -> P+3
///   store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc.  When we see
/// the first store, we make a range [1, 2).  The second store extends the range
/// to [0, 2).  The third makes a new range [2, 3).  The fourth store joins the
/// two ranges into [0, 3) which is memset'able.
namespace {
struct MemsetRange {
  // Start/End - A semi range that describes the span that this range covers.
  // The range is closed at the start and open at the end: [Start, End).
  int64_t Start, End;

  /// StartPtr - The getelementptr instruction that points to the start of the
  /// range.
  Value *StartPtr;

  /// Alignment - The known alignment of the first store.
  unsigned Alignment;

  /// TheStores - The actual stores that make up this range.
  SmallVector<Instruction*, 16> TheStores;

  bool isProfitableToUseMemset(const DataLayout &DL) const;
};
} // end anon namespace

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
  // If we found more than 4 stores to merge or 16 bytes, use memset.
  if (TheStores.size() >= 4 || End - Start >= 16) return true;

  // If there is nothing to merge, don't do anything.
  if (TheStores.size() < 2) return false;

  // If any of the stores are a memset, then it is always good to extend the
  // memset.
  for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
    if (!isa<StoreInst>(TheStores[i]))
      return true;

  // Assume that the code generator is capable of merging pairs of stores
  // together if it wants to.
  if (TheStores.size() == 2) return false;

  // If we have fewer than 8 stores, it can still be worthwhile to do this.
  // For example, merging 4 i8 stores into an i32 store is useful almost always.
  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
  // memset will be split into 2 32-bit stores anyway) and doing so can
  // pessimize the llvm optimizer.
  //
  // Since we don't have perfect knowledge here, make some assumptions: assume
  // the maximum GPR width is the same size as the largest legal integer
  // size.  If so, check to see whether we will end up actually reducing the
  // number of stores used.
  unsigned Bytes = unsigned(End - Start);
  unsigned MaxIntSize = DL.getLargestLegalIntTypeSize();
  if (MaxIntSize == 0)
    MaxIntSize = 1;
  unsigned NumPointerStores = Bytes / MaxIntSize;

  // Assume the remaining bytes if any are done a byte at a time.
  unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;

  // If we will reduce the # stores (according to this heuristic), do the
  // transformation.  This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
  // etc.
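  //
  // Worked example (illustrative numbers, not tied to a particular target):
  // with MaxIntSize == 4 and a 7-byte range covered by 7 one-byte stores,
  // NumPointerStores is 1 and NumByteStores is 3, so 7 > 1 + 3 and the range
  // is lowered to a memset.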
  return TheStores.size() > NumPointerStores + NumByteStores;
}

namespace {
class MemsetRanges {
  /// Ranges - A sorted list of the memset ranges.  We use std::list here
  /// because each element is relatively large and expensive to copy.
  std::list<MemsetRange> Ranges;
  typedef std::list<MemsetRange>::iterator range_iterator;
  const DataLayout &DL;
public:
  MemsetRanges(const DataLayout &DL) : DL(DL) {}

  typedef std::list<MemsetRange>::const_iterator const_iterator;
  const_iterator begin() const { return Ranges.begin(); }
  const_iterator end() const { return Ranges.end(); }
  bool empty() const { return Ranges.empty(); }

  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      addStore(OffsetFromFirst, SI);
    else
      addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
  }

  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
    int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());

    addRange(OffsetFromFirst, StoreSize,
             SI->getPointerOperand(), SI->getAlignment(), SI);
  }

  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
    int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
  }

  void addRange(int64_t Start, int64_t Size, Value *Ptr,
                unsigned Alignment, Instruction *Inst);
};
} // end anon namespace

/// addRange - Add a new store to the MemsetRanges data structure.  This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
///
/// Do a linear search of the ranges to see if this can be joined and/or to
/// find the insertion point in the list.  We keep the ranges sorted for
/// simplicity here.  This is a linear search of a linked list, which is ugly,
/// however the number of ranges is limited, so this won't get crazy slow.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                            unsigned Alignment, Instruction *Inst) {
  int64_t End = Start + Size;
  range_iterator I = Ranges.begin(), E = Ranges.end();

  while (I != E && Start > I->End)
    ++I;

  // We now know that I == E, in which case we didn't find anything to merge
  // with, or that Start <= I->End.  If End < I->Start or I == E, then we need
  // to insert a new range.  Handle this now.
  if (I == E || End < I->Start) {
    MemsetRange &R = *Ranges.insert(I, MemsetRange());
    R.Start     = Start;
    R.End       = End;
    R.StartPtr  = Ptr;
    R.Alignment = Alignment;
    R.TheStores.push_back(Inst);
    return;
  }

  // This store overlaps with I, add it.
  I->TheStores.push_back(Inst);

  // At this point, we may have an interval that completely contains our store.
  // If so, just add it to the interval and return.
  if (I->Start <= Start && I->End >= End)
    return;

  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
  // but is not entirely contained within the range.

  // See if the range extends the start of the range.  In this case, it couldn't
  // possibly cause it to join the prior range, because otherwise we would have
  // stopped on *it*.
  if (Start < I->Start) {
    I->Start = Start;
    I->StartPtr = Ptr;
    I->Alignment = Alignment;
  }

  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
  // is in or right at the end of I), and that End >= I->Start.  Extend I out to
  // End.
  if (End > I->End) {
    I->End = End;
    range_iterator NextI = I;
    while (++NextI != E && End >= NextI->Start) {
      // Merge the range in.
      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
      if (NextI->End > I->End)
        I->End = NextI->End;
      Ranges.erase(NextI);
      NextI = I;
    }
  }
}

//===----------------------------------------------------------------------===//
//                         MemCpyOpt Pass
//===----------------------------------------------------------------------===//

namespace {
class MemCpyOpt : public FunctionPass {
  MemoryDependenceAnalysis *MD;
  TargetLibraryInfo *TLI;
public:
  static char ID; // Pass identification, replacement for typeid
  MemCpyOpt() : FunctionPass(ID) {
    initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
    MD = nullptr;
    TLI = nullptr;
  }

  bool runOnFunction(Function &F) override;

private:
  // This transformation requires dominator info.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<MemoryDependenceAnalysis>();
    AU.addRequired<AliasAnalysis>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<AliasAnalysis>();
    AU.addPreserved<MemoryDependenceAnalysis>();
  }

  // Helper functions
  bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
  bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
  bool processMemCpy(MemCpyInst *M);
  bool processMemMove(MemMoveInst *M);
  bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
                            uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
  bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
  bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
  bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
  bool processByValArgument(CallSite CS, unsigned ArgNo);
  Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
                                    Value *ByteVal);

  bool iterateOnFunction(Function &F);
};

char MemCpyOpt::ID = 0;
}

// createMemCpyOptPass - The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }

INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
                    false, false)

/// tryMergingIntoMemset - When scanning forward over instructions, we look for
/// some other patterns to fold away.  In particular, this looks for stores to
/// neighboring locations of memory.  If it sees enough consecutive ones, it
/// attempts to merge them together into a memcpy/memset.
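///
/// As an illustrative sketch (hand-written, simplified IR, not from a test):
/// \code
///   store i8 0, i8* %P
///   store i8 0, i8* %P1   ; %P1 = getelementptr i8, i8* %P, i64 1
///   store i8 0, i8* %P2   ; %P2 = getelementptr i8, i8* %P, i64 2
///   store i8 0, i8* %P3   ; %P3 = getelementptr i8, i8* %P, i64 3
/// \endcode
/// can be collapsed into a single llvm.memset of length 4, assuming the
/// profitability heuristic above accepts the range.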
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
                                             Value *StartPtr, Value *ByteVal) {
  const DataLayout &DL = StartInst->getModule()->getDataLayout();

  // Okay, so we now have a single store of a splatable value.  Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemsetRanges Ranges(DL);

  BasicBlock::iterator BI = StartInst;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple()) break;

      // Check to see if this stored value is of the same byte-splattable value.
      if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset,
                           DL))
        break;

      Ranges.addStore(Offset, NextStore);
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
          !isa<ConstantInt>(MSI->getLength()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL))
        break;

      Ranges.addMemSet(Offset, MSI);
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return nullptr;

  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addInst(0, StartInst);

  // If we create any memsets, we put it right before the first instruction that
  // isn't part of the memset block.  This ensures that the memset is dominated
  // by any addressing instruction needed by the start of the block.
  IRBuilder<> Builder(BI);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  Instruction *AMemSet = nullptr;
  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemsetRange &Range = *I;

    if (Range.TheStores.size() == 1) continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(DL))
      continue;

    // Otherwise, we do want to transform this!  Create a new memset.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    // Determine alignment
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      Type *EltType =
          cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = DL.getABITypeAlignment(EltType);
    }

    AMemSet =
        Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
                             Alignment);

    DEBUG(dbgs() << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            dbgs() << *Range.TheStores[i] << '\n';
          dbgs() << "With: " << *AMemSet << '\n');

    if (!Range.TheStores.empty())
      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    // Zap all the stores.
    for (SmallVectorImpl<Instruction *>::const_iterator
             SI = Range.TheStores.begin(),
             SE = Range.TheStores.end(); SI != SE; ++SI) {
      MD->removeInstruction(*SI);
      (*SI)->eraseFromParent();
    }
    ++NumMemSetInfer;
  }

  return AMemSet;
}

bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (!SI->isSimple()) return false;
  const DataLayout &DL = SI->getModule()->getDataLayout();

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
    if (LI->isSimple() && LI->hasOneUse() &&
        LI->getParent() == SI->getParent()) {
      MemDepResult ldep = MD->getDependency(LI);
      CallInst *C = nullptr;
      if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
        C = dyn_cast<CallInst>(ldep.getInst());

      if (C) {
        // Check that nothing touches the dest of the "copy" between
        // the call and the store.
        AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
        MemoryLocation StoreLoc = MemoryLocation::get(SI);
        for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
                                  E = C; I != E; --I) {
          if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
            C = nullptr;
            break;
          }
        }
      }

      if (C) {
        unsigned storeAlign = SI->getAlignment();
        if (!storeAlign)
          storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
        unsigned loadAlign = LI->getAlignment();
        if (!loadAlign)
          loadAlign = DL.getABITypeAlignment(LI->getType());

        bool changed = performCallSlotOptzn(
            LI, SI->getPointerOperand()->stripPointerCasts(),
            LI->getPointerOperand()->stripPointerCasts(),
            DL.getTypeStoreSize(SI->getOperand(0)->getType()),
            std::min(storeAlign, loadAlign), C);
        if (changed) {
          MD->removeInstruction(SI);
          SI->eraseFromParent();
          MD->removeInstruction(LI);
          LI->eraseFromParent();
          ++NumMemCpyInstr;
          return true;
        }
      }
    }
  }

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset'd a
  // byte at a time, like "0" or "-1" of any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
    if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                              ByteVal)) {
      BBI = I;  // Don't invalidate iterator.
      return true;
    }

  return false;
}

bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
  // See if there is another memset or store neighboring this memset which
  // allows us to widen out the memset to do a single larger store.
  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
    if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
                                              MSI->getValue())) {
      BBI = I;  // Don't invalidate iterator.
      return true;
    }
  return false;
}

/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
                                     Value *cpyDest, Value *cpySrc,
                                     uint64_t cpyLen, unsigned cpyAlign,
                                     CallInst *C) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  CallSite CS(C);

  // Require that src be an alloca.  This simplifies the reasoning considerably.
  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  const DataLayout &DL = cpy->getModule()->getDataLayout();
  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
                     srcArraySize->getZExtValue();

  if (cpyLen < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
                        destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
    if (A->getDereferenceableBytes() < srcSize) {
      // If the destination is an sret parameter then only accesses that are
      // outside of the returned struct type can trap.
      if (!A->hasStructRetAttr())
        return false;

      Type *StructTy = cast<PointerType>(A->getType())->getElementType();
      if (!StructTy->isSized()) {
        // The call may never return and hence the copy-instruction may never
        // be executed, and therefore it's not safe to say "the destination
        // has at least <cpyLen> bytes, as implied by the copy-instruction".
        return false;
      }

      uint64_t destSize = DL.getTypeAllocSize(StructTy);
      if (destSize < srcSize)
        return false;
    }
  } else {
    return false;
  }

  // Check that dest points to memory that is at least as aligned as src.
  unsigned srcAlign = srcAlloca->getAlignment();
  if (!srcAlign)
    srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType());
  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
  // If dest is not aligned enough and we can't increase its alignment then
  // bail out.
  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
    return false;

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
                                   srcAlloca->user_end());
  while (!srcUseList.empty()) {
    User *U = srcUseList.pop_back_val();

    if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
      for (User *UU : U->users())
        srcUseList.push_back(UU);
      continue;
    }
    if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
      if (!G->hasAllZeroIndices())
        return false;

      for (User *UU : U->users())
        srcUseList.push_back(UU);
      continue;
    }
    if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
      if (IT->getIntrinsicID() == Intrinsic::lifetime_start ||
          IT->getIntrinsicID() == Intrinsic::lifetime_end)
        continue;

    if (U != C && U != cpy)
      return false;
  }

  // Check that src isn't captured by the called function since the
  // transformation can cause aliasing issues in that case.
  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
    if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i))
      return false;

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
  // If necessary, perform additional analysis.
  if (MR != AliasAnalysis::NoModRef)
    MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
  if (MR != AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
      Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
          : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                        cpyDest->getName(), C);
      changedArgument = true;
      if (CS.getArgument(i)->getType() == Dest->getType())
        CS.setArgument(i, Dest);
      else
        CS.setArgument(i, CastInst::CreatePointerCast(Dest,
                           CS.getArgument(i)->getType(), Dest->getName(), C));
    }

  if (!changedArgument)
    return false;

  // If the destination wasn't sufficiently aligned then increase its alignment.
  if (!isDestSufficientlyAligned) {
    assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
    cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
  }

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MD->removeInstruction(C);

  // Update AA metadata
  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
  // handled here, but combineMetadata doesn't support them yet
  unsigned KnownIDs[] = {
    LLVMContext::MD_tbaa,
    LLVMContext::MD_alias_scope,
    LLVMContext::MD_noalias,
  };
  combineMetadata(C, cpy, KnownIDs);

  // Remove the memcpy.
  MD->removeInstruction(cpy);
  ++NumMemCpyInstr;

  return true;
}

/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
/// memory dependence of memcpy 'M' is the memcpy 'MDep'.  Try to simplify M to
/// copy from MDep's input if we can.
///
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
  // We can only transform memcpys where the dest of one is the source of the
  // other.
  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
    return false;

  // If dep instruction is reading from our current input, then it is a noop
  // transfer and substituting the input won't change this instruction.  Just
  // ignore the input and let someone else zap MDep.  This handles cases like:
  //    memcpy(a <- a)
  //    memcpy(b <- a)
  if (M->getSource() == MDep->getSource())
    return false;

  // Second, the lengths of the memcpys must be the same, or the preceding one
  // must be larger than the following one.
  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
    return false;

  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  // Verify that the copied-from memory doesn't change in between the two
  // transfers.  For example, in:
  //    memcpy(a <- b)
  //    *b = 42;
  //    memcpy(c <- a)
  // It would be invalid to transform the second memcpy into memcpy(c <- b).
  //
  // TODO: If the code between M and MDep is transparent to the destination "c",
  // then we could still perform the xform by moving M up to the first memcpy.
  //
  // NOTE: This is conservative, it will stop on any read from the source loc,
  // not just the defining memcpy.
  MemDepResult SourceDep = MD->getPointerDependencyFrom(
      MemoryLocation::getForSource(MDep), false, M, M->getParent());
  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
    return false;

  // If the dest of the second might alias the source of the first, then the
  // source and dest might overlap.  We still want to eliminate the intermediate
  // value, but we have to generate a memmove instead of memcpy.
  bool UseMemMove = false;
  if (!AA.isNoAlias(MemoryLocation::getForDest(M),
                    MemoryLocation::getForSource(MDep)))
    UseMemMove = true;

  // If all checks passed, then we can transform M.

  // Make sure to use the lesser of the alignment of the source and the dest
  // since we're changing where we're reading from, but don't want to increase
  // the alignment past what can be read from or written to.
  // TODO: Is this worth it if we're creating a less aligned memcpy?  For
  // example we could be moving from movaps -> movq on x86.
  unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());

  IRBuilder<> Builder(M);
  if (UseMemMove)
    Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
                          Align, M->isVolatile());
  else
    Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
                         Align, M->isVolatile());

  // Remove the instruction we're replacing.
  MD->removeInstruction(M);
  M->eraseFromParent();
  ++NumMemCpyInstr;
  return true;
}

/// We've found that the (upward scanning) memory dependence of \p MemCpy is
/// \p MemSet.  Try to simplify \p MemSet to only set the trailing bytes that
/// weren't copied over by \p MemCpy.
///
/// In other words, transform:
/// \code
///   memset(dst, c, dst_size);
///   memcpy(dst, src, src_size);
/// \endcode
/// into:
/// \code
///   memcpy(dst, src, src_size);
///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
/// \endcode
bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
                                              MemSetInst *MemSet) {
  // We can only transform memset/memcpy with the same destination.
  if (MemSet->getDest() != MemCpy->getDest())
    return false;

  // Check that there are no other dependencies on the memset destination.
  MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
      MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent());
  if (DstDepInfo.getInst() != MemSet)
    return false;

  // Use the same i8* dest as the memcpy, killing the memset dest if different.
  Value *Dest = MemCpy->getRawDest();
  Value *DestSize = MemSet->getLength();
  Value *SrcSize = MemCpy->getLength();

  // By default, create an unaligned memset.
  unsigned Align = 1;
  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
  // of the sum.
  const unsigned DestAlign =
      std::max(MemSet->getAlignment(), MemCpy->getAlignment());
  if (DestAlign > 1)
    if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
      Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);

  IRBuilder<> Builder(MemCpy);

  // If the sizes have different types, zext the smaller one.
  if (DestSize->getType() != SrcSize->getType()) {
    if (DestSize->getType()->getIntegerBitWidth() >
        SrcSize->getType()->getIntegerBitWidth())
      SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
    else
      DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
  }

  Value *MemsetLen =
      Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize),
                           ConstantInt::getNullValue(DestSize->getType()),
                           Builder.CreateSub(DestSize, SrcSize));
  Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
                       MemsetLen, Align);

  MD->removeInstruction(MemSet);
  MemSet->eraseFromParent();
  return true;
}

/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
///   memset(dst1, c, dst1_size);
///   memcpy(dst2, dst1, dst2_size);
/// \endcode
/// into:
/// \code
///   memset(dst1, c, dst1_size);
///   memset(dst2, c, dst2_size);
/// \endcode
/// When dst2_size <= dst1_size.
///
/// The \p MemCpy must have a Constant length.
bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                           MemSetInst *MemSet) {
  // This only makes sense on memcpy(..., memset(...), ...).
  if (MemSet->getRawDest() != MemCpy->getRawSource())
    return false;

  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
  ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
  // Make sure the memcpy doesn't read any more than what the memset wrote.
  // Don't worry about sizes larger than i64.
  if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
    return false;

  IRBuilder<> Builder(MemCpy);
  Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
                       CopySize, MemCpy->getAlignment());
  return true;
}

/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances).  This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
  // We can only optimize non-volatile memcpy's.
  if (M->isVolatile()) return false;

  // If the source and destination of the memcpy are the same, then zap it.
  if (M->getSource() == M->getDest()) {
    MD->removeInstruction(M);
    M->eraseFromParent();
    return false;
  }

  // If copying from a constant, try to turn the memcpy into a memset.
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
        IRBuilder<> Builder(M);
        Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
                             M->getAlignment(), false);
        MD->removeInstruction(M);
        M->eraseFromParent();
        ++NumCpyToSet;
        return true;
      }

  MemDepResult DepInfo = MD->getDependency(M);

  // Try to turn a partially redundant memset + memcpy into
  // memcpy + smaller memset.  We don't need the memcpy size for this.
  if (DepInfo.isClobber())
    if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
      if (processMemSetMemCpyDependence(M, MDep))
        return true;

  // The optimizations after this point require the memcpy size.
  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
  if (!CopySize) return false;

  // There are four possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
  //   b) call-memcpy xform for return slot optimization.
  //   c) memcpy from freshly alloca'd space or space that has just started its
  //      lifetime copies undefined data, and we can therefore eliminate the
  //      memcpy in favor of the data that was already at the destination.
  //   d) memcpy from a just-memset'd source can be turned into memset.
  if (DepInfo.isClobber()) {
    if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
      if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
                               CopySize->getZExtValue(), M->getAlignment(),
                               C)) {
        MD->removeInstruction(M);
        M->eraseFromParent();
        return true;
      }
    }
  }

  MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
                                                         M, M->getParent());

  if (SrcDepInfo.isClobber()) {
    if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
      return processMemCpyMemCpyDependence(M, MDep);
  } else if (SrcDepInfo.isDef()) {
    Instruction *I = SrcDepInfo.getInst();
    bool hasUndefContents = false;

    if (isa<AllocaInst>(I)) {
      hasUndefContents = true;
    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
        if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
          if (LTSize->getZExtValue() >= CopySize->getZExtValue())
            hasUndefContents = true;
    }

    if (hasUndefContents) {
      MD->removeInstruction(M);
      M->eraseFromParent();
      ++NumMemCpyInstr;
      return true;
    }
  }

  if (SrcDepInfo.isClobber())
    if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
      if (performMemCpyToMemSetOptzn(M, MDep)) {
        MD->removeInstruction(M);
        M->eraseFromParent();
        ++NumCpyToSet;
        return true;
      }

  return false;
}

/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
/// are guaranteed not to alias.
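///
/// A sketch of the rewrite (illustrative, simplified IR), valid only when
/// AliasAnalysis proves the destination and source cannot overlap:
/// \code
///   call void @llvm.memmove(i8* %dst, i8* %src, i64 %n, i32 1, i1 false)
/// \endcode
/// becomes
/// \code
///   call void @llvm.memcpy(i8* %dst, i8* %src, i64 %n, i32 1, i1 false)
/// \endcode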
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  if (!TLI->has(LibFunc::memmove))
    return false;

  // See if the pointers alias.
  if (!AA.isNoAlias(MemoryLocation::getForDest(M),
                    MemoryLocation::getForSource(M)))
    return false;

  DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");

  // If not, then we know we can transform this.
  Module *Mod = M->getParent()->getParent()->getParent();
  Type *ArgTys[3] = { M->getRawDest()->getType(),
                      M->getRawSource()->getType(),
                      M->getLength()->getType() };
  M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
                                                 ArgTys));

  // MemDep may have overly conservative information about this instruction;
  // just conservatively flush it from the cache.
  MD->removeInstruction(M);

  ++NumMoveToCpy;
  return true;
}

/// processByValArgument - This is called on every byval argument in call sites.
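///
/// If the argument is fed by a memcpy from another object, e.g. (an
/// illustrative sketch, not taken from a real test case):
/// \code
///   memcpy(%tmp <- %src, n)
///   call void @foo(%struct.T* byval %tmp)
/// \endcode
/// and %src is not modified between the memcpy and the call, we can pass
/// %src to the byval argument directly, letting the memcpy become dead.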
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
  const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
  // Find out what feeds this byval argument.
  Value *ByValArg = CS.getArgument(ArgNo);
  Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
  uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
  MemDepResult DepInfo = MD->getPointerDependencyFrom(
      MemoryLocation(ByValArg, ByValSize), true, CS.getInstruction(),
      CS.getInstruction()->getParent());
  if (!DepInfo.isClobber())
    return false;

  // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
  // a memcpy, see if we can byval from the source of the memcpy instead of the
  // result.
  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
  if (!MDep || MDep->isVolatile() ||
      ByValArg->stripPointerCasts() != MDep->getDest())
    return false;

  // The length of the memcpy must be larger or equal to the size of the byval.
  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  if (!C1 || C1->getValue().getZExtValue() < ByValSize)
    return false;

  // Get the alignment of the byval.  If the call doesn't specify the alignment,
  // then it is some target specific value that we can't know.
  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
  if (ByValAlign == 0) return false;

  // If it is greater than the memcpy, then we check to see if we can force the
  // source of the memcpy to the alignment we need.  If we fail, we bail out.
  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
          *CS->getParent()->getParent());
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  if (MDep->getAlignment() < ByValAlign &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
                                 CS.getInstruction(), &AC, &DT) < ByValAlign)
    return false;

  // Verify that the copied-from memory doesn't change in between the memcpy and
  // the byval call.
  //    memcpy(a <- b)
  //    *b = 42;
  //    foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  //
  // NOTE: This is conservative, it will stop on any read from the source loc,
  // not just the defining memcpy.
  MemDepResult SourceDep =
      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
                                   CS.getInstruction(), MDep->getParent());
  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
    return false;

  Value *TmpCast = MDep->getSource();
  if (MDep->getSource()->getType() != ByValArg->getType())
    TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
                              "tmpcast", CS.getInstruction());

  DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
               << "  " << *MDep << "\n"
               << "  " << *CS.getInstruction() << "\n");

  // Otherwise we're good!  Update the byval argument.
  CS.setArgument(ArgNo, TmpCast);
  ++NumMemCpyInstr;
  return true;
}

/// iterateOnFunction - Executes one iteration of MemCpyOpt.
bool MemCpyOpt::iterateOnFunction(Function &F) {
  bool MadeChange = false;

  // Walk all instructions in the function.
  for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
      // Avoid invalidating the iterator.
      Instruction *I = BI++;

      bool RepeatInstruction = false;

      if (StoreInst *SI = dyn_cast<StoreInst>(I))
        MadeChange |= processStore(SI, BI);
      else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
        RepeatInstruction = processMemSet(M, BI);
      else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
        RepeatInstruction = processMemCpy(M);
      else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
        RepeatInstruction = processMemMove(M);
      else if (auto CS = CallSite(I)) {
        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
          if (CS.isByValArgument(i))
            MadeChange |= processByValArgument(CS, i);
      }

      // Reprocess the instruction if desired.
      if (RepeatInstruction) {
        if (BI != BB->begin()) --BI;
        MadeChange = true;
      }
    }
  }

  return MadeChange;
}

// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
// function.
//
bool MemCpyOpt::runOnFunction(Function &F) {
  if (skipOptnoneFunction(F))
    return false;

  bool MadeChange = false;
  MD = &getAnalysis<MemoryDependenceAnalysis>();
  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  // If we don't have at least memset and memcpy, there is little point in doing
  // anything here.  These are required by a freestanding implementation, so if
  // even they are disabled, there is no point in trying hard.
  if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
    return false;

  while (1) {
    if (!iterateOnFunction(F))
      break;
    MadeChange = true;
  }

  MD = nullptr;
  return MadeChange;
}