LoadCombine.cpp 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. //===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. /// \file
  10. /// This transformation combines adjacent loads.
  11. ///
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Transforms/Scalar.h"
  14. #include "llvm/ADT/DenseMap.h"
  15. #include "llvm/ADT/Statistic.h"
  16. #include "llvm/Analysis/AliasAnalysis.h"
  17. #include "llvm/Analysis/AliasSetTracker.h"
  18. #include "llvm/Analysis/TargetFolder.h"
  19. #include "llvm/IR/DataLayout.h"
  20. #include "llvm/IR/Function.h"
  21. #include "llvm/IR/IRBuilder.h"
  22. #include "llvm/IR/Instructions.h"
  23. #include "llvm/IR/Module.h"
  24. #include "llvm/Pass.h"
  25. #include "llvm/Support/Debug.h"
  26. #include "llvm/Support/MathExtras.h"
  27. #include "llvm/Support/raw_ostream.h"
  28. using namespace llvm;
  29. #define DEBUG_TYPE "load-combine"
  30. STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining");
  31. STATISTIC(NumLoadsCombined, "Number of loads combined");
  32. namespace {
  33. struct PointerOffsetPair {
  34. Value *Pointer;
  35. uint64_t Offset;
  36. };
  37. struct LoadPOPPair {
  38. LoadPOPPair() = default;
  39. LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O)
  40. : Load(L), POP(P), InsertOrder(O) {}
  41. LoadInst *Load;
  42. PointerOffsetPair POP;
  43. /// \brief The new load needs to be created before the first load in IR order.
  44. unsigned InsertOrder;
  45. };
  46. class LoadCombine : public BasicBlockPass {
  47. LLVMContext *C;
  48. AliasAnalysis *AA;
  49. public:
  50. LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
  51. initializeSROAPass(*PassRegistry::getPassRegistry());
  52. }
  53. using llvm::Pass::doInitialization;
  54. bool doInitialization(Function &) override;
  55. bool runOnBasicBlock(BasicBlock &BB) override;
  56. void getAnalysisUsage(AnalysisUsage &AU) const override;
  57. const char *getPassName() const override { return "LoadCombine"; }
  58. static char ID;
  59. typedef IRBuilder<true, TargetFolder> BuilderTy;
  60. private:
  61. BuilderTy *Builder;
  62. PointerOffsetPair getPointerOffsetPair(LoadInst &);
  63. bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
  64. bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
  65. bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
  66. };
  67. }
  68. bool LoadCombine::doInitialization(Function &F) {
  69. DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
  70. C = &F.getContext();
  71. return true;
  72. }
  73. PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
  74. PointerOffsetPair POP;
  75. POP.Pointer = LI.getPointerOperand();
  76. POP.Offset = 0;
  77. while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
  78. if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
  79. auto &DL = LI.getModule()->getDataLayout();
  80. unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
  81. APInt Offset(BitWidth, 0);
  82. if (GEP->accumulateConstantOffset(DL, Offset))
  83. POP.Offset += Offset.getZExtValue();
  84. else
  85. // Can't handle GEPs with variable indices.
  86. return POP;
  87. POP.Pointer = GEP->getPointerOperand();
  88. } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer))
  89. POP.Pointer = BC->getOperand(0);
  90. }
  91. return POP;
  92. }
  93. bool LoadCombine::combineLoads(
  94. DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
  95. bool Combined = false;
  96. for (auto &Loads : LoadMap) {
  97. if (Loads.second.size() < 2)
  98. continue;
  99. std::sort(Loads.second.begin(), Loads.second.end(),
  100. [](const LoadPOPPair &A, const LoadPOPPair &B) {
  101. return A.POP.Offset < B.POP.Offset;
  102. });
  103. if (aggregateLoads(Loads.second))
  104. Combined = true;
  105. }
  106. return Combined;
  107. }
  108. /// \brief Try to aggregate loads from a sorted list of loads to be combined.
  109. ///
  110. /// It is guaranteed that no writes occur between any of the loads. All loads
  111. /// have the same base pointer. There are at least two loads.
  112. bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
  113. assert(Loads.size() >= 2 && "Insufficient loads!");
  114. LoadInst *BaseLoad = nullptr;
  115. SmallVector<LoadPOPPair, 8> AggregateLoads;
  116. bool Combined = false;
  117. uint64_t PrevOffset = -1ull;
  118. uint64_t PrevSize = 0;
  119. for (auto &L : Loads) {
  120. if (PrevOffset == -1ull) {
  121. BaseLoad = L.Load;
  122. PrevOffset = L.POP.Offset;
  123. PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
  124. L.Load->getType());
  125. AggregateLoads.push_back(L);
  126. continue;
  127. }
  128. if (L.Load->getAlignment() > BaseLoad->getAlignment())
  129. continue;
  130. if (L.POP.Offset > PrevOffset + PrevSize) {
  131. // No other load will be combinable
  132. if (combineLoads(AggregateLoads))
  133. Combined = true;
  134. AggregateLoads.clear();
  135. PrevOffset = -1;
  136. continue;
  137. }
  138. if (L.POP.Offset != PrevOffset + PrevSize)
  139. // This load is offset less than the size of the last load.
  140. // FIXME: We may want to handle this case.
  141. continue;
  142. PrevOffset = L.POP.Offset;
  143. PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
  144. L.Load->getType());
  145. AggregateLoads.push_back(L);
  146. }
  147. if (combineLoads(AggregateLoads))
  148. Combined = true;
  149. return Combined;
  150. }
  151. /// \brief Given a list of combinable load. Combine the maximum number of them.
  152. bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
  153. // Remove loads from the end while the size is not a power of 2.
  154. unsigned TotalSize = 0;
  155. for (const auto &L : Loads)
  156. TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
  157. while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
  158. TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
  159. if (Loads.size() < 2)
  160. return false;
  161. DEBUG({
  162. dbgs() << "***** Combining Loads ******\n";
  163. for (const auto &L : Loads) {
  164. dbgs() << L.POP.Offset << ": " << *L.Load << "\n";
  165. }
  166. });
  167. // Find first load. This is where we put the new load.
  168. LoadPOPPair FirstLP;
  169. FirstLP.InsertOrder = -1u;
  170. for (const auto &L : Loads)
  171. if (L.InsertOrder < FirstLP.InsertOrder)
  172. FirstLP = L;
  173. unsigned AddressSpace =
  174. FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
  175. Builder->SetInsertPoint(FirstLP.Load);
  176. Value *Ptr = Builder->CreateConstGEP1_64(
  177. Builder->CreatePointerCast(Loads[0].POP.Pointer,
  178. Builder->getInt8PtrTy(AddressSpace)),
  179. Loads[0].POP.Offset);
  180. LoadInst *NewLoad = new LoadInst(
  181. Builder->CreatePointerCast(
  182. Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
  183. Ptr->getType()->getPointerAddressSpace())),
  184. Twine(Loads[0].Load->getName()) + ".combined", false,
  185. Loads[0].Load->getAlignment(), FirstLP.Load);
  186. for (const auto &L : Loads) {
  187. Builder->SetInsertPoint(L.Load);
  188. Value *V = Builder->CreateExtractInteger(
  189. L.Load->getModule()->getDataLayout(), NewLoad,
  190. cast<IntegerType>(L.Load->getType()),
  191. L.POP.Offset - Loads[0].POP.Offset, "combine.extract");
  192. L.Load->replaceAllUsesWith(V);
  193. }
  194. NumLoadsCombined = NumLoadsCombined + Loads.size();
  195. return true;
  196. }
  197. bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
  198. if (skipOptnoneFunction(BB))
  199. return false;
  200. AA = &getAnalysis<AliasAnalysis>();
  201. IRBuilder<true, TargetFolder> TheBuilder(
  202. BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
  203. Builder = &TheBuilder;
  204. DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
  205. AliasSetTracker AST(*AA);
  206. bool Combined = false;
  207. unsigned Index = 0;
  208. for (auto &I : BB) {
  209. if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) {
  210. if (combineLoads(LoadMap))
  211. Combined = true;
  212. LoadMap.clear();
  213. AST.clear();
  214. continue;
  215. }
  216. LoadInst *LI = dyn_cast<LoadInst>(&I);
  217. if (!LI)
  218. continue;
  219. ++NumLoadsAnalyzed;
  220. if (!LI->isSimple() || !LI->getType()->isIntegerTy())
  221. continue;
  222. auto POP = getPointerOffsetPair(*LI);
  223. if (!POP.Pointer)
  224. continue;
  225. LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++));
  226. AST.add(LI);
  227. }
  228. if (combineLoads(LoadMap))
  229. Combined = true;
  230. return Combined;
  231. }
  232. void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const {
  233. AU.setPreservesCFG();
  234. AU.addRequired<AliasAnalysis>();
  235. AU.addPreserved<AliasAnalysis>();
  236. }
  237. char LoadCombine::ID = 0;
  238. BasicBlockPass *llvm::createLoadCombinePass() {
  239. return new LoadCombine();
  240. }
  241. INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads",
  242. false, false)
  243. INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
  244. INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads",
  245. false, false)