SampleProfile.cpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777
  1. //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the SampleProfileLoader transformation. This pass
  11. // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
  12. // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
  13. // profile information in the given profile.
  14. //
  15. // This pass generates branch weight annotations on the IR:
  16. //
  17. // - prof: Represents branch weights. This annotation is added to branches
  18. // to indicate the weights of each edge coming out of the branch.
  19. // The weight of each edge is the weight of the target block for
  20. // that edge. The weight of a block B is computed as the maximum
  21. // number of samples found in B.
  22. //
  23. //===----------------------------------------------------------------------===//
  24. #include "llvm/Transforms/Scalar.h"
  25. #include "llvm/ADT/DenseMap.h"
  26. #include "llvm/ADT/SmallPtrSet.h"
  27. #include "llvm/ADT/SmallSet.h"
  28. #include "llvm/ADT/StringRef.h"
  29. #include "llvm/Analysis/LoopInfo.h"
  30. #include "llvm/Analysis/PostDominators.h"
  31. #include "llvm/IR/Constants.h"
  32. #include "llvm/IR/DebugInfo.h"
  33. #include "llvm/IR/DiagnosticInfo.h"
  34. #include "llvm/IR/Dominators.h"
  35. #include "llvm/IR/Function.h"
  36. #include "llvm/IR/InstIterator.h"
  37. #include "llvm/IR/Instructions.h"
  38. #include "llvm/IR/LLVMContext.h"
  39. #include "llvm/IR/MDBuilder.h"
  40. #include "llvm/IR/Metadata.h"
  41. #include "llvm/IR/Module.h"
  42. #include "llvm/Pass.h"
  43. #include "llvm/ProfileData/SampleProfReader.h"
  44. #include "llvm/Support/CommandLine.h"
  45. #include "llvm/Support/Debug.h"
  46. #include "llvm/Support/raw_ostream.h"
  47. #include <cctype>
  48. using namespace llvm;
  49. using namespace sampleprof;
  50. #define DEBUG_TYPE "sample-profile"
  51. // Command line option to specify the file to read samples from. This is
  52. // mainly used for debugging.
  53. static cl::opt<std::string> SampleProfileFile(
  54. "sample-profile-file", cl::init(""), cl::value_desc("filename"),
  55. cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
  56. static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
  57. "sample-profile-max-propagate-iterations", cl::init(100),
  58. cl::desc("Maximum number of iterations to go through when propagating "
  59. "sample block/edge weights through the CFG."));
  60. namespace {
  61. typedef DenseMap<BasicBlock *, unsigned> BlockWeightMap;
  62. typedef DenseMap<BasicBlock *, BasicBlock *> EquivalenceClassMap;
  63. typedef std::pair<BasicBlock *, BasicBlock *> Edge;
  64. typedef DenseMap<Edge, unsigned> EdgeWeightMap;
  65. typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
  66. /// \brief Sample profile pass.
  67. ///
  68. /// This pass reads profile data from the file specified by
  69. /// -sample-profile-file and annotates every affected function with the
  70. /// profile information found in that file.
  71. class SampleProfileLoader : public FunctionPass {
  72. public:
  73. // Class identification, replacement for typeinfo
  74. static char ID;
  75. SampleProfileLoader(StringRef Name = SampleProfileFile)
  76. : FunctionPass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Ctx(nullptr),
  77. Reader(), Samples(nullptr), Filename(Name), ProfileIsValid(false) {
  78. initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
  79. }
  80. bool doInitialization(Module &M) override;
  81. void dump() { Reader->dump(); }
  82. const char *getPassName() const override { return "Sample profile pass"; }
  83. bool runOnFunction(Function &F) override;
  84. void getAnalysisUsage(AnalysisUsage &AU) const override {
  85. AU.setPreservesCFG();
  86. AU.addRequired<LoopInfoWrapperPass>();
  87. AU.addRequired<DominatorTreeWrapperPass>();
  88. AU.addRequired<PostDominatorTree>();
  89. }
  90. protected:
  91. unsigned getFunctionLoc(Function &F);
  92. bool emitAnnotations(Function &F);
  93. unsigned getInstWeight(Instruction &I);
  94. unsigned getBlockWeight(BasicBlock *BB);
  95. void printEdgeWeight(raw_ostream &OS, Edge E);
  96. void printBlockWeight(raw_ostream &OS, BasicBlock *BB);
  97. void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB);
  98. bool computeBlockWeights(Function &F);
  99. void findEquivalenceClasses(Function &F);
  100. void findEquivalencesFor(BasicBlock *BB1,
  101. SmallVector<BasicBlock *, 8> Descendants,
  102. DominatorTreeBase<BasicBlock> *DomTree);
  103. void propagateWeights(Function &F);
  104. unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
  105. void buildEdges(Function &F);
  106. bool propagateThroughEdges(Function &F);
  107. /// \brief Line number for the function header. Used to compute absolute
  108. /// line numbers from the relative line numbers found in the profile.
  109. unsigned HeaderLineno;
  110. /// \brief Map basic blocks to their computed weights.
  111. ///
  112. /// The weight of a basic block is defined to be the maximum
  113. /// of all the instruction weights in that block.
  114. BlockWeightMap BlockWeights;
  115. /// \brief Map edges to their computed weights.
  116. ///
  117. /// Edge weights are computed by propagating basic block weights in
  118. /// SampleProfile::propagateWeights.
  119. EdgeWeightMap EdgeWeights;
  120. /// \brief Set of visited blocks during propagation.
  121. SmallPtrSet<BasicBlock *, 128> VisitedBlocks;
  122. /// \brief Set of visited edges during propagation.
  123. SmallSet<Edge, 128> VisitedEdges;
  124. /// \brief Equivalence classes for block weights.
  125. ///
  126. /// Two blocks BB1 and BB2 are in the same equivalence class if they
  127. /// dominate and post-dominate each other, and they are in the same loop
  128. /// nest. When this happens, the two blocks are guaranteed to execute
  129. /// the same number of times.
  130. EquivalenceClassMap EquivalenceClass;
  131. /// \brief Dominance, post-dominance and loop information.
  132. DominatorTree *DT;
  133. PostDominatorTree *PDT;
  134. LoopInfo *LI;
  135. /// \brief Predecessors for each basic block in the CFG.
  136. BlockEdgeMap Predecessors;
  137. /// \brief Successors for each basic block in the CFG.
  138. BlockEdgeMap Successors;
  139. /// \brief LLVM context holding the debug data we need.
  140. LLVMContext *Ctx;
  141. /// \brief Profile reader object.
  142. std::unique_ptr<SampleProfileReader> Reader;
  143. /// \brief Samples collected for the body of this function.
  144. FunctionSamples *Samples;
  145. /// \brief Name of the profile file to load.
  146. StringRef Filename;
  147. /// \brief Flag indicating whether the profile input loaded successfully.
  148. bool ProfileIsValid;
  149. };
  150. }
  151. /// \brief Print the weight of edge \p E on stream \p OS.
  152. ///
  153. /// \param OS Stream to emit the output to.
  154. /// \param E Edge to print.
  155. void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
  156. OS << "weight[" << E.first->getName() << "->" << E.second->getName()
  157. << "]: " << EdgeWeights[E] << "\n";
  158. }
  159. /// \brief Print the equivalence class of block \p BB on stream \p OS.
  160. ///
  161. /// \param OS Stream to emit the output to.
  162. /// \param BB Block to print.
  163. void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
  164. BasicBlock *BB) {
  165. BasicBlock *Equiv = EquivalenceClass[BB];
  166. OS << "equivalence[" << BB->getName()
  167. << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
  168. }
  169. /// \brief Print the weight of block \p BB on stream \p OS.
  170. ///
  171. /// \param OS Stream to emit the output to.
  172. /// \param BB Block to print.
  173. void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
  174. OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
  175. }
  176. /// \brief Get the weight for an instruction.
  177. ///
  178. /// The "weight" of an instruction \p Inst is the number of samples
  179. /// collected on that instruction at runtime. To retrieve it, we
  180. /// need to compute the line number of \p Inst relative to the start of its
  181. /// function. We use HeaderLineno to compute the offset. We then
  182. /// look up the samples collected for \p Inst using BodySamples.
  183. ///
  184. /// \param Inst Instruction to query.
  185. ///
  186. /// \returns The profiled weight of I.
  187. unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
  188. DebugLoc DLoc = Inst.getDebugLoc();
  189. if (!DLoc)
  190. return 0;
  191. unsigned Lineno = DLoc.getLine();
  192. if (Lineno < HeaderLineno)
  193. return 0;
  194. const DILocation *DIL = DLoc;
  195. int LOffset = Lineno - HeaderLineno;
  196. unsigned Discriminator = DIL->getDiscriminator();
  197. unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
  198. DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
  199. << " (line offset: " << LOffset << "." << Discriminator
  200. << " - weight: " << Weight << ")\n");
  201. return Weight;
  202. }
  203. /// \brief Compute the weight of a basic block.
  204. ///
  205. /// The weight of basic block \p BB is the maximum weight of all the
  206. /// instructions in BB. The weight of \p BB is computed and cached in
  207. /// the BlockWeights map.
  208. ///
  209. /// \param BB The basic block to query.
  210. ///
  211. /// \returns The computed weight of BB.
  212. unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
  213. // If we've computed BB's weight before, return it.
  214. std::pair<BlockWeightMap::iterator, bool> Entry =
  215. BlockWeights.insert(std::make_pair(BB, 0));
  216. if (!Entry.second)
  217. return Entry.first->second;
  218. // Otherwise, compute and cache BB's weight.
  219. unsigned Weight = 0;
  220. for (auto &I : BB->getInstList()) {
  221. unsigned InstWeight = getInstWeight(I);
  222. if (InstWeight > Weight)
  223. Weight = InstWeight;
  224. }
  225. Entry.first->second = Weight;
  226. return Weight;
  227. }
  228. /// \brief Compute and store the weights of every basic block.
  229. ///
  230. /// This populates the BlockWeights map by computing
  231. /// the weights of every basic block in the CFG.
  232. ///
  233. /// \param F The function to query.
  234. bool SampleProfileLoader::computeBlockWeights(Function &F) {
  235. bool Changed = false;
  236. DEBUG(dbgs() << "Block weights\n");
  237. for (auto &BB : F) {
  238. unsigned Weight = getBlockWeight(&BB);
  239. Changed |= (Weight > 0);
  240. DEBUG(printBlockWeight(dbgs(), &BB));
  241. }
  242. return Changed;
  243. }
  244. /// \brief Find equivalence classes for the given block.
  245. ///
  246. /// This finds all the blocks that are guaranteed to execute the same
  247. /// number of times as \p BB1. To do this, it traverses all the
  248. /// descendants of \p BB1 in the dominator or post-dominator tree.
  249. ///
  250. /// A block BB2 will be in the same equivalence class as \p BB1 if
  251. /// the following holds:
  252. ///
  253. /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
  254. /// is a descendant of \p BB1 in the dominator tree, then BB2 should
  255. /// dominate BB1 in the post-dominator tree.
  256. ///
  257. /// 2- Both BB2 and \p BB1 must be in the same loop.
  258. ///
  259. /// For every block BB2 that meets those two requirements, we set BB2's
  260. /// equivalence class to \p BB1.
  261. ///
  262. /// \param BB1 Block to check.
  263. /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
  264. /// \param DomTree Opposite dominator tree. If \p Descendants is filled
  265. /// with blocks from \p BB1's dominator tree, then
  266. /// this is the post-dominator tree, and vice versa.
  267. void SampleProfileLoader::findEquivalencesFor(
  268. BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
  269. DominatorTreeBase<BasicBlock> *DomTree) {
  270. for (auto *BB2 : Descendants) {
  271. bool IsDomParent = DomTree->dominates(BB2, BB1);
  272. bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
  273. if (BB1 != BB2 && VisitedBlocks.insert(BB2).second && IsDomParent &&
  274. IsInSameLoop) {
  275. EquivalenceClass[BB2] = BB1;
  276. // If BB2 is heavier than BB1, make BB2 have the same weight
  277. // as BB1.
  278. //
  279. // Note that we don't worry about the opposite situation here
  280. // (when BB2 is lighter than BB1). We will deal with this
  281. // during the propagation phase. Right now, we just want to
  282. // make sure that BB1 has the largest weight of all the
  283. // members of its equivalence set.
  284. unsigned &BB1Weight = BlockWeights[BB1];
  285. unsigned &BB2Weight = BlockWeights[BB2];
  286. BB1Weight = std::max(BB1Weight, BB2Weight);
  287. }
  288. }
  289. }
  290. /// \brief Find equivalence classes.
  291. ///
  292. /// Since samples may be missing from blocks, we can fill in the gaps by setting
  293. /// the weights of all the blocks in the same equivalence class to the same
  294. /// weight. To compute the concept of equivalence, we use dominance and loop
  295. /// information. Two blocks B1 and B2 are in the same equivalence class if B1
  296. /// dominates B2, B2 post-dominates B1 and both are in the same loop.
  297. ///
  298. /// \param F The function to query.
  299. void SampleProfileLoader::findEquivalenceClasses(Function &F) {
  300. SmallVector<BasicBlock *, 8> DominatedBBs;
  301. DEBUG(dbgs() << "\nBlock equivalence classes\n");
  302. // Find equivalence sets based on dominance and post-dominance information.
  303. for (auto &BB : F) {
  304. BasicBlock *BB1 = &BB;
  305. // Compute BB1's equivalence class once.
  306. if (EquivalenceClass.count(BB1)) {
  307. DEBUG(printBlockEquivalence(dbgs(), BB1));
  308. continue;
  309. }
  310. // By default, blocks are in their own equivalence class.
  311. EquivalenceClass[BB1] = BB1;
  312. // Traverse all the blocks dominated by BB1. We are looking for
  313. // every basic block BB2 such that:
  314. //
  315. // 1- BB1 dominates BB2.
  316. // 2- BB2 post-dominates BB1.
  317. // 3- BB1 and BB2 are in the same loop nest.
  318. //
  319. // If all those conditions hold, it means that BB2 is executed
  320. // as many times as BB1, so they are placed in the same equivalence
  321. // class by making BB2's equivalence class be BB1.
  322. DominatedBBs.clear();
  323. DT->getDescendants(BB1, DominatedBBs);
  324. findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
  325. // Repeat the same logic for all the blocks post-dominated by BB1.
  326. // We are looking for every basic block BB2 such that:
  327. //
  328. // 1- BB1 post-dominates BB2.
  329. // 2- BB2 dominates BB1.
  330. // 3- BB1 and BB2 are in the same loop nest.
  331. //
  332. // If all those conditions hold, BB2's equivalence class is BB1.
  333. DominatedBBs.clear();
  334. PDT->getDescendants(BB1, DominatedBBs);
  335. findEquivalencesFor(BB1, DominatedBBs, DT);
  336. DEBUG(printBlockEquivalence(dbgs(), BB1));
  337. }
  338. // Assign weights to equivalence classes.
  339. //
  340. // All the basic blocks in the same equivalence class will execute
  341. // the same number of times. Since we know that the head block in
  342. // each equivalence class has the largest weight, assign that weight
  343. // to all the blocks in that equivalence class.
  344. DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
  345. for (auto &BI : F) {
  346. BasicBlock *BB = &BI;
  347. BasicBlock *EquivBB = EquivalenceClass[BB];
  348. if (BB != EquivBB)
  349. BlockWeights[BB] = BlockWeights[EquivBB];
  350. DEBUG(printBlockWeight(dbgs(), BB));
  351. }
  352. }
  353. /// \brief Visit the given edge to decide if it has a valid weight.
  354. ///
  355. /// If \p E has not been visited before, we copy to \p UnknownEdge
  356. /// and increment the count of unknown edges.
  357. ///
  358. /// \param E Edge to visit.
  359. /// \param NumUnknownEdges Current number of unknown edges.
  360. /// \param UnknownEdge Set if E has not been visited before.
  361. ///
  362. /// \returns E's weight, if known. Otherwise, return 0.
  363. unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
  364. Edge *UnknownEdge) {
  365. if (!VisitedEdges.count(E)) {
  366. (*NumUnknownEdges)++;
  367. *UnknownEdge = E;
  368. return 0;
  369. }
  370. return EdgeWeights[E];
  371. }
  372. /// \brief Propagate weights through incoming/outgoing edges.
  373. ///
  374. /// If the weight of a basic block is known, and there is only one edge
  375. /// with an unknown weight, we can calculate the weight of that edge.
  376. ///
  377. /// Similarly, if all the edges have a known count, we can calculate the
  378. /// count of the basic block, if needed.
  379. ///
  380. /// \param F Function to process.
  381. ///
  382. /// \returns True if new weights were assigned to edges or blocks.
  383. bool SampleProfileLoader::propagateThroughEdges(Function &F) {
  384. bool Changed = false;
  385. DEBUG(dbgs() << "\nPropagation through edges\n");
  386. for (auto &BI : F) {
  387. BasicBlock *BB = &BI;
  388. // Visit all the predecessor and successor edges to determine
  389. // which ones have a weight assigned already. Note that it doesn't
  390. // matter that we only keep track of a single unknown edge. The
  391. // only case we are interested in handling is when only a single
  392. // edge is unknown (see setEdgeOrBlockWeight).
  393. for (unsigned i = 0; i < 2; i++) {
  394. unsigned TotalWeight = 0;
  395. unsigned NumUnknownEdges = 0;
  396. Edge UnknownEdge, SelfReferentialEdge;
  397. if (i == 0) {
  398. // First, visit all predecessor edges.
  399. for (auto *Pred : Predecessors[BB]) {
  400. Edge E = std::make_pair(Pred, BB);
  401. TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
  402. if (E.first == E.second)
  403. SelfReferentialEdge = E;
  404. }
  405. } else {
  406. // On the second round, visit all successor edges.
  407. for (auto *Succ : Successors[BB]) {
  408. Edge E = std::make_pair(BB, Succ);
  409. TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
  410. }
  411. }
  412. // After visiting all the edges, there are three cases that we
  413. // can handle immediately:
  414. //
  415. // - All the edge weights are known (i.e., NumUnknownEdges == 0).
  416. // In this case, we simply check that the sum of all the edges
  417. // is the same as BB's weight. If not, we change BB's weight
  418. // to match. Additionally, if BB had not been visited before,
  419. // we mark it visited.
  420. //
  421. // - Only one edge is unknown and BB has already been visited.
  422. // In this case, we can compute the weight of the edge by
  423. // subtracting the total block weight from all the known
  424. // edge weights. If the edges weight more than BB, then the
  425. // edge of the last remaining edge is set to zero.
  426. //
  427. // - There exists a self-referential edge and the weight of BB is
  428. // known. In this case, this edge can be based on BB's weight.
  429. // We add up all the other known edges and set the weight on
  430. // the self-referential edge as we did in the previous case.
  431. //
  432. // In any other case, we must continue iterating. Eventually,
  433. // all edges will get a weight, or iteration will stop when
  434. // it reaches SampleProfileMaxPropagateIterations.
  435. if (NumUnknownEdges <= 1) {
  436. unsigned &BBWeight = BlockWeights[BB];
  437. if (NumUnknownEdges == 0) {
  438. // If we already know the weight of all edges, the weight of the
  439. // basic block can be computed. It should be no larger than the sum
  440. // of all edge weights.
  441. if (TotalWeight > BBWeight) {
  442. BBWeight = TotalWeight;
  443. Changed = true;
  444. DEBUG(dbgs() << "All edge weights for " << BB->getName()
  445. << " known. Set weight for block: ";
  446. printBlockWeight(dbgs(), BB););
  447. }
  448. if (VisitedBlocks.insert(BB).second)
  449. Changed = true;
  450. } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
  451. // If there is a single unknown edge and the block has been
  452. // visited, then we can compute E's weight.
  453. if (BBWeight >= TotalWeight)
  454. EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
  455. else
  456. EdgeWeights[UnknownEdge] = 0;
  457. VisitedEdges.insert(UnknownEdge);
  458. Changed = true;
  459. DEBUG(dbgs() << "Set weight for edge: ";
  460. printEdgeWeight(dbgs(), UnknownEdge));
  461. }
  462. } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
  463. unsigned &BBWeight = BlockWeights[BB];
  464. // We have a self-referential edge and the weight of BB is known.
  465. if (BBWeight >= TotalWeight)
  466. EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
  467. else
  468. EdgeWeights[SelfReferentialEdge] = 0;
  469. VisitedEdges.insert(SelfReferentialEdge);
  470. Changed = true;
  471. DEBUG(dbgs() << "Set self-referential edge weight to: ";
  472. printEdgeWeight(dbgs(), SelfReferentialEdge));
  473. }
  474. }
  475. }
  476. return Changed;
  477. }
  478. /// \brief Build in/out edge lists for each basic block in the CFG.
  479. ///
  480. /// We are interested in unique edges. If a block B1 has multiple
  481. /// edges to another block B2, we only add a single B1->B2 edge.
  482. void SampleProfileLoader::buildEdges(Function &F) {
  483. for (auto &BI : F) {
  484. BasicBlock *B1 = &BI;
  485. // Add predecessors for B1.
  486. SmallPtrSet<BasicBlock *, 16> Visited;
  487. if (!Predecessors[B1].empty())
  488. llvm_unreachable("Found a stale predecessors list in a basic block.");
  489. for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
  490. BasicBlock *B2 = *PI;
  491. if (Visited.insert(B2).second)
  492. Predecessors[B1].push_back(B2);
  493. }
  494. // Add successors for B1.
  495. Visited.clear();
  496. if (!Successors[B1].empty())
  497. llvm_unreachable("Found a stale successors list in a basic block.");
  498. for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
  499. BasicBlock *B2 = *SI;
  500. if (Visited.insert(B2).second)
  501. Successors[B1].push_back(B2);
  502. }
  503. }
  504. }
  505. /// \brief Propagate weights into edges
  506. ///
  507. /// The following rules are applied to every block BB in the CFG:
  508. ///
  509. /// - If BB has a single predecessor/successor, then the weight
  510. /// of that edge is the weight of the block.
  511. ///
  512. /// - If all incoming or outgoing edges are known except one, and the
  513. /// weight of the block is already known, the weight of the unknown
  514. /// edge will be the weight of the block minus the sum of all the known
  515. /// edges. If the sum of all the known edges is larger than BB's weight,
  516. /// we set the unknown edge weight to zero.
  517. ///
  518. /// - If there is a self-referential edge, and the weight of the block is
  519. /// known, the weight for that edge is set to the weight of the block
  520. /// minus the weight of the other incoming edges to that block (if
  521. /// known).
  522. void SampleProfileLoader::propagateWeights(Function &F) {
  523. bool Changed = true;
  524. unsigned i = 0;
  525. // Add an entry count to the function using the samples gathered
  526. // at the function entry.
  527. F.setEntryCount(Samples->getHeadSamples());
  528. // Before propagation starts, build, for each block, a list of
  529. // unique predecessors and successors. This is necessary to handle
  530. // identical edges in multiway branches. Since we visit all blocks and all
  531. // edges of the CFG, it is cleaner to build these lists once at the start
  532. // of the pass.
  533. buildEdges(F);
  534. // Propagate until we converge or we go past the iteration limit.
  535. while (Changed && i++ < SampleProfileMaxPropagateIterations) {
  536. Changed = propagateThroughEdges(F);
  537. }
  538. // Generate MD_prof metadata for every branch instruction using the
  539. // edge weights computed during propagation.
  540. DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
  541. MDBuilder MDB(F.getContext());
  542. for (auto &BI : F) {
  543. BasicBlock *BB = &BI;
  544. TerminatorInst *TI = BB->getTerminator();
  545. if (TI->getNumSuccessors() == 1)
  546. continue;
  547. if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
  548. continue;
  549. DEBUG(dbgs() << "\nGetting weights for branch at line "
  550. << TI->getDebugLoc().getLine() << ".\n");
  551. SmallVector<unsigned, 4> Weights;
  552. bool AllWeightsZero = true;
  553. for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
  554. BasicBlock *Succ = TI->getSuccessor(I);
  555. Edge E = std::make_pair(BB, Succ);
  556. unsigned Weight = EdgeWeights[E];
  557. DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
  558. Weights.push_back(Weight);
  559. if (Weight != 0)
  560. AllWeightsZero = false;
  561. }
  562. // Only set weights if there is at least one non-zero weight.
  563. // In any other case, let the analyzer set weights.
  564. if (!AllWeightsZero) {
  565. DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
  566. TI->setMetadata(llvm::LLVMContext::MD_prof,
  567. MDB.createBranchWeights(Weights));
  568. } else {
  569. DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
  570. }
  571. }
  572. }
  573. /// \brief Get the line number for the function header.
  574. ///
  575. /// This looks up function \p F in the current compilation unit and
  576. /// retrieves the line number where the function is defined. This is
  577. /// line 0 for all the samples read from the profile file. Every line
  578. /// number is relative to this line.
  579. ///
  580. /// \param F Function object to query.
  581. ///
  582. /// \returns the line number where \p F is defined. If it returns 0,
  583. /// it means that there is no debug information available for \p F.
  584. unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
  585. if (DISubprogram *S = getDISubprogram(&F))
  586. return S->getLine();
  587. // If could not find the start of \p F, emit a diagnostic to inform the user
  588. // about the missed opportunity.
  589. F.getContext().diagnose(DiagnosticInfoSampleProfile(
  590. "No debug information found in function " + F.getName() +
  591. ": Function profile not used",
  592. DS_Warning));
  593. return 0;
  594. }
  595. /// \brief Generate branch weight metadata for all branches in \p F.
  596. ///
  597. /// Branch weights are computed out of instruction samples using a
  598. /// propagation heuristic. Propagation proceeds in 3 phases:
  599. ///
  600. /// 1- Assignment of block weights. All the basic blocks in the function
  601. /// are initial assigned the same weight as their most frequently
  602. /// executed instruction.
  603. ///
  604. /// 2- Creation of equivalence classes. Since samples may be missing from
  605. /// blocks, we can fill in the gaps by setting the weights of all the
  606. /// blocks in the same equivalence class to the same weight. To compute
  607. /// the concept of equivalence, we use dominance and loop information.
  608. /// Two blocks B1 and B2 are in the same equivalence class if B1
  609. /// dominates B2, B2 post-dominates B1 and both are in the same loop.
  610. ///
  611. /// 3- Propagation of block weights into edges. This uses a simple
  612. /// propagation heuristic. The following rules are applied to every
  613. /// block BB in the CFG:
  614. ///
  615. /// - If BB has a single predecessor/successor, then the weight
  616. /// of that edge is the weight of the block.
  617. ///
  618. /// - If all the edges are known except one, and the weight of the
  619. /// block is already known, the weight of the unknown edge will
  620. /// be the weight of the block minus the sum of all the known
  621. /// edges. If the sum of all the known edges is larger than BB's weight,
  622. /// we set the unknown edge weight to zero.
  623. ///
  624. /// - If there is a self-referential edge, and the weight of the block is
  625. /// known, the weight for that edge is set to the weight of the block
  626. /// minus the weight of the other incoming edges to that block (if
  627. /// known).
  628. ///
  629. /// Since this propagation is not guaranteed to finalize for every CFG, we
  630. /// only allow it to proceed for a limited number of iterations (controlled
  631. /// by -sample-profile-max-propagate-iterations).
  632. ///
  633. /// FIXME: Try to replace this propagation heuristic with a scheme
  634. /// that is guaranteed to finalize. A work-list approach similar to
  635. /// the standard value propagation algorithm used by SSA-CCP might
  636. /// work here.
  637. ///
  638. /// Once all the branch weights are computed, we emit the MD_prof
  639. /// metadata on BB using the computed values for each of its branches.
  640. ///
  641. /// \param F The function to query.
  642. ///
  643. /// \returns true if \p F was modified. Returns false, otherwise.
  644. bool SampleProfileLoader::emitAnnotations(Function &F) {
  645. bool Changed = false;
  646. // Initialize invariants used during computation and propagation.
  647. HeaderLineno = getFunctionLoc(F);
  648. if (HeaderLineno == 0)
  649. return false;
  650. DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
  651. << ": " << HeaderLineno << "\n");
  652. // Compute basic block weights.
  653. Changed |= computeBlockWeights(F);
  654. if (Changed) {
  655. // Find equivalence classes.
  656. findEquivalenceClasses(F);
  657. // Propagate weights to all edges.
  658. propagateWeights(F);
  659. }
  660. return Changed;
  661. }
  662. char SampleProfileLoader::ID = 0;
  663. INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
  664. "Sample Profile loader", false, false)
  665. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  666. INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
  667. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  668. INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
  669. INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
  670. "Sample Profile loader", false, false)
  671. bool SampleProfileLoader::doInitialization(Module &M) {
  672. auto ReaderOrErr = SampleProfileReader::create(Filename, M.getContext());
  673. if (std::error_code EC = ReaderOrErr.getError()) {
  674. std::string Msg = "Could not open profile: " + EC.message();
  675. M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
  676. return false;
  677. }
  678. Reader = std::move(ReaderOrErr.get());
  679. ProfileIsValid = (Reader->read() == sampleprof_error::success);
  680. return true;
  681. }
  682. FunctionPass *llvm::createSampleProfileLoaderPass() {
  683. return new SampleProfileLoader(SampleProfileFile);
  684. }
  685. FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
  686. return new SampleProfileLoader(Name);
  687. }
  688. bool SampleProfileLoader::runOnFunction(Function &F) {
  689. if (!ProfileIsValid)
  690. return false;
  691. DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  692. PDT = &getAnalysis<PostDominatorTree>();
  693. LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  694. Ctx = &F.getParent()->getContext();
  695. Samples = Reader->getSamplesFor(F);
  696. if (!Samples->empty())
  697. return emitAnnotations(F);
  698. return false;
  699. }