SampleProfile.cpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782
  1. //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the SampleProfileLoader transformation. This pass
  11. // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
  12. // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
  13. // profile information in the given profile.
  14. //
  15. // This pass generates branch weight annotations on the IR:
  16. //
  17. // - prof: Represents branch weights. This annotation is added to branches
  18. // to indicate the weights of each edge coming out of the branch.
  19. // The weight of each edge is the weight of the target block for
  20. // that edge. The weight of a block B is computed as the maximum
  21. // number of samples found in B.
  22. //
  23. //===----------------------------------------------------------------------===//
  24. #include "llvm/Transforms/Scalar.h"
  25. #include "llvm/ADT/DenseMap.h"
  26. #include "llvm/ADT/SmallPtrSet.h"
  27. #include "llvm/ADT/SmallSet.h"
  28. #include "llvm/ADT/StringRef.h"
  29. #include "llvm/Analysis/LoopInfo.h"
  30. #include "llvm/Analysis/PostDominators.h"
  31. #include "llvm/IR/Constants.h"
  32. #include "llvm/IR/DebugInfo.h"
  33. #include "llvm/IR/DiagnosticInfo.h"
  34. #include "llvm/IR/Dominators.h"
  35. #include "llvm/IR/Function.h"
  36. #include "llvm/IR/InstIterator.h"
  37. #include "llvm/IR/Instructions.h"
  38. #include "llvm/IR/LLVMContext.h"
  39. #include "llvm/IR/MDBuilder.h"
  40. #include "llvm/IR/Metadata.h"
  41. #include "llvm/IR/Module.h"
  42. #include "llvm/Pass.h"
  43. #include "llvm/ProfileData/SampleProfReader.h"
  44. #include "llvm/Support/CommandLine.h"
  45. #include "llvm/Support/Debug.h"
  46. #include "llvm/Support/raw_ostream.h"
  47. #include <cctype>
  48. using namespace llvm;
  49. using namespace sampleprof;
  50. #define DEBUG_TYPE "sample-profile"
  51. #if 0 // HLSL Change Start
  52. // Command line option to specify the file to read samples from. This is
  53. // mainly used for debugging.
  54. static cl::opt<std::string> SampleProfileFile(
  55. "sample-profile-file", cl::init(""), cl::value_desc("filename"),
  56. cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
  57. static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
  58. "sample-profile-max-propagate-iterations", cl::init(100),
  59. cl::desc("Maximum number of iterations to go through when propagating "
  60. "sample block/edge weights through the CFG."));
  61. #else
  62. static const char SampleProfileFile[] = "";
  63. static const unsigned SampleProfileMaxPropagateIterations = 100;
  64. #endif // HLSL Change Ends
  65. namespace {
  66. typedef DenseMap<BasicBlock *, unsigned> BlockWeightMap;
  67. typedef DenseMap<BasicBlock *, BasicBlock *> EquivalenceClassMap;
  68. typedef std::pair<BasicBlock *, BasicBlock *> Edge;
  69. typedef DenseMap<Edge, unsigned> EdgeWeightMap;
  70. typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
  71. /// \brief Sample profile pass.
  72. ///
  73. /// This pass reads profile data from the file specified by
  74. /// -sample-profile-file and annotates every affected function with the
  75. /// profile information found in that file.
  76. class SampleProfileLoader : public FunctionPass {
  77. public:
  78. // Class identification, replacement for typeinfo
  79. static char ID;
  80. SampleProfileLoader(StringRef Name = SampleProfileFile)
  81. : FunctionPass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Ctx(nullptr),
  82. Reader(), Samples(nullptr), Filename(Name), ProfileIsValid(false) {
  83. initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
  84. }
  85. bool doInitialization(Module &M) override;
  86. void dump() { Reader->dump(); }
  87. const char *getPassName() const override { return "Sample profile pass"; }
  88. bool runOnFunction(Function &F) override;
  89. void getAnalysisUsage(AnalysisUsage &AU) const override {
  90. AU.setPreservesCFG();
  91. AU.addRequired<LoopInfoWrapperPass>();
  92. AU.addRequired<DominatorTreeWrapperPass>();
  93. AU.addRequired<PostDominatorTree>();
  94. }
  95. protected:
  96. unsigned getFunctionLoc(Function &F);
  97. bool emitAnnotations(Function &F);
  98. unsigned getInstWeight(Instruction &I);
  99. unsigned getBlockWeight(BasicBlock *BB);
  100. void printEdgeWeight(raw_ostream &OS, Edge E);
  101. void printBlockWeight(raw_ostream &OS, BasicBlock *BB);
  102. void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB);
  103. bool computeBlockWeights(Function &F);
  104. void findEquivalenceClasses(Function &F);
  105. void findEquivalencesFor(BasicBlock *BB1,
  106. SmallVector<BasicBlock *, 8> Descendants,
  107. DominatorTreeBase<BasicBlock> *DomTree);
  108. void propagateWeights(Function &F);
  109. unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
  110. void buildEdges(Function &F);
  111. bool propagateThroughEdges(Function &F);
  112. /// \brief Line number for the function header. Used to compute absolute
  113. /// line numbers from the relative line numbers found in the profile.
  114. unsigned HeaderLineno;
  115. /// \brief Map basic blocks to their computed weights.
  116. ///
  117. /// The weight of a basic block is defined to be the maximum
  118. /// of all the instruction weights in that block.
  119. BlockWeightMap BlockWeights;
  120. /// \brief Map edges to their computed weights.
  121. ///
  122. /// Edge weights are computed by propagating basic block weights in
  123. /// SampleProfile::propagateWeights.
  124. EdgeWeightMap EdgeWeights;
  125. /// \brief Set of visited blocks during propagation.
  126. SmallPtrSet<BasicBlock *, 128> VisitedBlocks;
  127. /// \brief Set of visited edges during propagation.
  128. SmallSet<Edge, 128> VisitedEdges;
  129. /// \brief Equivalence classes for block weights.
  130. ///
  131. /// Two blocks BB1 and BB2 are in the same equivalence class if they
  132. /// dominate and post-dominate each other, and they are in the same loop
  133. /// nest. When this happens, the two blocks are guaranteed to execute
  134. /// the same number of times.
  135. EquivalenceClassMap EquivalenceClass;
  136. /// \brief Dominance, post-dominance and loop information.
  137. DominatorTree *DT;
  138. PostDominatorTree *PDT;
  139. LoopInfo *LI;
  140. /// \brief Predecessors for each basic block in the CFG.
  141. BlockEdgeMap Predecessors;
  142. /// \brief Successors for each basic block in the CFG.
  143. BlockEdgeMap Successors;
  144. /// \brief LLVM context holding the debug data we need.
  145. LLVMContext *Ctx;
  146. /// \brief Profile reader object.
  147. std::unique_ptr<SampleProfileReader> Reader;
  148. /// \brief Samples collected for the body of this function.
  149. FunctionSamples *Samples;
  150. /// \brief Name of the profile file to load.
  151. StringRef Filename;
  152. /// \brief Flag indicating whether the profile input loaded successfully.
  153. bool ProfileIsValid;
  154. };
  155. }
  156. /// \brief Print the weight of edge \p E on stream \p OS.
  157. ///
  158. /// \param OS Stream to emit the output to.
  159. /// \param E Edge to print.
  160. void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
  161. OS << "weight[" << E.first->getName() << "->" << E.second->getName()
  162. << "]: " << EdgeWeights[E] << "\n";
  163. }
  164. /// \brief Print the equivalence class of block \p BB on stream \p OS.
  165. ///
  166. /// \param OS Stream to emit the output to.
  167. /// \param BB Block to print.
  168. void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
  169. BasicBlock *BB) {
  170. BasicBlock *Equiv = EquivalenceClass[BB];
  171. OS << "equivalence[" << BB->getName()
  172. << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
  173. }
  174. /// \brief Print the weight of block \p BB on stream \p OS.
  175. ///
  176. /// \param OS Stream to emit the output to.
  177. /// \param BB Block to print.
  178. void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
  179. OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
  180. }
  181. /// \brief Get the weight for an instruction.
  182. ///
  183. /// The "weight" of an instruction \p Inst is the number of samples
  184. /// collected on that instruction at runtime. To retrieve it, we
  185. /// need to compute the line number of \p Inst relative to the start of its
  186. /// function. We use HeaderLineno to compute the offset. We then
  187. /// look up the samples collected for \p Inst using BodySamples.
  188. ///
  189. /// \param Inst Instruction to query.
  190. ///
  191. /// \returns The profiled weight of I.
  192. unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
  193. DebugLoc DLoc = Inst.getDebugLoc();
  194. if (!DLoc)
  195. return 0;
  196. unsigned Lineno = DLoc.getLine();
  197. if (Lineno < HeaderLineno)
  198. return 0;
  199. const DILocation *DIL = DLoc;
  200. int LOffset = Lineno - HeaderLineno;
  201. unsigned Discriminator = DIL->getDiscriminator();
  202. unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
  203. DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
  204. << " (line offset: " << LOffset << "." << Discriminator
  205. << " - weight: " << Weight << ")\n");
  206. return Weight;
  207. }
  208. /// \brief Compute the weight of a basic block.
  209. ///
  210. /// The weight of basic block \p BB is the maximum weight of all the
  211. /// instructions in BB. The weight of \p BB is computed and cached in
  212. /// the BlockWeights map.
  213. ///
  214. /// \param BB The basic block to query.
  215. ///
  216. /// \returns The computed weight of BB.
  217. unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
  218. // If we've computed BB's weight before, return it.
  219. std::pair<BlockWeightMap::iterator, bool> Entry =
  220. BlockWeights.insert(std::make_pair(BB, 0));
  221. if (!Entry.second)
  222. return Entry.first->second;
  223. // Otherwise, compute and cache BB's weight.
  224. unsigned Weight = 0;
  225. for (auto &I : BB->getInstList()) {
  226. unsigned InstWeight = getInstWeight(I);
  227. if (InstWeight > Weight)
  228. Weight = InstWeight;
  229. }
  230. Entry.first->second = Weight;
  231. return Weight;
  232. }
  233. /// \brief Compute and store the weights of every basic block.
  234. ///
  235. /// This populates the BlockWeights map by computing
  236. /// the weights of every basic block in the CFG.
  237. ///
  238. /// \param F The function to query.
  239. bool SampleProfileLoader::computeBlockWeights(Function &F) {
  240. bool Changed = false;
  241. DEBUG(dbgs() << "Block weights\n");
  242. for (auto &BB : F) {
  243. unsigned Weight = getBlockWeight(&BB);
  244. Changed |= (Weight > 0);
  245. DEBUG(printBlockWeight(dbgs(), &BB));
  246. }
  247. return Changed;
  248. }
  249. /// \brief Find equivalence classes for the given block.
  250. ///
  251. /// This finds all the blocks that are guaranteed to execute the same
  252. /// number of times as \p BB1. To do this, it traverses all the
  253. /// descendants of \p BB1 in the dominator or post-dominator tree.
  254. ///
  255. /// A block BB2 will be in the same equivalence class as \p BB1 if
  256. /// the following holds:
  257. ///
  258. /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
  259. /// is a descendant of \p BB1 in the dominator tree, then BB2 should
  260. /// dominate BB1 in the post-dominator tree.
  261. ///
  262. /// 2- Both BB2 and \p BB1 must be in the same loop.
  263. ///
  264. /// For every block BB2 that meets those two requirements, we set BB2's
  265. /// equivalence class to \p BB1.
  266. ///
  267. /// \param BB1 Block to check.
  268. /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
  269. /// \param DomTree Opposite dominator tree. If \p Descendants is filled
  270. /// with blocks from \p BB1's dominator tree, then
  271. /// this is the post-dominator tree, and vice versa.
  272. void SampleProfileLoader::findEquivalencesFor(
  273. BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
  274. DominatorTreeBase<BasicBlock> *DomTree) {
  275. for (auto *BB2 : Descendants) {
  276. bool IsDomParent = DomTree->dominates(BB2, BB1);
  277. bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
  278. if (BB1 != BB2 && VisitedBlocks.insert(BB2).second && IsDomParent &&
  279. IsInSameLoop) {
  280. EquivalenceClass[BB2] = BB1;
  281. // If BB2 is heavier than BB1, make BB2 have the same weight
  282. // as BB1.
  283. //
  284. // Note that we don't worry about the opposite situation here
  285. // (when BB2 is lighter than BB1). We will deal with this
  286. // during the propagation phase. Right now, we just want to
  287. // make sure that BB1 has the largest weight of all the
  288. // members of its equivalence set.
  289. unsigned &BB1Weight = BlockWeights[BB1];
  290. unsigned &BB2Weight = BlockWeights[BB2];
  291. BB1Weight = std::max(BB1Weight, BB2Weight);
  292. }
  293. }
  294. }
  295. /// \brief Find equivalence classes.
  296. ///
  297. /// Since samples may be missing from blocks, we can fill in the gaps by setting
  298. /// the weights of all the blocks in the same equivalence class to the same
  299. /// weight. To compute the concept of equivalence, we use dominance and loop
  300. /// information. Two blocks B1 and B2 are in the same equivalence class if B1
  301. /// dominates B2, B2 post-dominates B1 and both are in the same loop.
  302. ///
  303. /// \param F The function to query.
  304. void SampleProfileLoader::findEquivalenceClasses(Function &F) {
  305. SmallVector<BasicBlock *, 8> DominatedBBs;
  306. DEBUG(dbgs() << "\nBlock equivalence classes\n");
  307. // Find equivalence sets based on dominance and post-dominance information.
  308. for (auto &BB : F) {
  309. BasicBlock *BB1 = &BB;
  310. // Compute BB1's equivalence class once.
  311. if (EquivalenceClass.count(BB1)) {
  312. DEBUG(printBlockEquivalence(dbgs(), BB1));
  313. continue;
  314. }
  315. // By default, blocks are in their own equivalence class.
  316. EquivalenceClass[BB1] = BB1;
  317. // Traverse all the blocks dominated by BB1. We are looking for
  318. // every basic block BB2 such that:
  319. //
  320. // 1- BB1 dominates BB2.
  321. // 2- BB2 post-dominates BB1.
  322. // 3- BB1 and BB2 are in the same loop nest.
  323. //
  324. // If all those conditions hold, it means that BB2 is executed
  325. // as many times as BB1, so they are placed in the same equivalence
  326. // class by making BB2's equivalence class be BB1.
  327. DominatedBBs.clear();
  328. DT->getDescendants(BB1, DominatedBBs);
  329. findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
  330. // Repeat the same logic for all the blocks post-dominated by BB1.
  331. // We are looking for every basic block BB2 such that:
  332. //
  333. // 1- BB1 post-dominates BB2.
  334. // 2- BB2 dominates BB1.
  335. // 3- BB1 and BB2 are in the same loop nest.
  336. //
  337. // If all those conditions hold, BB2's equivalence class is BB1.
  338. DominatedBBs.clear();
  339. PDT->getDescendants(BB1, DominatedBBs);
  340. findEquivalencesFor(BB1, DominatedBBs, DT);
  341. DEBUG(printBlockEquivalence(dbgs(), BB1));
  342. }
  343. // Assign weights to equivalence classes.
  344. //
  345. // All the basic blocks in the same equivalence class will execute
  346. // the same number of times. Since we know that the head block in
  347. // each equivalence class has the largest weight, assign that weight
  348. // to all the blocks in that equivalence class.
  349. DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
  350. for (auto &BI : F) {
  351. BasicBlock *BB = &BI;
  352. BasicBlock *EquivBB = EquivalenceClass[BB];
  353. if (BB != EquivBB)
  354. BlockWeights[BB] = BlockWeights[EquivBB];
  355. DEBUG(printBlockWeight(dbgs(), BB));
  356. }
  357. }
  358. /// \brief Visit the given edge to decide if it has a valid weight.
  359. ///
  360. /// If \p E has not been visited before, we copy to \p UnknownEdge
  361. /// and increment the count of unknown edges.
  362. ///
  363. /// \param E Edge to visit.
  364. /// \param NumUnknownEdges Current number of unknown edges.
  365. /// \param UnknownEdge Set if E has not been visited before.
  366. ///
  367. /// \returns E's weight, if known. Otherwise, return 0.
  368. unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
  369. Edge *UnknownEdge) {
  370. if (!VisitedEdges.count(E)) {
  371. (*NumUnknownEdges)++;
  372. *UnknownEdge = E;
  373. return 0;
  374. }
  375. return EdgeWeights[E];
  376. }
  377. /// \brief Propagate weights through incoming/outgoing edges.
  378. ///
  379. /// If the weight of a basic block is known, and there is only one edge
  380. /// with an unknown weight, we can calculate the weight of that edge.
  381. ///
  382. /// Similarly, if all the edges have a known count, we can calculate the
  383. /// count of the basic block, if needed.
  384. ///
  385. /// \param F Function to process.
  386. ///
  387. /// \returns True if new weights were assigned to edges or blocks.
  388. bool SampleProfileLoader::propagateThroughEdges(Function &F) {
  389. bool Changed = false;
  390. DEBUG(dbgs() << "\nPropagation through edges\n");
  391. for (auto &BI : F) {
  392. BasicBlock *BB = &BI;
  393. // Visit all the predecessor and successor edges to determine
  394. // which ones have a weight assigned already. Note that it doesn't
  395. // matter that we only keep track of a single unknown edge. The
  396. // only case we are interested in handling is when only a single
  397. // edge is unknown (see setEdgeOrBlockWeight).
  398. for (unsigned i = 0; i < 2; i++) {
  399. unsigned TotalWeight = 0;
  400. unsigned NumUnknownEdges = 0;
  401. Edge UnknownEdge, SelfReferentialEdge;
  402. if (i == 0) {
  403. // First, visit all predecessor edges.
  404. for (auto *Pred : Predecessors[BB]) {
  405. Edge E = std::make_pair(Pred, BB);
  406. TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
  407. if (E.first == E.second)
  408. SelfReferentialEdge = E;
  409. }
  410. } else {
  411. // On the second round, visit all successor edges.
  412. for (auto *Succ : Successors[BB]) {
  413. Edge E = std::make_pair(BB, Succ);
  414. TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
  415. }
  416. }
  417. // After visiting all the edges, there are three cases that we
  418. // can handle immediately:
  419. //
  420. // - All the edge weights are known (i.e., NumUnknownEdges == 0).
  421. // In this case, we simply check that the sum of all the edges
  422. // is the same as BB's weight. If not, we change BB's weight
  423. // to match. Additionally, if BB had not been visited before,
  424. // we mark it visited.
  425. //
  426. // - Only one edge is unknown and BB has already been visited.
  427. // In this case, we can compute the weight of the edge by
  428. // subtracting the total block weight from all the known
  429. // edge weights. If the edges weight more than BB, then the
  430. // edge of the last remaining edge is set to zero.
  431. //
  432. // - There exists a self-referential edge and the weight of BB is
  433. // known. In this case, this edge can be based on BB's weight.
  434. // We add up all the other known edges and set the weight on
  435. // the self-referential edge as we did in the previous case.
  436. //
  437. // In any other case, we must continue iterating. Eventually,
  438. // all edges will get a weight, or iteration will stop when
  439. // it reaches SampleProfileMaxPropagateIterations.
  440. if (NumUnknownEdges <= 1) {
  441. unsigned &BBWeight = BlockWeights[BB];
  442. if (NumUnknownEdges == 0) {
  443. // If we already know the weight of all edges, the weight of the
  444. // basic block can be computed. It should be no larger than the sum
  445. // of all edge weights.
  446. if (TotalWeight > BBWeight) {
  447. BBWeight = TotalWeight;
  448. Changed = true;
  449. DEBUG(dbgs() << "All edge weights for " << BB->getName()
  450. << " known. Set weight for block: ";
  451. printBlockWeight(dbgs(), BB););
  452. }
  453. if (VisitedBlocks.insert(BB).second)
  454. Changed = true;
  455. } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
  456. // If there is a single unknown edge and the block has been
  457. // visited, then we can compute E's weight.
  458. if (BBWeight >= TotalWeight)
  459. EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
  460. else
  461. EdgeWeights[UnknownEdge] = 0;
  462. VisitedEdges.insert(UnknownEdge);
  463. Changed = true;
  464. DEBUG(dbgs() << "Set weight for edge: ";
  465. printEdgeWeight(dbgs(), UnknownEdge));
  466. }
  467. } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
  468. unsigned &BBWeight = BlockWeights[BB];
  469. // We have a self-referential edge and the weight of BB is known.
  470. if (BBWeight >= TotalWeight)
  471. EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
  472. else
  473. EdgeWeights[SelfReferentialEdge] = 0;
  474. VisitedEdges.insert(SelfReferentialEdge);
  475. Changed = true;
  476. DEBUG(dbgs() << "Set self-referential edge weight to: ";
  477. printEdgeWeight(dbgs(), SelfReferentialEdge));
  478. }
  479. }
  480. }
  481. return Changed;
  482. }
  483. /// \brief Build in/out edge lists for each basic block in the CFG.
  484. ///
  485. /// We are interested in unique edges. If a block B1 has multiple
  486. /// edges to another block B2, we only add a single B1->B2 edge.
  487. void SampleProfileLoader::buildEdges(Function &F) {
  488. for (auto &BI : F) {
  489. BasicBlock *B1 = &BI;
  490. // Add predecessors for B1.
  491. SmallPtrSet<BasicBlock *, 16> Visited;
  492. if (!Predecessors[B1].empty())
  493. llvm_unreachable("Found a stale predecessors list in a basic block.");
  494. for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
  495. BasicBlock *B2 = *PI;
  496. if (Visited.insert(B2).second)
  497. Predecessors[B1].push_back(B2);
  498. }
  499. // Add successors for B1.
  500. Visited.clear();
  501. if (!Successors[B1].empty())
  502. llvm_unreachable("Found a stale successors list in a basic block.");
  503. for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
  504. BasicBlock *B2 = *SI;
  505. if (Visited.insert(B2).second)
  506. Successors[B1].push_back(B2);
  507. }
  508. }
  509. }
  510. /// \brief Propagate weights into edges
  511. ///
  512. /// The following rules are applied to every block BB in the CFG:
  513. ///
  514. /// - If BB has a single predecessor/successor, then the weight
  515. /// of that edge is the weight of the block.
  516. ///
  517. /// - If all incoming or outgoing edges are known except one, and the
  518. /// weight of the block is already known, the weight of the unknown
  519. /// edge will be the weight of the block minus the sum of all the known
  520. /// edges. If the sum of all the known edges is larger than BB's weight,
  521. /// we set the unknown edge weight to zero.
  522. ///
  523. /// - If there is a self-referential edge, and the weight of the block is
  524. /// known, the weight for that edge is set to the weight of the block
  525. /// minus the weight of the other incoming edges to that block (if
  526. /// known).
  527. void SampleProfileLoader::propagateWeights(Function &F) {
  528. bool Changed = true;
  529. unsigned i = 0;
  530. // Add an entry count to the function using the samples gathered
  531. // at the function entry.
  532. F.setEntryCount(Samples->getHeadSamples());
  533. // Before propagation starts, build, for each block, a list of
  534. // unique predecessors and successors. This is necessary to handle
  535. // identical edges in multiway branches. Since we visit all blocks and all
  536. // edges of the CFG, it is cleaner to build these lists once at the start
  537. // of the pass.
  538. buildEdges(F);
  539. // Propagate until we converge or we go past the iteration limit.
  540. while (Changed && i++ < SampleProfileMaxPropagateIterations) {
  541. Changed = propagateThroughEdges(F);
  542. }
  543. // Generate MD_prof metadata for every branch instruction using the
  544. // edge weights computed during propagation.
  545. DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
  546. MDBuilder MDB(F.getContext());
  547. for (auto &BI : F) {
  548. BasicBlock *BB = &BI;
  549. TerminatorInst *TI = BB->getTerminator();
  550. if (TI->getNumSuccessors() == 1)
  551. continue;
  552. if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
  553. continue;
  554. DEBUG(dbgs() << "\nGetting weights for branch at line "
  555. << TI->getDebugLoc().getLine() << ".\n");
  556. SmallVector<unsigned, 4> Weights;
  557. bool AllWeightsZero = true;
  558. for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
  559. BasicBlock *Succ = TI->getSuccessor(I);
  560. Edge E = std::make_pair(BB, Succ);
  561. unsigned Weight = EdgeWeights[E];
  562. DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
  563. Weights.push_back(Weight);
  564. if (Weight != 0)
  565. AllWeightsZero = false;
  566. }
  567. // Only set weights if there is at least one non-zero weight.
  568. // In any other case, let the analyzer set weights.
  569. if (!AllWeightsZero) {
  570. DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
  571. TI->setMetadata(llvm::LLVMContext::MD_prof,
  572. MDB.createBranchWeights(Weights));
  573. } else {
  574. DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
  575. }
  576. }
  577. }
  578. /// \brief Get the line number for the function header.
  579. ///
  580. /// This looks up function \p F in the current compilation unit and
  581. /// retrieves the line number where the function is defined. This is
  582. /// line 0 for all the samples read from the profile file. Every line
  583. /// number is relative to this line.
  584. ///
  585. /// \param F Function object to query.
  586. ///
  587. /// \returns the line number where \p F is defined. If it returns 0,
  588. /// it means that there is no debug information available for \p F.
  589. unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
  590. if (DISubprogram *S = getDISubprogram(&F))
  591. return S->getLine();
  592. // If could not find the start of \p F, emit a diagnostic to inform the user
  593. // about the missed opportunity.
  594. F.getContext().diagnose(DiagnosticInfoSampleProfile(
  595. "No debug information found in function " + F.getName() +
  596. ": Function profile not used",
  597. DS_Warning));
  598. return 0;
  599. }
  600. /// \brief Generate branch weight metadata for all branches in \p F.
  601. ///
  602. /// Branch weights are computed out of instruction samples using a
  603. /// propagation heuristic. Propagation proceeds in 3 phases:
  604. ///
  605. /// 1- Assignment of block weights. All the basic blocks in the function
  606. /// are initial assigned the same weight as their most frequently
  607. /// executed instruction.
  608. ///
  609. /// 2- Creation of equivalence classes. Since samples may be missing from
  610. /// blocks, we can fill in the gaps by setting the weights of all the
  611. /// blocks in the same equivalence class to the same weight. To compute
  612. /// the concept of equivalence, we use dominance and loop information.
  613. /// Two blocks B1 and B2 are in the same equivalence class if B1
  614. /// dominates B2, B2 post-dominates B1 and both are in the same loop.
  615. ///
  616. /// 3- Propagation of block weights into edges. This uses a simple
  617. /// propagation heuristic. The following rules are applied to every
  618. /// block BB in the CFG:
  619. ///
  620. /// - If BB has a single predecessor/successor, then the weight
  621. /// of that edge is the weight of the block.
  622. ///
  623. /// - If all the edges are known except one, and the weight of the
  624. /// block is already known, the weight of the unknown edge will
  625. /// be the weight of the block minus the sum of all the known
  626. /// edges. If the sum of all the known edges is larger than BB's weight,
  627. /// we set the unknown edge weight to zero.
  628. ///
  629. /// - If there is a self-referential edge, and the weight of the block is
  630. /// known, the weight for that edge is set to the weight of the block
  631. /// minus the weight of the other incoming edges to that block (if
  632. /// known).
  633. ///
  634. /// Since this propagation is not guaranteed to finalize for every CFG, we
  635. /// only allow it to proceed for a limited number of iterations (controlled
  636. /// by -sample-profile-max-propagate-iterations).
  637. ///
  638. /// FIXME: Try to replace this propagation heuristic with a scheme
  639. /// that is guaranteed to finalize. A work-list approach similar to
  640. /// the standard value propagation algorithm used by SSA-CCP might
  641. /// work here.
  642. ///
  643. /// Once all the branch weights are computed, we emit the MD_prof
  644. /// metadata on BB using the computed values for each of its branches.
  645. ///
  646. /// \param F The function to query.
  647. ///
  648. /// \returns true if \p F was modified. Returns false, otherwise.
  649. bool SampleProfileLoader::emitAnnotations(Function &F) {
  650. bool Changed = false;
  651. // Initialize invariants used during computation and propagation.
  652. HeaderLineno = getFunctionLoc(F);
  653. if (HeaderLineno == 0)
  654. return false;
  655. DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
  656. << ": " << HeaderLineno << "\n");
  657. // Compute basic block weights.
  658. Changed |= computeBlockWeights(F);
  659. if (Changed) {
  660. // Find equivalence classes.
  661. findEquivalenceClasses(F);
  662. // Propagate weights to all edges.
  663. propagateWeights(F);
  664. }
  665. return Changed;
  666. }
  667. char SampleProfileLoader::ID = 0;
  668. INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
  669. "Sample Profile loader", false, false)
  670. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  671. INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
  672. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  673. INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
  674. INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
  675. "Sample Profile loader", false, false)
  676. bool SampleProfileLoader::doInitialization(Module &M) {
  677. auto ReaderOrErr = SampleProfileReader::create(Filename, M.getContext());
  678. if (std::error_code EC = ReaderOrErr.getError()) {
  679. std::string Msg = "Could not open profile: " + EC.message();
  680. M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
  681. return false;
  682. }
  683. Reader = std::move(ReaderOrErr.get());
  684. ProfileIsValid = (Reader->read() == sampleprof_error::success);
  685. return true;
  686. }
  687. FunctionPass *llvm::createSampleProfileLoaderPass() {
  688. return new SampleProfileLoader(SampleProfileFile);
  689. }
  690. FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
  691. return new SampleProfileLoader(Name);
  692. }
  693. bool SampleProfileLoader::runOnFunction(Function &F) {
  694. if (!ProfileIsValid)
  695. return false;
  696. DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  697. PDT = &getAnalysis<PostDominatorTree>();
  698. LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  699. Ctx = &F.getParent()->getContext();
  700. Samples = Reader->getSamplesFor(F);
  701. if (!Samples->empty())
  702. return emitAnnotations(F);
  703. return false;
  704. }