MachineBlockPlacement.cpp 49 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227
  1. //===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements basic block placement transformations using the CFG
  11. // structure and branch probability estimates.
  12. //
  13. // The pass strives to preserve the structure of the CFG (that is, retain
  14. // a topological ordering of basic blocks) in the absence of a *strong* signal
  15. // to the contrary from probabilities. However, within the CFG structure, it
  16. // attempts to choose an ordering which favors placing more likely sequences of
  17. // blocks adjacent to each other.
  18. //
  19. // The algorithm works from the inner-most loop within a function outward, and
  20. // at each stage walks through the basic blocks, trying to coalesce them into
  21. // sequential chains where allowed by the CFG (or demanded by heavy
  22. // probabilities). Finally, it walks the blocks in topological order, and the
  23. // first time it reaches a chain of basic blocks, it schedules them in the
  24. // function in-order.
  25. //
  26. //===----------------------------------------------------------------------===//
  27. #include "llvm/CodeGen/Passes.h"
  28. #include "llvm/ADT/DenseMap.h"
  29. #include "llvm/ADT/SmallPtrSet.h"
  30. #include "llvm/ADT/SmallVector.h"
  31. #include "llvm/ADT/Statistic.h"
  32. #include "llvm/CodeGen/MachineBasicBlock.h"
  33. #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
  34. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  35. #include "llvm/CodeGen/MachineDominators.h"
  36. #include "llvm/CodeGen/MachineFunction.h"
  37. #include "llvm/CodeGen/MachineFunctionPass.h"
  38. #include "llvm/CodeGen/MachineLoopInfo.h"
  39. #include "llvm/CodeGen/MachineModuleInfo.h"
  40. #include "llvm/Support/Allocator.h"
  41. #include "llvm/Support/CommandLine.h"
  42. #include "llvm/Support/Debug.h"
  43. #include "llvm/Support/raw_ostream.h"
  44. #include "llvm/Target/TargetInstrInfo.h"
  45. #include "llvm/Target/TargetLowering.h"
  46. #include "llvm/Target/TargetSubtargetInfo.h"
  47. #include <algorithm>
  48. using namespace llvm;
  49. #define DEBUG_TYPE "block-placement"
  50. STATISTIC(NumCondBranches, "Number of conditional branches");
  51. STATISTIC(NumUncondBranches, "Number of uncondittional branches");
  52. STATISTIC(CondBranchTakenFreq,
  53. "Potential frequency of taking conditional branches");
  54. STATISTIC(UncondBranchTakenFreq,
  55. "Potential frequency of taking unconditional branches");
  56. static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
  57. cl::desc("Force the alignment of all "
  58. "blocks in the function."),
  59. cl::init(0), cl::Hidden);
  60. // FIXME: Find a good default for this flag and remove the flag.
  61. static cl::opt<unsigned> ExitBlockBias(
  62. "block-placement-exit-block-bias",
  63. cl::desc("Block frequency percentage a loop exit block needs "
  64. "over the original exit to be considered the new exit."),
  65. cl::init(0), cl::Hidden);
  66. static cl::opt<bool> OutlineOptionalBranches(
  67. "outline-optional-branches",
  68. cl::desc("Put completely optional branches, i.e. branches with a common "
  69. "post dominator, out of line."),
  70. cl::init(false), cl::Hidden);
  71. static cl::opt<unsigned> OutlineOptionalThreshold(
  72. "outline-optional-threshold",
  73. cl::desc("Don't outline optional branches that are a single block with an "
  74. "instruction count below this threshold"),
  75. cl::init(4), cl::Hidden);
  76. namespace {
  77. class BlockChain;
  78. /// \brief Type for our function-wide basic block -> block chain mapping.
  79. typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
  80. }
  81. namespace {
  82. /// \brief A chain of blocks which will be laid out contiguously.
  83. ///
  84. /// This is the datastructure representing a chain of consecutive blocks that
  85. /// are profitable to layout together in order to maximize fallthrough
  86. /// probabilities and code locality. We also can use a block chain to represent
  87. /// a sequence of basic blocks which have some external (correctness)
  88. /// requirement for sequential layout.
  89. ///
  90. /// Chains can be built around a single basic block and can be merged to grow
  91. /// them. They participate in a block-to-chain mapping, which is updated
  92. /// automatically as chains are merged together.
  93. class BlockChain {
  94. /// \brief The sequence of blocks belonging to this chain.
  95. ///
  96. /// This is the sequence of blocks for a particular chain. These will be laid
  97. /// out in-order within the function.
  98. SmallVector<MachineBasicBlock *, 4> Blocks;
  99. /// \brief A handle to the function-wide basic block to block chain mapping.
  100. ///
  101. /// This is retained in each block chain to simplify the computation of child
  102. /// block chains for SCC-formation and iteration. We store the edges to child
  103. /// basic blocks, and map them back to their associated chains using this
  104. /// structure.
  105. BlockToChainMapType &BlockToChain;
  106. public:
  107. /// \brief Construct a new BlockChain.
  108. ///
  109. /// This builds a new block chain representing a single basic block in the
  110. /// function. It also registers itself as the chain that block participates
  111. /// in with the BlockToChain mapping.
  112. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
  113. : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
  114. assert(BB && "Cannot create a chain with a null basic block");
  115. BlockToChain[BB] = this;
  116. }
  117. /// \brief Iterator over blocks within the chain.
  118. typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
  119. /// \brief Beginning of blocks within the chain.
  120. iterator begin() { return Blocks.begin(); }
  121. /// \brief End of blocks within the chain.
  122. iterator end() { return Blocks.end(); }
  123. /// \brief Merge a block chain into this one.
  124. ///
  125. /// This routine merges a block chain into this one. It takes care of forming
  126. /// a contiguous sequence of basic blocks, updating the edge list, and
  127. /// updating the block -> chain mapping. It does not free or tear down the
  128. /// old chain, but the old chain's block list is no longer valid.
  129. void merge(MachineBasicBlock *BB, BlockChain *Chain) {
  130. assert(BB);
  131. assert(!Blocks.empty());
  132. // Fast path in case we don't have a chain already.
  133. if (!Chain) {
  134. assert(!BlockToChain[BB]);
  135. Blocks.push_back(BB);
  136. BlockToChain[BB] = this;
  137. return;
  138. }
  139. assert(BB == *Chain->begin());
  140. assert(Chain->begin() != Chain->end());
  141. // Update the incoming blocks to point to this chain, and add them to the
  142. // chain structure.
  143. for (MachineBasicBlock *ChainBB : *Chain) {
  144. Blocks.push_back(ChainBB);
  145. assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
  146. BlockToChain[ChainBB] = this;
  147. }
  148. }
  149. #ifndef NDEBUG
  150. /// \brief Dump the blocks in this chain.
  151. LLVM_DUMP_METHOD void dump() {
  152. for (MachineBasicBlock *MBB : *this)
  153. MBB->dump();
  154. }
  155. #endif // NDEBUG
  156. /// \brief Count of predecessors within the loop currently being processed.
  157. ///
  158. /// This count is updated at each loop we process to represent the number of
  159. /// in-loop predecessors of this chain.
  160. unsigned LoopPredecessors;
  161. };
  162. }
  163. namespace {
  164. class MachineBlockPlacement : public MachineFunctionPass {
  165. /// \brief A typedef for a block filter set.
  166. typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
  167. /// \brief A handle to the branch probability pass.
  168. const MachineBranchProbabilityInfo *MBPI;
  169. /// \brief A handle to the function-wide block frequency pass.
  170. const MachineBlockFrequencyInfo *MBFI;
  171. /// \brief A handle to the loop info.
  172. const MachineLoopInfo *MLI;
  173. /// \brief A handle to the target's instruction info.
  174. const TargetInstrInfo *TII;
  175. /// \brief A handle to the target's lowering info.
  176. const TargetLoweringBase *TLI;
  177. /// \brief A handle to the post dominator tree.
  178. MachineDominatorTree *MDT;
  179. /// \brief A set of blocks that are unavoidably execute, i.e. they dominate
  180. /// all terminators of the MachineFunction.
  181. SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
  182. /// \brief Allocator and owner of BlockChain structures.
  183. ///
  184. /// We build BlockChains lazily while processing the loop structure of
  185. /// a function. To reduce malloc traffic, we allocate them using this
  186. /// slab-like allocator, and destroy them after the pass completes. An
  187. /// important guarantee is that this allocator produces stable pointers to
  188. /// the chains.
  189. SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
  190. /// \brief Function wide BasicBlock to BlockChain mapping.
  191. ///
  192. /// This mapping allows efficiently moving from any given basic block to the
  193. /// BlockChain it participates in, if any. We use it to, among other things,
  194. /// allow implicitly defining edges between chains as the existing edges
  195. /// between basic blocks.
  196. DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
  197. void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
  198. SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
  199. const BlockFilterSet *BlockFilter = nullptr);
  200. MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
  201. BlockChain &Chain,
  202. const BlockFilterSet *BlockFilter);
  203. MachineBasicBlock *
  204. selectBestCandidateBlock(BlockChain &Chain,
  205. SmallVectorImpl<MachineBasicBlock *> &WorkList,
  206. const BlockFilterSet *BlockFilter);
  207. MachineBasicBlock *
  208. getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain,
  209. MachineFunction::iterator &PrevUnplacedBlockIt,
  210. const BlockFilterSet *BlockFilter);
  211. void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
  212. SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
  213. const BlockFilterSet *BlockFilter = nullptr);
  214. MachineBasicBlock *findBestLoopTop(MachineLoop &L,
  215. const BlockFilterSet &LoopBlockSet);
  216. MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
  217. const BlockFilterSet &LoopBlockSet);
  218. void buildLoopChains(MachineFunction &F, MachineLoop &L);
  219. void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
  220. const BlockFilterSet &LoopBlockSet);
  221. void buildCFGChains(MachineFunction &F);
  222. public:
  223. static char ID; // Pass identification, replacement for typeid
  224. MachineBlockPlacement() : MachineFunctionPass(ID) {
  225. initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
  226. }
  227. bool runOnMachineFunction(MachineFunction &F) override;
  228. void getAnalysisUsage(AnalysisUsage &AU) const override {
  229. AU.addRequired<MachineBranchProbabilityInfo>();
  230. AU.addRequired<MachineBlockFrequencyInfo>();
  231. AU.addRequired<MachineDominatorTree>();
  232. AU.addRequired<MachineLoopInfo>();
  233. MachineFunctionPass::getAnalysisUsage(AU);
  234. }
  235. };
  236. }
  237. char MachineBlockPlacement::ID = 0;
  238. char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
  239. INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
  240. "Branch Probability Basic Block Placement", false, false)
  241. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  242. INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
  243. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  244. INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
  245. INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
  246. "Branch Probability Basic Block Placement", false, false)
  247. #ifndef NDEBUG
  248. /// \brief Helper to print the name of a MBB.
  249. ///
  250. /// Only used by debug logging.
  251. static std::string getBlockName(MachineBasicBlock *BB) {
  252. std::string Result;
  253. raw_string_ostream OS(Result);
  254. OS << "BB#" << BB->getNumber();
  255. OS << " (derived from LLVM BB '" << BB->getName() << "')";
  256. OS.flush();
  257. return Result;
  258. }
  259. /// \brief Helper to print the number of a MBB.
  260. ///
  261. /// Only used by debug logging.
  262. static std::string getBlockNum(MachineBasicBlock *BB) {
  263. std::string Result;
  264. raw_string_ostream OS(Result);
  265. OS << "BB#" << BB->getNumber();
  266. OS.flush();
  267. return Result;
  268. }
  269. #endif
  270. /// \brief Mark a chain's successors as having one fewer preds.
  271. ///
  272. /// When a chain is being merged into the "placed" chain, this routine will
  273. /// quickly walk the successors of each block in the chain and mark them as
  274. /// having one fewer active predecessor. It also adds any successors of this
  275. /// chain which reach the zero-predecessor state to the worklist passed in.
  276. void MachineBlockPlacement::markChainSuccessors(
  277. BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
  278. SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
  279. const BlockFilterSet *BlockFilter) {
  280. // Walk all the blocks in this chain, marking their successors as having
  281. // a predecessor placed.
  282. for (MachineBasicBlock *MBB : Chain) {
  283. // Add any successors for which this is the only un-placed in-loop
  284. // predecessor to the worklist as a viable candidate for CFG-neutral
  285. // placement. No subsequent placement of this block will violate the CFG
  286. // shape, so we get to use heuristics to choose a favorable placement.
  287. for (MachineBasicBlock *Succ : MBB->successors()) {
  288. if (BlockFilter && !BlockFilter->count(Succ))
  289. continue;
  290. BlockChain &SuccChain = *BlockToChain[Succ];
  291. // Disregard edges within a fixed chain, or edges to the loop header.
  292. if (&Chain == &SuccChain || Succ == LoopHeaderBB)
  293. continue;
  294. // This is a cross-chain edge that is within the loop, so decrement the
  295. // loop predecessor count of the destination chain.
  296. if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
  297. BlockWorkList.push_back(*SuccChain.begin());
  298. }
  299. }
  300. }
  301. /// \brief Select the best successor for a block.
  302. ///
  303. /// This looks across all successors of a particular block and attempts to
  304. /// select the "best" one to be the layout successor. It only considers direct
  305. /// successors which also pass the block filter. It will attempt to avoid
  306. /// breaking CFG structure, but cave and break such structures in the case of
  307. /// very hot successor edges.
  308. ///
  309. /// \returns The best successor block found, or null if none are viable.
  310. MachineBasicBlock *
  311. MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
  312. BlockChain &Chain,
  313. const BlockFilterSet *BlockFilter) {
  314. const BranchProbability HotProb(4, 5); // 80%
  315. MachineBasicBlock *BestSucc = nullptr;
  316. // FIXME: Due to the performance of the probability and weight routines in
  317. // the MBPI analysis, we manually compute probabilities using the edge
  318. // weights. This is suboptimal as it means that the somewhat subtle
  319. // definition of edge weight semantics is encoded here as well. We should
  320. // improve the MBPI interface to efficiently support query patterns such as
  321. // this.
  322. uint32_t BestWeight = 0;
  323. uint32_t WeightScale = 0;
  324. uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
  325. DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
  326. for (MachineBasicBlock *Succ : BB->successors()) {
  327. if (BlockFilter && !BlockFilter->count(Succ))
  328. continue;
  329. BlockChain &SuccChain = *BlockToChain[Succ];
  330. if (&SuccChain == &Chain) {
  331. DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n");
  332. continue;
  333. }
  334. if (Succ != *SuccChain.begin()) {
  335. DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
  336. continue;
  337. }
  338. uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
  339. BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
  340. // If we outline optional branches, look whether Succ is unavoidable, i.e.
  341. // dominates all terminators of the MachineFunction. If it does, other
  342. // successors must be optional. Don't do this for cold branches.
  343. if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
  344. UnavoidableBlocks.count(Succ) > 0) {
  345. auto HasShortOptionalBranch = [&]() {
  346. for (MachineBasicBlock *Pred : Succ->predecessors()) {
  347. // Check whether there is an unplaced optional branch.
  348. if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
  349. BlockToChain[Pred] == &Chain)
  350. continue;
  351. // Check whether the optional branch has exactly one BB.
  352. if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
  353. continue;
  354. // Check whether the optional branch is small.
  355. if (Pred->size() < OutlineOptionalThreshold)
  356. return true;
  357. }
  358. return false;
  359. };
  360. if (!HasShortOptionalBranch())
  361. return Succ;
  362. }
  363. // Only consider successors which are either "hot", or wouldn't violate
  364. // any CFG constraints.
  365. if (SuccChain.LoopPredecessors != 0) {
  366. if (SuccProb < HotProb) {
  367. DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
  368. << " (prob) (CFG conflict)\n");
  369. continue;
  370. }
  371. // Make sure that a hot successor doesn't have a globally more
  372. // important predecessor.
  373. BlockFrequency CandidateEdgeFreq =
  374. MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
  375. bool BadCFGConflict = false;
  376. for (MachineBasicBlock *Pred : Succ->predecessors()) {
  377. if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
  378. BlockToChain[Pred] == &Chain)
  379. continue;
  380. BlockFrequency PredEdgeFreq =
  381. MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
  382. if (PredEdgeFreq >= CandidateEdgeFreq) {
  383. BadCFGConflict = true;
  384. break;
  385. }
  386. }
  387. if (BadCFGConflict) {
  388. DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
  389. << " (prob) (non-cold CFG conflict)\n");
  390. continue;
  391. }
  392. }
  393. DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
  394. << " (prob)"
  395. << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
  396. << "\n");
  397. if (BestSucc && BestWeight >= SuccWeight)
  398. continue;
  399. BestSucc = Succ;
  400. BestWeight = SuccWeight;
  401. }
  402. return BestSucc;
  403. }
  404. /// \brief Select the best block from a worklist.
  405. ///
  406. /// This looks through the provided worklist as a list of candidate basic
  407. /// blocks and select the most profitable one to place. The definition of
  408. /// profitable only really makes sense in the context of a loop. This returns
  409. /// the most frequently visited block in the worklist, which in the case of
  410. /// a loop, is the one most desirable to be physically close to the rest of the
  411. /// loop body in order to improve icache behavior.
  412. ///
  413. /// \returns The best block found, or null if none are viable.
  414. MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
  415. BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
  416. const BlockFilterSet *BlockFilter) {
  417. // Once we need to walk the worklist looking for a candidate, cleanup the
  418. // worklist of already placed entries.
  419. // FIXME: If this shows up on profiles, it could be folded (at the cost of
  420. // some code complexity) into the loop below.
  421. WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
  422. [&](MachineBasicBlock *BB) {
  423. return BlockToChain.lookup(BB) == &Chain;
  424. }),
  425. WorkList.end());
  426. MachineBasicBlock *BestBlock = nullptr;
  427. BlockFrequency BestFreq;
  428. for (MachineBasicBlock *MBB : WorkList) {
  429. BlockChain &SuccChain = *BlockToChain[MBB];
  430. if (&SuccChain == &Chain) {
  431. DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n");
  432. continue;
  433. }
  434. assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
  435. BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
  436. DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
  437. MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
  438. if (BestBlock && BestFreq >= CandidateFreq)
  439. continue;
  440. BestBlock = MBB;
  441. BestFreq = CandidateFreq;
  442. }
  443. return BestBlock;
  444. }
  445. /// \brief Retrieve the first unplaced basic block.
  446. ///
  447. /// This routine is called when we are unable to use the CFG to walk through
  448. /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
  449. /// We walk through the function's blocks in order, starting from the
  450. /// LastUnplacedBlockIt. We update this iterator on each call to avoid
  451. /// re-scanning the entire sequence on repeated calls to this routine.
  452. MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
  453. MachineFunction &F, const BlockChain &PlacedChain,
  454. MachineFunction::iterator &PrevUnplacedBlockIt,
  455. const BlockFilterSet *BlockFilter) {
  456. for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
  457. ++I) {
  458. if (BlockFilter && !BlockFilter->count(I))
  459. continue;
  460. if (BlockToChain[I] != &PlacedChain) {
  461. PrevUnplacedBlockIt = I;
  462. // Now select the head of the chain to which the unplaced block belongs
  463. // as the block to place. This will force the entire chain to be placed,
  464. // and satisfies the requirements of merging chains.
  465. return *BlockToChain[I]->begin();
  466. }
  467. }
  468. return nullptr;
  469. }
  470. void MachineBlockPlacement::buildChain(
  471. MachineBasicBlock *BB, BlockChain &Chain,
  472. SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
  473. const BlockFilterSet *BlockFilter) {
  474. assert(BB);
  475. assert(BlockToChain[BB] == &Chain);
  476. MachineFunction &F = *BB->getParent();
  477. MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
  478. MachineBasicBlock *LoopHeaderBB = BB;
  479. markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
  480. BB = *std::prev(Chain.end());
  481. for (;;) {
  482. assert(BB);
  483. assert(BlockToChain[BB] == &Chain);
  484. assert(*std::prev(Chain.end()) == BB);
  485. // Look for the best viable successor if there is one to place immediately
  486. // after this block.
  487. MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
  488. // If an immediate successor isn't available, look for the best viable
  489. // block among those we've identified as not violating the loop's CFG at
  490. // this point. This won't be a fallthrough, but it will increase locality.
  491. if (!BestSucc)
  492. BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
  493. if (!BestSucc) {
  494. BestSucc =
  495. getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter);
  496. if (!BestSucc)
  497. break;
  498. DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
  499. "layout successor until the CFG reduces\n");
  500. }
  501. // Place this block, updating the datastructures to reflect its placement.
  502. BlockChain &SuccChain = *BlockToChain[BestSucc];
  503. // Zero out LoopPredecessors for the successor we're about to merge in case
  504. // we selected a successor that didn't fit naturally into the CFG.
  505. SuccChain.LoopPredecessors = 0;
  506. DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to "
  507. << getBlockNum(BestSucc) << "\n");
  508. markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
  509. Chain.merge(BestSucc, &SuccChain);
  510. BB = *std::prev(Chain.end());
  511. }
  512. DEBUG(dbgs() << "Finished forming chain for header block "
  513. << getBlockNum(*Chain.begin()) << "\n");
  514. }
  515. /// \brief Find the best loop top block for layout.
  516. ///
  517. /// Look for a block which is strictly better than the loop header for laying
  518. /// out at the top of the loop. This looks for one and only one pattern:
  519. /// a latch block with no conditional exit. This block will cause a conditional
  520. /// jump around it or will be the bottom of the loop if we lay it out in place,
  521. /// but if it it doesn't end up at the bottom of the loop for any reason,
  522. /// rotation alone won't fix it. Because such a block will always result in an
  523. /// unconditional jump (for the backedge) rotating it in front of the loop
  524. /// header is always profitable.
  525. MachineBasicBlock *
  526. MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
  527. const BlockFilterSet &LoopBlockSet) {
  528. // Check that the header hasn't been fused with a preheader block due to
  529. // crazy branches. If it has, we need to start with the header at the top to
  530. // prevent pulling the preheader into the loop body.
  531. BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
  532. if (!LoopBlockSet.count(*HeaderChain.begin()))
  533. return L.getHeader();
  534. DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader())
  535. << "\n");
  536. BlockFrequency BestPredFreq;
  537. MachineBasicBlock *BestPred = nullptr;
  538. for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
  539. if (!LoopBlockSet.count(Pred))
  540. continue;
  541. DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
  542. << Pred->succ_size() << " successors, ";
  543. MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
  544. if (Pred->succ_size() > 1)
  545. continue;
  546. BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
  547. if (!BestPred || PredFreq > BestPredFreq ||
  548. (!(PredFreq < BestPredFreq) &&
  549. Pred->isLayoutSuccessor(L.getHeader()))) {
  550. BestPred = Pred;
  551. BestPredFreq = PredFreq;
  552. }
  553. }
  554. // If no direct predecessor is fine, just use the loop header.
  555. if (!BestPred)
  556. return L.getHeader();
  557. // Walk backwards through any straight line of predecessors.
  558. while (BestPred->pred_size() == 1 &&
  559. (*BestPred->pred_begin())->succ_size() == 1 &&
  560. *BestPred->pred_begin() != L.getHeader())
  561. BestPred = *BestPred->pred_begin();
  562. DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n");
  563. return BestPred;
  564. }
  565. /// \brief Find the best loop exiting block for layout.
  566. ///
  567. /// This routine implements the logic to analyze the loop looking for the best
  568. /// block to layout at the top of the loop. Typically this is done to maximize
  569. /// fallthrough opportunities.
  570. MachineBasicBlock *
  571. MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
  572. const BlockFilterSet &LoopBlockSet) {
  573. // We don't want to layout the loop linearly in all cases. If the loop header
  574. // is just a normal basic block in the loop, we want to look for what block
  575. // within the loop is the best one to layout at the top. However, if the loop
  576. // header has be pre-merged into a chain due to predecessors not having
  577. // analyzable branches, *and* the predecessor it is merged with is *not* part
  578. // of the loop, rotating the header into the middle of the loop will create
  579. // a non-contiguous range of blocks which is Very Bad. So start with the
  580. // header and only rotate if safe.
  581. BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
  582. if (!LoopBlockSet.count(*HeaderChain.begin()))
  583. return nullptr;
  584. BlockFrequency BestExitEdgeFreq;
  585. unsigned BestExitLoopDepth = 0;
  586. MachineBasicBlock *ExitingBB = nullptr;
  587. // If there are exits to outer loops, loop rotation can severely limit
  588. // fallthrough opportunites unless it selects such an exit. Keep a set of
  589. // blocks where rotating to exit with that block will reach an outer loop.
  590. SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
  591. DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader())
  592. << "\n");
  593. for (MachineBasicBlock *MBB : L.getBlocks()) {
  594. BlockChain &Chain = *BlockToChain[MBB];
  595. // Ensure that this block is at the end of a chain; otherwise it could be
  596. // mid-way through an inner loop or a successor of an unanalyzable branch.
  597. if (MBB != *std::prev(Chain.end()))
  598. continue;
  599. // Now walk the successors. We need to establish whether this has a viable
  600. // exiting successor and whether it has a viable non-exiting successor.
  601. // We store the old exiting state and restore it if a viable looping
  602. // successor isn't found.
  603. MachineBasicBlock *OldExitingBB = ExitingBB;
  604. BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
  605. bool HasLoopingSucc = false;
  606. // FIXME: Due to the performance of the probability and weight routines in
  607. // the MBPI analysis, we use the internal weights and manually compute the
  608. // probabilities to avoid quadratic behavior.
  609. uint32_t WeightScale = 0;
  610. uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale);
  611. for (MachineBasicBlock *Succ : MBB->successors()) {
  612. if (Succ->isLandingPad())
  613. continue;
  614. if (Succ == MBB)
  615. continue;
  616. BlockChain &SuccChain = *BlockToChain[Succ];
  617. // Don't split chains, either this chain or the successor's chain.
  618. if (&Chain == &SuccChain) {
  619. DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
  620. << getBlockName(Succ) << " (chain conflict)\n");
  621. continue;
  622. }
  623. uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ);
  624. if (LoopBlockSet.count(Succ)) {
  625. DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
  626. << getBlockName(Succ) << " (" << SuccWeight << ")\n");
  627. HasLoopingSucc = true;
  628. continue;
  629. }
  630. unsigned SuccLoopDepth = 0;
  631. if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
  632. SuccLoopDepth = ExitLoop->getLoopDepth();
  633. if (ExitLoop->contains(&L))
  634. BlocksExitingToOuterLoop.insert(MBB);
  635. }
  636. BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
  637. BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
  638. DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
  639. << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
  640. MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
  641. // Note that we bias this toward an existing layout successor to retain
  642. // incoming order in the absence of better information. The exit must have
  643. // a frequency higher than the current exit before we consider breaking
  644. // the layout.
  645. BranchProbability Bias(100 - ExitBlockBias, 100);
  646. if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
  647. ExitEdgeFreq > BestExitEdgeFreq ||
  648. (MBB->isLayoutSuccessor(Succ) &&
  649. !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
  650. BestExitEdgeFreq = ExitEdgeFreq;
  651. ExitingBB = MBB;
  652. }
  653. }
  654. if (!HasLoopingSucc) {
  655. // Restore the old exiting state, no viable looping successor was found.
  656. ExitingBB = OldExitingBB;
  657. BestExitEdgeFreq = OldBestExitEdgeFreq;
  658. continue;
  659. }
  660. }
  661. // Without a candidate exiting block or with only a single block in the
  662. // loop, just use the loop header to layout the loop.
  663. if (!ExitingBB || L.getNumBlocks() == 1)
  664. return nullptr;
  665. // Also, if we have exit blocks which lead to outer loops but didn't select
  666. // one of them as the exiting block we are rotating toward, disable loop
  667. // rotation altogether.
  668. if (!BlocksExitingToOuterLoop.empty() &&
  669. !BlocksExitingToOuterLoop.count(ExitingBB))
  670. return nullptr;
  671. DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
  672. return ExitingBB;
  673. }
  674. /// \brief Attempt to rotate an exiting block to the bottom of the loop.
  675. ///
  676. /// Once we have built a chain, try to rotate it to line up the hot exit block
  677. /// with fallthrough out of the loop if doing so doesn't introduce unnecessary
  678. /// branches. For example, if the loop has fallthrough into its header and out
  679. /// of its bottom already, don't rotate it.
  680. void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
  681. MachineBasicBlock *ExitingBB,
  682. const BlockFilterSet &LoopBlockSet) {
  683. if (!ExitingBB)
  684. return;
  685. MachineBasicBlock *Top = *LoopChain.begin();
  686. bool ViableTopFallthrough = false;
  687. for (MachineBasicBlock *Pred : Top->predecessors()) {
  688. BlockChain *PredChain = BlockToChain[Pred];
  689. if (!LoopBlockSet.count(Pred) &&
  690. (!PredChain || Pred == *std::prev(PredChain->end()))) {
  691. ViableTopFallthrough = true;
  692. break;
  693. }
  694. }
  695. // If the header has viable fallthrough, check whether the current loop
  696. // bottom is a viable exiting block. If so, bail out as rotating will
  697. // introduce an unnecessary branch.
  698. if (ViableTopFallthrough) {
  699. MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
  700. for (MachineBasicBlock *Succ : Bottom->successors()) {
  701. BlockChain *SuccChain = BlockToChain[Succ];
  702. if (!LoopBlockSet.count(Succ) &&
  703. (!SuccChain || Succ == *SuccChain->begin()))
  704. return;
  705. }
  706. }
  707. BlockChain::iterator ExitIt =
  708. std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
  709. if (ExitIt == LoopChain.end())
  710. return;
  711. std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
  712. }
  713. /// \brief Forms basic block chains from the natural loop structures.
  714. ///
  715. /// These chains are designed to preserve the existing *structure* of the code
  716. /// as much as possible. We can then stitch the chains together in a way which
  717. /// both preserves the topological structure and minimizes taken conditional
  718. /// branches.
  719. void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
  720. MachineLoop &L) {
  721. // First recurse through any nested loops, building chains for those inner
  722. // loops.
  723. for (MachineLoop *InnerLoop : L)
  724. buildLoopChains(F, *InnerLoop);
  725. SmallVector<MachineBasicBlock *, 16> BlockWorkList;
  726. BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
  727. // First check to see if there is an obviously preferable top block for the
  728. // loop. This will default to the header, but may end up as one of the
  729. // predecessors to the header if there is one which will result in strictly
  730. // fewer branches in the loop body.
  731. MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
  732. // If we selected just the header for the loop top, look for a potentially
  733. // profitable exit block in the event that rotating the loop can eliminate
  734. // branches by placing an exit edge at the bottom.
  735. MachineBasicBlock *ExitingBB = nullptr;
  736. if (LoopTop == L.getHeader())
  737. ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
  738. BlockChain &LoopChain = *BlockToChain[LoopTop];
  739. // FIXME: This is a really lame way of walking the chains in the loop: we
  740. // walk the blocks, and use a set to prevent visiting a particular chain
  741. // twice.
  742. SmallPtrSet<BlockChain *, 4> UpdatedPreds;
  743. assert(LoopChain.LoopPredecessors == 0);
  744. UpdatedPreds.insert(&LoopChain);
  745. for (MachineBasicBlock *LoopBB : L.getBlocks()) {
  746. BlockChain &Chain = *BlockToChain[LoopBB];
  747. if (!UpdatedPreds.insert(&Chain).second)
  748. continue;
  749. assert(Chain.LoopPredecessors == 0);
  750. for (MachineBasicBlock *ChainBB : Chain) {
  751. assert(BlockToChain[ChainBB] == &Chain);
  752. for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
  753. if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred))
  754. continue;
  755. ++Chain.LoopPredecessors;
  756. }
  757. }
  758. if (Chain.LoopPredecessors == 0)
  759. BlockWorkList.push_back(*Chain.begin());
  760. }
  761. buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
  762. rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
  763. DEBUG({
  764. // Crash at the end so we get all of the debugging output first.
  765. bool BadLoop = false;
  766. if (LoopChain.LoopPredecessors) {
  767. BadLoop = true;
  768. dbgs() << "Loop chain contains a block without its preds placed!\n"
  769. << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
  770. << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
  771. }
  772. for (MachineBasicBlock *ChainBB : LoopChain) {
  773. dbgs() << " ... " << getBlockName(ChainBB) << "\n";
  774. if (!LoopBlockSet.erase(ChainBB)) {
  775. // We don't mark the loop as bad here because there are real situations
  776. // where this can occur. For example, with an unanalyzable fallthrough
  777. // from a loop block to a non-loop block or vice versa.
  778. dbgs() << "Loop chain contains a block not contained by the loop!\n"
  779. << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
  780. << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
  781. << " Bad block: " << getBlockName(ChainBB) << "\n";
  782. }
  783. }
  784. if (!LoopBlockSet.empty()) {
  785. BadLoop = true;
  786. for (MachineBasicBlock *LoopBB : LoopBlockSet)
  787. dbgs() << "Loop contains blocks never placed into a chain!\n"
  788. << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
  789. << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
  790. << " Bad block: " << getBlockName(LoopBB) << "\n";
  791. }
  792. assert(!BadLoop && "Detected problems with the placement of this loop.");
  793. });
  794. }
  795. void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
  796. // Ensure that every BB in the function has an associated chain to simplify
  797. // the assumptions of the remaining algorithm.
  798. SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
  799. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
  800. MachineBasicBlock *BB = FI;
  801. BlockChain *Chain =
  802. new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
  803. // Also, merge any blocks which we cannot reason about and must preserve
  804. // the exact fallthrough behavior for.
  805. for (;;) {
  806. Cond.clear();
  807. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
  808. if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
  809. break;
  810. MachineFunction::iterator NextFI(std::next(FI));
  811. MachineBasicBlock *NextBB = NextFI;
  812. // Ensure that the layout successor is a viable block, as we know that
  813. // fallthrough is a possibility.
  814. assert(NextFI != FE && "Can't fallthrough past the last block.");
  815. DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
  816. << getBlockName(BB) << " -> " << getBlockName(NextBB)
  817. << "\n");
  818. Chain->merge(NextBB, nullptr);
  819. FI = NextFI;
  820. BB = NextBB;
  821. }
  822. }
  823. if (OutlineOptionalBranches) {
  824. // Find the nearest common dominator of all of F's terminators.
  825. MachineBasicBlock *Terminator = nullptr;
  826. for (MachineBasicBlock &MBB : F) {
  827. if (MBB.succ_size() == 0) {
  828. if (Terminator == nullptr)
  829. Terminator = &MBB;
  830. else
  831. Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
  832. }
  833. }
  834. // MBBs dominating this common dominator are unavoidable.
  835. UnavoidableBlocks.clear();
  836. for (MachineBasicBlock &MBB : F) {
  837. if (MDT->dominates(&MBB, Terminator)) {
  838. UnavoidableBlocks.insert(&MBB);
  839. }
  840. }
  841. }
  842. // Build any loop-based chains.
  843. for (MachineLoop *L : *MLI)
  844. buildLoopChains(F, *L);
  845. SmallVector<MachineBasicBlock *, 16> BlockWorkList;
  846. SmallPtrSet<BlockChain *, 4> UpdatedPreds;
  847. for (MachineBasicBlock &MBB : F) {
  848. BlockChain &Chain = *BlockToChain[&MBB];
  849. if (!UpdatedPreds.insert(&Chain).second)
  850. continue;
  851. assert(Chain.LoopPredecessors == 0);
  852. for (MachineBasicBlock *ChainBB : Chain) {
  853. assert(BlockToChain[ChainBB] == &Chain);
  854. for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
  855. if (BlockToChain[Pred] == &Chain)
  856. continue;
  857. ++Chain.LoopPredecessors;
  858. }
  859. }
  860. if (Chain.LoopPredecessors == 0)
  861. BlockWorkList.push_back(*Chain.begin());
  862. }
  863. BlockChain &FunctionChain = *BlockToChain[&F.front()];
  864. buildChain(&F.front(), FunctionChain, BlockWorkList);
  865. #ifndef NDEBUG
  866. typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
  867. #endif
  868. DEBUG({
  869. // Crash at the end so we get all of the debugging output first.
  870. bool BadFunc = false;
  871. FunctionBlockSetType FunctionBlockSet;
  872. for (MachineBasicBlock &MBB : F)
  873. FunctionBlockSet.insert(&MBB);
  874. for (MachineBasicBlock *ChainBB : FunctionChain)
  875. if (!FunctionBlockSet.erase(ChainBB)) {
  876. BadFunc = true;
  877. dbgs() << "Function chain contains a block not in the function!\n"
  878. << " Bad block: " << getBlockName(ChainBB) << "\n";
  879. }
  880. if (!FunctionBlockSet.empty()) {
  881. BadFunc = true;
  882. for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
  883. dbgs() << "Function contains blocks never placed into a chain!\n"
  884. << " Bad block: " << getBlockName(RemainingBB) << "\n";
  885. }
  886. assert(!BadFunc && "Detected problems with the block placement.");
  887. });
  888. // Splice the blocks into place.
  889. MachineFunction::iterator InsertPos = F.begin();
  890. for (MachineBasicBlock *ChainBB : FunctionChain) {
  891. DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
  892. : " ... ")
  893. << getBlockName(ChainBB) << "\n");
  894. if (InsertPos != MachineFunction::iterator(ChainBB))
  895. F.splice(InsertPos, ChainBB);
  896. else
  897. ++InsertPos;
  898. // Update the terminator of the previous block.
  899. if (ChainBB == *FunctionChain.begin())
  900. continue;
  901. MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB));
  902. // FIXME: It would be awesome of updateTerminator would just return rather
  903. // than assert when the branch cannot be analyzed in order to remove this
  904. // boiler plate.
  905. Cond.clear();
  906. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
  907. if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
  908. // The "PrevBB" is not yet updated to reflect current code layout, so,
  909. // o. it may fall-through to a block without explict "goto" instruction
  910. // before layout, and no longer fall-through it after layout; or
  911. // o. just opposite.
  912. //
  913. // AnalyzeBranch() may return erroneous value for FBB when these two
  914. // situations take place. For the first scenario FBB is mistakenly set
  915. // NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
  916. // is mistakenly pointing to "*BI".
  917. //
  918. bool needUpdateBr = true;
  919. if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
  920. PrevBB->updateTerminator();
  921. needUpdateBr = false;
  922. Cond.clear();
  923. TBB = FBB = nullptr;
  924. if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
  925. // FIXME: This should never take place.
  926. TBB = FBB = nullptr;
  927. }
  928. }
  929. // If PrevBB has a two-way branch, try to re-order the branches
  930. // such that we branch to the successor with higher weight first.
  931. if (TBB && !Cond.empty() && FBB &&
  932. MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
  933. !TII->ReverseBranchCondition(Cond)) {
  934. DEBUG(dbgs() << "Reverse order of the two branches: "
  935. << getBlockName(PrevBB) << "\n");
  936. DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
  937. << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
  938. DebugLoc dl; // FIXME: this is nowhere
  939. TII->RemoveBranch(*PrevBB);
  940. TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
  941. needUpdateBr = true;
  942. }
  943. if (needUpdateBr)
  944. PrevBB->updateTerminator();
  945. }
  946. }
  947. // Fixup the last block.
  948. Cond.clear();
  949. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
  950. if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
  951. F.back().updateTerminator();
  952. // Walk through the backedges of the function now that we have fully laid out
  953. // the basic blocks and align the destination of each backedge. We don't rely
  954. // exclusively on the loop info here so that we can align backedges in
  955. // unnatural CFGs and backedges that were introduced purely because of the
  956. // loop rotations done during this layout pass.
  957. if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
  958. return;
  959. if (FunctionChain.begin() == FunctionChain.end())
  960. return; // Empty chain.
  961. const BranchProbability ColdProb(1, 5); // 20%
  962. BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
  963. BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
  964. for (MachineBasicBlock *ChainBB : FunctionChain) {
  965. if (ChainBB == *FunctionChain.begin())
  966. continue;
  967. // Don't align non-looping basic blocks. These are unlikely to execute
  968. // enough times to matter in practice. Note that we'll still handle
  969. // unnatural CFGs inside of a natural outer loop (the common case) and
  970. // rotated loops.
  971. MachineLoop *L = MLI->getLoopFor(ChainBB);
  972. if (!L)
  973. continue;
  974. unsigned Align = TLI->getPrefLoopAlignment(L);
  975. if (!Align)
  976. continue; // Don't care about loop alignment.
  977. // If the block is cold relative to the function entry don't waste space
  978. // aligning it.
  979. BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
  980. if (Freq < WeightedEntryFreq)
  981. continue;
  982. // If the block is cold relative to its loop header, don't align it
  983. // regardless of what edges into the block exist.
  984. MachineBasicBlock *LoopHeader = L->getHeader();
  985. BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
  986. if (Freq < (LoopHeaderFreq * ColdProb))
  987. continue;
  988. // Check for the existence of a non-layout predecessor which would benefit
  989. // from aligning this block.
  990. MachineBasicBlock *LayoutPred =
  991. &*std::prev(MachineFunction::iterator(ChainBB));
  992. // Force alignment if all the predecessors are jumps. We already checked
  993. // that the block isn't cold above.
  994. if (!LayoutPred->isSuccessor(ChainBB)) {
  995. ChainBB->setAlignment(Align);
  996. continue;
  997. }
  998. // Align this block if the layout predecessor's edge into this block is
  999. // cold relative to the block. When this is true, other predecessors make up
  1000. // all of the hot entries into the block and thus alignment is likely to be
  1001. // important.
  1002. BranchProbability LayoutProb =
  1003. MBPI->getEdgeProbability(LayoutPred, ChainBB);
  1004. BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
  1005. if (LayoutEdgeFreq <= (Freq * ColdProb))
  1006. ChainBB->setAlignment(Align);
  1007. }
  1008. }
  1009. bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
  1010. // Check for single-block functions and skip them.
  1011. if (std::next(F.begin()) == F.end())
  1012. return false;
  1013. if (skipOptnoneFunction(*F.getFunction()))
  1014. return false;
  1015. MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  1016. MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  1017. MLI = &getAnalysis<MachineLoopInfo>();
  1018. TII = F.getSubtarget().getInstrInfo();
  1019. TLI = F.getSubtarget().getTargetLowering();
  1020. MDT = &getAnalysis<MachineDominatorTree>();
  1021. assert(BlockToChain.empty());
  1022. buildCFGChains(F);
  1023. BlockToChain.clear();
  1024. ChainAllocator.DestroyAll();
  1025. if (AlignAllBlock)
  1026. // Align all of the blocks in the function to a specific alignment.
  1027. for (MachineBasicBlock &MBB : F)
  1028. MBB.setAlignment(AlignAllBlock);
  1029. // We always return true as we have no way to track whether the final order
  1030. // differs from the original order.
  1031. return true;
  1032. }
  1033. namespace {
  1034. /// \brief A pass to compute block placement statistics.
  1035. ///
  1036. /// A separate pass to compute interesting statistics for evaluating block
  1037. /// placement. This is separate from the actual placement pass so that they can
  1038. /// be computed in the absence of any placement transformations or when using
  1039. /// alternative placement strategies.
  1040. class MachineBlockPlacementStats : public MachineFunctionPass {
  1041. /// \brief A handle to the branch probability pass.
  1042. const MachineBranchProbabilityInfo *MBPI;
  1043. /// \brief A handle to the function-wide block frequency pass.
  1044. const MachineBlockFrequencyInfo *MBFI;
  1045. public:
  1046. static char ID; // Pass identification, replacement for typeid
  1047. MachineBlockPlacementStats() : MachineFunctionPass(ID) {
  1048. initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
  1049. }
  1050. bool runOnMachineFunction(MachineFunction &F) override;
  1051. void getAnalysisUsage(AnalysisUsage &AU) const override {
  1052. AU.addRequired<MachineBranchProbabilityInfo>();
  1053. AU.addRequired<MachineBlockFrequencyInfo>();
  1054. AU.setPreservesAll();
  1055. MachineFunctionPass::getAnalysisUsage(AU);
  1056. }
  1057. };
  1058. }
  1059. char MachineBlockPlacementStats::ID = 0;
  1060. char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
  1061. INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
  1062. "Basic Block Placement Stats", false, false)
  1063. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  1064. INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
  1065. INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
  1066. "Basic Block Placement Stats", false, false)
  1067. bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
  1068. // Check for single-block functions and skip them.
  1069. if (std::next(F.begin()) == F.end())
  1070. return false;
  1071. MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  1072. MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  1073. for (MachineBasicBlock &MBB : F) {
  1074. BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
  1075. Statistic &NumBranches =
  1076. (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
  1077. Statistic &BranchTakenFreq =
  1078. (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
  1079. for (MachineBasicBlock *Succ : MBB.successors()) {
  1080. // Skip if this successor is a fallthrough.
  1081. if (MBB.isLayoutSuccessor(Succ))
  1082. continue;
  1083. BlockFrequency EdgeFreq =
  1084. BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
  1085. ++NumBranches;
  1086. BranchTakenFreq += EdgeFreq.getFrequency();
  1087. }
  1088. }
  1089. return false;
  1090. }