///////////////////////////////////////////////////////////////////////////////
//                                                                           //
// ScopeNestedCFG.cpp                                                        //
// Copyright (C) Microsoft Corporation. All rights reserved.                 //
// This file is distributed under the University of Illinois Open Source    //
// License. See LICENSE.TXT for details.                                     //
//                                                                           //
// Implements the ScopeNested CFG Transformation.                            //
//                                                                           //
///////////////////////////////////////////////////////////////////////////////

#include "DxilConvPasses/ScopeNestedCFG.h"
#include "llvm/Analysis/ReducibilityAnalysis.h"
#include "dxc/Support/Global.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
#include "llvm/Pass.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/CFG.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/BitVector.h"

#include <memory>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <set>
#include <algorithm>

using namespace llvm;
using std::unique_ptr;
using std::shared_ptr;
using std::pair;
using std::vector;
using std::unordered_map;
using std::unordered_set;
using std::set;

#define SNCFG_DBG 0

//===----------------------------------------------------------------------===//
// ScopeNested CFG Transformation
//===----------------------------------------------------------------------===//
//
// The transformation requires the following LLVM passes:
// -simplifycfg -loop-simplify -reg2mem_hlsl to be run on each function.
// This is to rely on LLVM's standard loop analysis info and to be able to
// clone basic blocks, if necessary.
//
// The core of the algorithm is the transformation of an acyclic CFG region
// into a region that corresponds to control flow with structured, nested
// scopes. Scoping information is conveyed by inserting helper basic blocks
// (BBs) and annotating their terminators with the corresponding
// "dx.BranchKind" metadata (see the BranchKind enum in ScopeNestedCFG.h) so
// that clients can recover the structure after the pass.
//
// To handle loops, the algorithm transforms each loop nest from the deepest
// nested loop upwards. Each transformed loop is conceptually treated as a
// single loop node, defined by its LoopEntry and LoopExit (if there is an
// exit) BB pair. A loop is made an acyclic region by "removing" its backedge.
// The process finishes by transforming the function body, starting from the
// entry basic block (BB).
//
// Transforming an acyclic region:
// 1. Topological ordering is done by DFS graph traversal.
//    - Each BB is assigned an ID.
//    - For each BB, a set of all reachable BBs is computed.
// 2. Using the topological block order, reachable merge points are propagated
//    along predecessors, and for each split point (if, switch), the closest
//    merge point is determined by intersecting the reachable merge-point sets
//    of the successor BBs. A switch uses a heuristic that picks the closest
//    merge point reachable via the majority of successors.
// 3. The CFG is transformed to have scope-nested structure. Some interesting
//    details:
//    - A custom scope stack is used to recover scopes.
//    - The transformation operates on the original CFG, with its original
//      structure preserved during transformation until the very last moment.
//      Cloned BBs are inserted into the CFG and their terminators temporarily
//      form self-loops. The implementation maintains a set of edges to
//      instantiate as the final step, which destroys the original CFG.
//    - Loops are treated as a single loop node identified via two BBs:
//      LoopBegin->LoopEnd. There is a subroutine to clone an entire loop, if
//      there is a need.
//    - The branches are annotated with dx.BranchKind.
//    - For a switch scope, the transformation identifies switch breaks, and
//      then recomputes merge points for scopes nested inside the switch scope.
//
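// As an illustration (a hand-written sketch, not actual pass output), the
// terminator of a block that opens an if-scope would carry metadata shaped
// roughly like this:
//
//   br i1 %cond, label %if.then, label %if.else, !dx.BranchKind !10
//   ...
//   !10 = !{i32 2}  ; the integer value of the corresponding BranchKind
//
// The exact integer depends on the BranchKind enumerator values declared in
// ScopeNestedCFG.h; AnnotateBranch/GetBranchAnnotation below write and read
// this node.
//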

namespace ScopeNestedCFGNS {

class ScopeNestedCFG : public FunctionPass {
public:
  static char ID;

  explicit ScopeNestedCFG()
      : FunctionPass(ID)
      , m_HelperExitCondIndex(0) {
    Clear();
    initializeScopeNestedCFGPass(*PassRegistry::getPassRegistry());
  }

  virtual bool runOnFunction(Function &F);

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequiredID(ReducibilityAnalysisID);
    AU.addRequired<LoopInfoWrapperPass>();
  }

private:
  struct LoopItem {
    BasicBlock *pLB;  // Loop begin
    BasicBlock *pLE;  // Loop end
    BasicBlock *pLP;  // Loop preheader
    BasicBlock *pLL;  // Loop latch

    LoopItem() { pLB = pLE = pLP = pLL = nullptr; }
  };

  LLVMContext *m_pCtx;
  Module *m_pModule;
  Function *m_pFunc;
  LoopInfo *m_pLI;
  unsigned m_HelperExitCondIndex;
  BasicBlock *m_pLoopHeader;
  vector<Loop *> m_Loops;
  unordered_map<BasicBlock*, LoopItem> m_LoopMap;
  unordered_map<BasicBlock*, BasicBlock*> m_LE2LBMap;

  void Clear();

  //
  // Preliminary CFG transformations and related utilities.
  //
  void SanitizeBranches();
  void SanitizeBranchesRec(BasicBlock *pBB, unordered_set<BasicBlock *> &VisitedBB);
  void CollectUniqueSuccessors(const BasicBlock *pBB,
                               const BasicBlock *pSuccessorToExclude,
                               vector<BasicBlock *> &Successors);

  //
  // Loop region transformations.
  //
  void CollectLoopsRec(Loop *pLoop);
  void AnnotateBranch(BasicBlock *pBB, BranchKind Kind);
  BranchKind GetBranchAnnotation(const BasicBlock *pBB);
  void RemoveBranchAnnotation(BasicBlock *pBB);
  void GetUniqueExitBlocks(const SmallVectorImpl<Loop::Edge> &ExitEdges,
                           SmallVectorImpl<BasicBlock *> &ExitBlocks);
  bool IsLoopBackedge(BasicBlock *pNode);
  bool IsAcyclicRegionTerminator(const BasicBlock *pBB);
  BasicBlock *GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB);
  bool IsMergePoint(BasicBlock *pBB);
  BasicBlock *SplitEdge(BasicBlock *pBB, unsigned SuccIdx, const Twine &Name,
                        Loop *pLoop, BasicBlock *pToInsertBB);
  BasicBlock *SplitEdge(BasicBlock *pBB, BasicBlock *pSucc, const Twine &Name,
                        Loop *pLoop, BasicBlock *pToInsertBB);
  /// Ensure that the latch node terminates by an unconditional branch.
  /// Return the latch node.
  BasicBlock *SanitizeLoopLatch(Loop *pLoop);
  unsigned GetHelperExitCondIndex() { return m_HelperExitCondIndex++; }
  /// Ensure that the loop has either a single exit or no exits.
  /// Return the exit node or nullptr.
  BasicBlock *SanitizeLoopExits(Loop *pLoop);
  void SanitizeLoopContinues(Loop *pLoop);
  void AnnotateLoopBranches(Loop *pLoop, LoopItem *pLI);

  //
  // BasicBlock topological order and reachability sets for acyclic region.
  //
  class BlockTopologicalOrderAndReachability {
  public:
    void AppendBlock(BasicBlock *pBB, unique_ptr<BitVector> ReachableBBs);
    unsigned GetNumBlocks() const;
    BasicBlock *GetBlock(unsigned Id) const;
    unsigned GetBlockId(BasicBlock *pBB) const;
    BitVector *GetReachableBBs(BasicBlock *pBB) const;
    BitVector *GetReachableBBs(unsigned Id) const;
    void dump(raw_ostream &OS) const;

  private:
    struct BasicBlockState {
      BasicBlock *pBB;
      unique_ptr<BitVector> ReachableBBs;

      BasicBlockState(BasicBlock *p, unique_ptr<BitVector> bv)
          : pBB(p), ReachableBBs(std::move(bv)) {}
    };
    vector<BasicBlockState> m_BlockState;
    unordered_map<BasicBlock *, unsigned> m_BlockIdMap;
  };
  void ComputeBlockTopologicalOrderAndReachability(BasicBlock *pEntry,
                                                   BlockTopologicalOrderAndReachability &BTO);
  void ComputeBlockTopologicalOrderAndReachabilityRec(BasicBlock *pNode,
                                                      BlockTopologicalOrderAndReachability &BTO,
                                                      unordered_map<BasicBlock *, unsigned> &Marks);

  //
  // Recovery of scope end points.
  //
  struct MergePointInfo {
    unsigned MP;  // Index of the merge point, if known.
    set<unsigned> CandidateSet;
  };
  using MergePointsMap = unordered_map<BasicBlock *, unique_ptr<MergePointInfo> >;
  using ScopeEndPointsMap = unordered_map<BasicBlock *, BasicBlock *>;
  using SwitchBreaksMap = unordered_map<BasicBlock *, BasicBlock *>;
  void DetermineScopeEndPoints(BasicBlock *pEntry,
                               bool bRecomputeSwitchScope,
                               const BlockTopologicalOrderAndReachability &BTO,
                               const SwitchBreaksMap &SwitchBreaks,
                               ScopeEndPointsMap &ScopeEndPoints,
                               ScopeEndPointsMap &DeltaScopeEndPoints);
  void DetermineReachableMergePoints(BasicBlock *pEntry,
                                     BasicBlock *pExit,
                                     bool bRecomputeSwitchScope,
                                     const BitVector *pReachableBBs,
                                     const BlockTopologicalOrderAndReachability &BTO,
                                     const SwitchBreaksMap &SwitchBreaks,
                                     const ScopeEndPointsMap &OldScopeEndPoints,
                                     MergePointsMap &MergePoints);
  void DetermineSwitchBreaks(BasicBlock *pSwitchBegin,
                             const ScopeEndPointsMap &ScopeEndPoints,
                             const BlockTopologicalOrderAndReachability &BTO,
                             SwitchBreaksMap &SwitchBreaks);

  //
  // Transformation of acyclic region.
  //
  void TransformAcyclicRegion(BasicBlock *pEntry);

  // Scope stack.
  struct ScopeStackItem {
    enum class Kind {
      Invalid = 0,
      Return,
      Fallthrough,
      If,
      Switch,
    };

    Kind ScopeKind;
    BasicBlock *pScopeBeginBB;
    BasicBlock *pClonedScopeBeginBB;
    BasicBlock *pScopeEndBB;
    BasicBlock *pClonedScopeEndBB;
    unsigned SuccIdx;
    BasicBlock *pPrevSuccBB;
    BasicBlock *pClonedPrevSuccBB;
    shared_ptr<ScopeEndPointsMap> ScopeEndPoints;
    bool bRestoreIfScopeEndPoint;
    shared_ptr<ScopeEndPointsMap> DeltaScopeEndPoints;
    shared_ptr<SwitchBreaksMap> SwitchBreaks;

    ScopeStackItem()
        : ScopeKind(Kind::Invalid)
        , pScopeBeginBB(nullptr)
        , pClonedScopeBeginBB(nullptr)
        , pScopeEndBB(nullptr)
        , pClonedScopeEndBB(nullptr)
        , SuccIdx(0)
        , pPrevSuccBB(nullptr)
        , pClonedPrevSuccBB(nullptr)
        , bRestoreIfScopeEndPoint(false)
    {}
  };
  vector<ScopeStackItem> m_ScopeStack;

  ScopeStackItem &PushScope(BasicBlock *pBB);
  ScopeStackItem &RePushScope(const ScopeStackItem &Scope);
  ScopeStackItem *GetScope(unsigned Idx = 0);
  ScopeStackItem *FindParentScope(ScopeStackItem::Kind ScopeKind);
  void PopScope();

  // Cloning.
  void AddEdge(BasicBlock *pClonedSrcBB, unsigned SuccSlotIdx, BasicBlock *pDstBB,
               unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges);
  BasicBlock *CloneBasicBlockAndFixupValues(const BasicBlock *pBB,
                                            ValueToValueMapTy &RegionValueRemap,
                                            const Twine &NameSuffix = "");
  BasicBlock *CloneNode(BasicBlock *pBB,
                        unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
                        ValueToValueMapTy &RegionValueRemap);
  BasicBlock *CloneLoop(BasicBlock *pHeaderBB,
                        BasicBlock *pClonedPreHeaderBB,
                        unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
                        unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges,
                        ValueToValueMapTy &RegionValueRemap);
  BasicBlock *CloneLoopRec(BasicBlock *pBB,
                           BasicBlock *pClonedPredBB,
                           unsigned ClonedPredIdx,
                           unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
                           unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges,
                           unordered_set<BasicBlock *> &VisitedBlocks,
                           const LoopItem &LI,
                           LoopItem &ClonedLI,
                           ValueToValueMapTy &RegionValueRemap);

  //
  // Utility functions.
  //
  bool IsIf(BasicBlock *pBB);
  bool IsIf(TerminatorInst *pTI);
  bool IsSwitch(BasicBlock *pBB);
  bool IsSwitch(TerminatorInst *pTI);
  Value *GetFalse();
  Value *GetTrue();
  ConstantInt *GetI32Const(int v);
  void DumpIntSet(raw_ostream &s, set<unsigned> Set);
};
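
// How this pass is typically scheduled (an illustrative sketch, not taken
// from this file; it assumes the createScopeNestedCFGPass() factory declared
// in DxilConvPasses/ScopeNestedCFG.h and uses the legacy pass manager; the
// dxc-specific -reg2mem_hlsl prerequisite is scheduled by the dxilconv
// pipeline itself):
//
//   llvm::legacy::PassManager PM;
//   PM.add(llvm::createCFGSimplificationPass()); // -simplifycfg
//   PM.add(llvm::createLoopSimplifyPass());      // -loop-simplify
//   PM.add(llvm::createScopeNestedCFGPass());    // this pass
//   PM.run(M);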

char ScopeNestedCFG::ID = 0;

bool ScopeNestedCFG::runOnFunction(Function &F) {
#if SNCFG_DBG
  dbgs() << "ScopeNestedCFG: processing function " << F.getName() << "\n";
#endif
  Clear();
  m_pCtx = &F.getContext();
  m_pModule = F.getParent();
  m_pFunc = &F;
  m_pLI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

  // Sanitize branches.
  SanitizeBranches();

  // Collect loops, innermost to outermost.
  for (auto itLoop = m_pLI->begin(), endLoop = m_pLI->end(); itLoop != endLoop; ++itLoop) {
    Loop *pLoop = *itLoop;
    CollectLoopsRec(pLoop);
  }

  //
  // Phase 1:
  //  - verify, analyze and prepare loop shape
  //  - record loop information
  //  - classify and annotate loop branches
  //
  for (size_t iLoop = 0; iLoop < m_Loops.size(); iLoop++) {
    Loop *pLoop = m_Loops[iLoop];
    BasicBlock *pPreHeader = pLoop->getLoopPreheader();
    BasicBlock *pHeader = pLoop->getHeader();
    BasicBlock *pLatch = pLoop->getLoopLatch();
    BasicBlock *pExit = nullptr;

    // Make sure there is a preheader.
    IFTBOOL(pPreHeader != nullptr, DXC_E_SCOPE_NESTED_FAILED);
    // Make sure there is a single backedge.
    IFTBOOL(pLoop->getNumBackEdges() == 1, DXC_E_SCOPE_NESTED_FAILED);

    // Prepare loop latch.
    pLatch = SanitizeLoopLatch(pLoop);
    // Prepare exits and breaks.
    pExit = SanitizeLoopExits(pLoop);
    // Prepare continues.
    SanitizeLoopContinues(pLoop);

    // Record essential loop information.
    LoopItem LI;
    LI.pLB = pHeader;
    LI.pLE = pExit;
    LI.pLL = pLatch;
    LI.pLP = pPreHeader;
    DXASSERT_NOMSG(m_LoopMap.find(LI.pLB) == m_LoopMap.end());
    m_LoopMap[LI.pLB] = LI;
    if (LI.pLE != nullptr) {
      DXASSERT_NOMSG(m_LE2LBMap.find(LI.pLE) == m_LE2LBMap.end());
      m_LE2LBMap[LI.pLE] = LI.pLB;
    }

    // Annotate known branches for the loop.
    AnnotateLoopBranches(pLoop, &LI);
  }

  //
  // Phase 2:
  //  - for each loop, from the innermost:
  //    + "remove" the backedge
  //    + transform the acyclic region
  //  - transform the entry region
  //
  for (size_t iLoop = 0; iLoop < m_Loops.size(); iLoop++) {
    Loop *pLoop = m_Loops[iLoop];
    BasicBlock *pHeader = pLoop->getHeader();
    LoopItem LI = m_LoopMap[pHeader];
    BasicBlock *pLatch = LI.pLL;
    DXASSERT_LOCALVAR_NOMSG(pLatch, pLatch->getTerminator()->getNumSuccessors() == 1 &&
                                    pLatch->getTerminator()->getSuccessor(0) == pHeader);
    m_pLoopHeader = pHeader;
    TransformAcyclicRegion(pHeader);
  }
  m_pLoopHeader = nullptr;
  TransformAcyclicRegion(F.begin());

  return true;
}

void ScopeNestedCFG::Clear() {
  m_pCtx = nullptr;
  m_pModule = nullptr;
  m_pFunc = nullptr;
  m_pLI = nullptr;
  m_HelperExitCondIndex = 0;
  m_pLoopHeader = nullptr;
  m_Loops.clear();
  m_LoopMap.clear();
  m_LE2LBMap.clear();
}

//-----------------------------------------------------------------------------
// Preliminary CFG transformations and related utilities.
//-----------------------------------------------------------------------------
void ScopeNestedCFG::SanitizeBranches() {
  unordered_set<BasicBlock *> VisitedBB;
  SanitizeBranchesRec(m_pFunc->begin(), VisitedBB);
}

void ScopeNestedCFG::SanitizeBranchesRec(BasicBlock *pBB, unordered_set<BasicBlock *> &VisitedBB) {
  // Mark pBB as visited, and return if pBB has already been visited.
  if (!VisitedBB.emplace(pBB).second)
    return;

  // Sanitize branch.
  if (BranchInst *I = dyn_cast<BranchInst>(pBB->getTerminator())) {
    // a. Convert a conditional branch to unconditional, if the successors are the same.
    if (I->isConditional()) {
      BasicBlock *pSucc1 = I->getSuccessor(0);
      BasicBlock *pSucc2 = I->getSuccessor(1);
      if (pSucc1 == pSucc2) {
        BranchInst::Create(pSucc1, I);
        I->eraseFromParent();
      }
    }
  } else if (SwitchInst *I = dyn_cast<SwitchInst>(pBB->getTerminator())) {
    // b. Group switch successors.
    struct SwitchCaseGroup {
      BasicBlock *pSuccBB;
      vector<ConstantInt *> CaseValue;
    };
    vector<SwitchCaseGroup> SwitchCaseGroups;
    unordered_map<BasicBlock *, unsigned> BB2GroupIdMap;
    BasicBlock *pDefaultBB = I->getDefaultDest();
    for (SwitchInst::CaseIt itCase = I->case_begin(), endCase = I->case_end(); itCase != endCase; ++itCase) {
      BasicBlock *pSuccBB = itCase.getCaseSuccessor();
      ConstantInt *pCaseValue = itCase.getCaseValue();
      if (pSuccBB == pDefaultBB) {
        // Assimilate this case label into the default label.
        continue;
      }

      auto itGroup = BB2GroupIdMap.insert( {pSuccBB, SwitchCaseGroups.size()} );
      if (itGroup.second) {
        SwitchCaseGroups.emplace_back(SwitchCaseGroup{});
      }
      SwitchCaseGroup &G = SwitchCaseGroups[itGroup.first->second];
      G.pSuccBB = pSuccBB;
      G.CaseValue.emplace_back(pCaseValue);
    }

    if (SwitchCaseGroups.size() == 0) {
      // All case labels were assimilated into the default label.
      // Replace the switch with an unconditional branch.
      BranchInst::Create(pDefaultBB, I);
      I->eraseFromParent();
    } else {
      // Rewrite the switch instruction such that case labels are grouped by successor.
      unsigned CaseIdx = 0;
      for (const SwitchCaseGroup &G : SwitchCaseGroups) {
        for (ConstantInt *pCaseValue : G.CaseValue) {
          SwitchInst::CaseIt itCase(I, CaseIdx++);
          itCase.setSuccessor(G.pSuccBB);
          itCase.setValue(pCaseValue);
        }
      }
      // Remove unused case labels.
      for (unsigned NumCases = I->getNumCases(); CaseIdx < NumCases; NumCases--) {
        I->removeCase(SwitchInst::CaseIt{I, NumCases-1});
      }
    }
  }

  // Recurse, visiting each successor group once.
  TerminatorInst *pTI = pBB->getTerminator();
  BasicBlock *pPrevSuccBB = nullptr;
  for (unsigned i = 0; i < pTI->getNumSuccessors(); i++) {
    BasicBlock *pSuccBB = pTI->getSuccessor(i);
    if (pSuccBB != pPrevSuccBB) {
      SanitizeBranchesRec(pSuccBB, VisitedBB);
    }
    pPrevSuccBB = pSuccBB;
  }
}
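
// For example (an informal sketch), a switch of the form
//
//   switch i32 %x, label %default [ i32 0, label %a
//                                   i32 1, label %default
//                                   i32 2, label %a ]
//
// is rewritten so that the case for value 1 is folded into the default label,
// and the cases for values 0 and 2 become adjacent entries targeting %a, so
// that later phases can treat each distinct successor as one case group.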

void ScopeNestedCFG::CollectUniqueSuccessors(const BasicBlock *pBB,
                                             const BasicBlock *pSuccessorToExclude,
                                             vector<BasicBlock *> &Successors) {
  DXASSERT_NOMSG(Successors.empty());
  const TerminatorInst *pTI = pBB->getTerminator();
  BasicBlock *pPrevSuccBB = nullptr;
  for (unsigned i = 0; i < pTI->getNumSuccessors(); i++) {
    BasicBlock *pSuccBB = pTI->getSuccessor(i);
    if (pSuccBB != pPrevSuccBB) {
      pPrevSuccBB = pSuccBB;
      if (pSuccBB != pSuccessorToExclude)
        Successors.emplace_back(pSuccBB);
    }
  }
}

//-----------------------------------------------------------------------------
// Loop region transformations.
//-----------------------------------------------------------------------------
void ScopeNestedCFG::CollectLoopsRec(Loop *pLoop) {
  for (auto itLoop = pLoop->begin(), endLoop = pLoop->end(); itLoop != endLoop; ++itLoop) {
    Loop *pNestedLoop = *itLoop;
    CollectLoopsRec(pNestedLoop);
  }
  m_Loops.emplace_back(pLoop);
}

void ScopeNestedCFG::AnnotateBranch(BasicBlock *pBB, BranchKind Kind) {
  TerminatorInst *pTI = pBB->getTerminator();
  DXASSERT(dyn_cast<BranchInst>(pTI) != nullptr || dyn_cast<SwitchInst>(pTI) != nullptr,
           "annotate only branch and switch terminators");
  // Check that we are not changing the annotation.
  MDNode *pMD = pTI->getMetadata("dx.BranchKind");
  if (pMD != nullptr) {
    ConstantAsMetadata *p1 = dyn_cast<ConstantAsMetadata>(pMD->getOperand(0));
    ConstantInt *pVal = dyn_cast<ConstantInt>(p1->getValue());
    BranchKind OldKind = (BranchKind)pVal->getZExtValue();
    DXASSERT_LOCALVAR(OldKind, OldKind == Kind ||
                      (OldKind == BranchKind::IfBegin && Kind == BranchKind::IfNoEnd) ||
                      (OldKind == BranchKind::IfNoEnd && Kind == BranchKind::IfBegin),
                      "the algorithm should not be changing branch types implicitly (unless it is an if)");
  }
  pTI->setMetadata("dx.BranchKind", MDNode::get(*m_pCtx, ConstantAsMetadata::get(GetI32Const((int)Kind))));
}

BranchKind ScopeNestedCFG::GetBranchAnnotation(const BasicBlock *pBB) {
  const TerminatorInst *pTI = pBB->getTerminator();
  MDNode *pMD = pTI->getMetadata("dx.BranchKind");
  if (pMD != nullptr) {
    ConstantAsMetadata *p1 = dyn_cast<ConstantAsMetadata>(pMD->getOperand(0));
    ConstantInt *pVal = dyn_cast<ConstantInt>(p1->getValue());
    return (BranchKind)pVal->getZExtValue();
  }
  return BranchKind::Invalid;
}

void ScopeNestedCFG::RemoveBranchAnnotation(BasicBlock *pBB) {
  TerminatorInst *pTI = pBB->getTerminator();
  pTI->setMetadata("dx.BranchKind", nullptr);
}

void ScopeNestedCFG::GetUniqueExitBlocks(const SmallVectorImpl<Loop::Edge> &ExitEdges,
                                         SmallVectorImpl<BasicBlock *> &ExitBlocks) {
  DXASSERT_NOMSG(ExitBlocks.empty());
  unordered_set<BasicBlock *> S;
  for (size_t i = 0; i < ExitEdges.size(); i++) {
    const Loop::Edge &E = ExitEdges[i];
    BasicBlock *B = const_cast<BasicBlock *>(E.second);
    auto itp = S.insert(B);
    if (itp.second) {
      ExitBlocks.push_back(B);
    }
  }
}

bool ScopeNestedCFG::IsLoopBackedge(BasicBlock *pNode) {
  BranchKind BK = GetBranchAnnotation(pNode);
  if (BK == BranchKind::LoopBackEdge) {
    DXASSERT_NOMSG(pNode->getTerminator()->getNumSuccessors() == 1);
    DXASSERT_NOMSG(pNode->getTerminator()->getSuccessor(0) == m_pLoopHeader);
    return true;
  }
  return false;
}

BasicBlock *ScopeNestedCFG::GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB) {
  BasicBlock *pEffectiveSuccessor = nullptr;
  BranchKind BK = GetBranchAnnotation(pBB);
  switch (BK) {
  case BranchKind::LoopBegin: {
    TerminatorInst *pTI = pBB->getTerminator();
    DXASSERT_NOMSG(pTI->getNumSuccessors() == 1);
    BasicBlock *pLoopHead = pTI->getSuccessor(0);
    auto itLoop = m_LoopMap.find(pLoopHead);
    DXASSERT_NOMSG(itLoop != m_LoopMap.end());
    const LoopItem &LI = itLoop->second;
    DXASSERT_NOMSG(LI.pLB == pLoopHead && LI.pLP == pBB);
    DXASSERT_NOMSG(LI.pLE->getTerminator()->getNumSuccessors() == 1);
    pEffectiveSuccessor = LI.pLE;
    break;
  }
  case BranchKind::LoopNoEnd:
    pEffectiveSuccessor = nullptr;
    break;
  default:
    pEffectiveSuccessor = pBB;
    break;
  }
  return pEffectiveSuccessor;
}
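
// To illustrate the loop-node abstraction (an informal sketch): when pBB is a
// preheader annotated LoopBegin, its "effective node" is the loop exit, so an
// acyclic traversal that asks for pBB's successors continues from the exit:
//
//   pre(LoopBegin) -> header -> ... -> latch --(backedge)--> header
//                           \-> break helper -> exit(LoopExit) -> after
//
// and the whole loop collapses into the node pair pre -> exit. For a LoopNoEnd
// loop there is no exit, so nullptr is returned and the traversal stops there.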

bool ScopeNestedCFG::IsMergePoint(BasicBlock *pBB) {
  unordered_set<BasicBlock *> UniquePredecessors;
  for (auto itPred = pred_begin(pBB), endPred = pred_end(pBB); itPred != endPred; ++itPred) {
    BasicBlock *pPredBB = *itPred;
    if (IsLoopBackedge(pPredBB))
      continue;
    UniquePredecessors.insert(pPredBB);
  }
  return UniquePredecessors.size() >= 2;
}

bool ScopeNestedCFG::IsAcyclicRegionTerminator(const BasicBlock *pNode) {
  // Return.
  if (dyn_cast<ReturnInst>(pNode->getTerminator()))
    return true;

  BranchKind BK = GetBranchAnnotation(pNode);
  switch (BK) {
  case BranchKind::LoopBreak:
  case BranchKind::LoopContinue:
  case BranchKind::LoopBackEdge:
    return true;
  }
  return false;
}

BasicBlock *ScopeNestedCFG::SplitEdge(BasicBlock *pBB, BasicBlock *pSucc, const Twine &Name,
                                      Loop *pLoop, BasicBlock *pToInsertBB) {
  unsigned SuccIdx = GetSuccessorNumber(pBB, pSucc);
  return SplitEdge(pBB, SuccIdx, Name, pLoop, pToInsertBB);
}

BasicBlock *ScopeNestedCFG::SplitEdge(BasicBlock *pBB, unsigned SuccIdx, const Twine &Name,
                                      Loop *pLoop, BasicBlock *pToInsertBB) {
  BasicBlock *pNewBB = pToInsertBB;
  if (pToInsertBB == nullptr) {
    pNewBB = BasicBlock::Create(*m_pCtx, Name, m_pFunc, pBB->getNextNode());
  }
  if (pLoop != nullptr) {
    pLoop->addBasicBlockToLoop(pNewBB, *m_pLI);
  }
  BasicBlock *pSucc = pBB->getTerminator()->getSuccessor(SuccIdx);
  pBB->getTerminator()->setSuccessor(SuccIdx, pNewBB);
  if (pToInsertBB == nullptr) {
    BranchInst::Create(pSucc, pNewBB);
  } else {
    TerminatorInst *pTI = pNewBB->getTerminator();
    DXASSERT_NOMSG(dyn_cast<BranchInst>(pTI) != nullptr && pTI->getNumSuccessors() == 1);
    pTI->setSuccessor(0, pSucc);
  }
  return pNewBB;
}
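
// Edge splitting in a picture (an informal sketch): SplitEdge(A, idx-of-B, ...)
// turns
//
//   A -> B      into      A -> N -> B
//
// where N is either freshly created (named by the Name argument) with an
// unconditional branch, or the caller-supplied pToInsertBB whose
// single-successor branch is retargeted to B.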

BasicBlock *ScopeNestedCFG::SanitizeLoopLatch(Loop *pLoop) {
  BasicBlock *pHeader = pLoop->getHeader();
  BasicBlock *pLatch = pLoop->getLoopLatch();
  TerminatorInst *pTI = pLatch->getTerminator();
  DXASSERT_NOMSG(pTI->getNumSuccessors() != 0 && dyn_cast<ReturnInst>(pTI) == nullptr);
  BasicBlock *pNewLatch = pLatch;
  // Make sure that the latch node is empty and terminates with a 'br'.
  if (dyn_cast<BranchInst>(pTI) == nullptr ||
      (&*pLatch->begin()) != pTI ||
      pTI->getNumSuccessors() > 1) {
    pNewLatch = SplitEdge(pLatch, pHeader, "dx.LoopLatch", pLoop, nullptr);
  }
  return pNewLatch;
}

BasicBlock *ScopeNestedCFG::SanitizeLoopExits(Loop *pLoop) {
  Loop *pOuterLoop = pLoop->getParentLoop();
  BasicBlock *pPreHeader = pLoop->getLoopPreheader();
  BasicBlock *pLatch = pLoop->getLoopLatch();
  SmallVector<Loop::Edge, 8> ExitEdges;
  pLoop->getExitEdges(ExitEdges);
  SmallVector<BasicBlock *, 8> OldExitBBs;
  GetUniqueExitBlocks(ExitEdges, OldExitBBs);
  if (OldExitBBs.empty()) {
    // A loop without breaks.
    return nullptr;
  }

  // Create the loop exit BB.
  BasicBlock *pExit = BasicBlock::Create(*m_pCtx, "dx.LoopExit", m_pFunc, pLatch->getNextNode());
  if (pOuterLoop != nullptr) {
    pOuterLoop->addBasicBlockToLoop(pExit, *m_pLI);
  }

  // Create helper exit blocks.
  SmallVector<BasicBlock *, 8> HelperExitBBs;
  for (size_t iExitBB = 0; iExitBB < OldExitBBs.size(); iExitBB++) {
    BasicBlock *pOldExit = OldExitBBs[iExitBB];
    BasicBlock *pNewExit = BasicBlock::Create(*m_pCtx, "dx.LoopExitHelper", m_pFunc, pLatch->getNextNode());
    HelperExitBBs.push_back(pNewExit);
    if (pOuterLoop != nullptr) {
      pOuterLoop->addBasicBlockToLoop(pNewExit, *m_pLI);
    }

    // Adjust exit edges.
    SmallVector<BasicBlock *, 8> OldExitPredBBs;
    for (auto itPred = pred_begin(pOldExit), endPred = pred_end(pOldExit); itPred != endPred; ++itPred) {
      OldExitPredBBs.push_back(*itPred);
    }
    for (size_t PredIdx = 0; PredIdx < OldExitPredBBs.size(); PredIdx++) {
      BasicBlock *pOldExitPred = OldExitPredBBs[PredIdx];
      if (pLoop->contains(pOldExitPred)) {
        unsigned PredSuccIdx = GetSuccessorNumber(pOldExitPred, pOldExit);
        pOldExitPred->getTerminator()->setSuccessor(PredSuccIdx, pNewExit);
      }
    }
    DXASSERT_NOMSG(pred_begin(pNewExit) != pred_end(pNewExit));

    // Connect the helper exit to the loop exit node.
    BranchInst::Create(pExit, pNewExit);
  }
  DXASSERT_NOMSG(HelperExitBBs.size() == OldExitBBs.size());

  // Fix up conditions for the rest of execution.
  unsigned NumExits = HelperExitBBs.size();
  BasicBlock *pRestOfExecutionBB = OldExitBBs.back();
  BranchInst::Create(pRestOfExecutionBB, pExit);
  for (unsigned i = 0; i < NumExits - 1; i++) {
    unsigned ExitIdx = NumExits - 2 - i;
    BasicBlock *pExitHelper = HelperExitBBs[ExitIdx];
    BasicBlock *pOldExit = OldExitBBs[ExitIdx];
    // Declare the helper-exit guard variable.
    AllocaInst *pAI = new AllocaInst(Type::getInt1Ty(*m_pCtx),
                                     Twine("dx.LoopExitHelperCond"),
                                     m_pFunc->begin()->begin());
    // Initialize the guard to 'false' before the loop.
    new StoreInst(GetFalse(), pAI, pPreHeader->getTerminator());
    // Assign the guard to 'true' in the exit helper.
    new StoreInst(GetTrue(), pAI, pExitHelper->begin());
    // Insert an 'if' to conditionally guard exit execution.
    BasicBlock *pIfBB = BasicBlock::Create(*m_pCtx, "dx.LoopExitHelperIf", m_pFunc, pExit->getNextNode());
    if (pOuterLoop != nullptr) {
      pOuterLoop->addBasicBlockToLoop(pIfBB, *m_pLI);
    }
    LoadInst *pLoadCondI = new LoadInst(pAI);
    (void)BranchInst::Create(pOldExit, pRestOfExecutionBB, pLoadCondI, pIfBB);
    pIfBB->getInstList().insert(pIfBB->begin(), pLoadCondI);
    // Adjust the rest-of-computation point.
    pExit->getTerminator()->setSuccessor(0, pIfBB);
    pRestOfExecutionBB = pIfBB;
  }

  // Duplicate helper exit nodes such that each has a unique predecessor.
  for (size_t iHelperBB = 0; iHelperBB < HelperExitBBs.size(); iHelperBB++) {
    BasicBlock *pHelperBB = HelperExitBBs[iHelperBB];
    // Collect unique predecessors.
    SmallVector<BasicBlock *, 8> PredBBs;
    unordered_set<BasicBlock *> UniquePredBBs;
    for (auto itPred = pred_begin(pHelperBB), endPred = pred_end(pHelperBB); itPred != endPred; ++itPred) {
      BasicBlock *pPredBB = *itPred;
      auto P = UniquePredBBs.insert(pPredBB);
      if (P.second) {
        PredBBs.push_back(pPredBB);
      }
    }
    // Duplicate the helper node.
    BasicBlock *pInsertionBB = PredBBs[0];
    for (size_t iSrc = 1; iSrc < PredBBs.size(); iSrc++) {
      BasicBlock *pPredBB = PredBBs[iSrc];
      ValueToValueMapTy EmptyRemap;
      BasicBlock *pClone = CloneBasicBlockAndFixupValues(pHelperBB, EmptyRemap);
      if (pOuterLoop != nullptr) {
        pOuterLoop->addBasicBlockToLoop(pClone, *m_pLI);
      }
      // Redirect predecessor successors.
      for (unsigned PredSuccIdx = 0; PredSuccIdx < pPredBB->getTerminator()->getNumSuccessors(); PredSuccIdx++) {
        if (pPredBB->getTerminator()->getSuccessor(PredSuccIdx) != pHelperBB)
          continue;
        pPredBB->getTerminator()->setSuccessor(PredSuccIdx, pClone);
        // Update LoopInfo.
        if (pOuterLoop != nullptr && !pOuterLoop->contains(pExit)) {
          pOuterLoop->addBasicBlockToLoop(pExit, *m_pLI);
        }
        // Insert into the function.
        m_pFunc->getBasicBlockList().insertAfter(pInsertionBB, pClone);
        pInsertionBB = pClone;
      }
    }
  }
  return pExit;
}
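
// The net effect of SanitizeLoopExits, sketched informally for a loop that
// breaks to two different blocks A and B:
//
//   break-to-A preds -> dx.LoopExitHelper(A) --+
//   break-to-B preds -> dx.LoopExitHelper(B) --+--> dx.LoopExit
//   dx.LoopExit -> dx.LoopExitHelperIf -> (guard ? A : B)
//
// Helper (A) sets an i1 guard (dx.LoopExitHelperCond) that is initialized to
// false in the preheader, so the loop ends up with a single exit node and the
// original multi-way exit is re-dispatched after the loop via the guard.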

void ScopeNestedCFG::SanitizeLoopContinues(Loop *pLoop) {
  BasicBlock *pLatch = pLoop->getLoopLatch();
  TerminatorInst *pLatchTI = pLatch->getTerminator();
  DXASSERT_LOCALVAR_NOMSG(pLatchTI, dyn_cast<BranchInst>(pLatchTI) != nullptr &&
                                    pLatchTI->getNumSuccessors() == 1 &&
                                    (&*pLatch->begin()) == pLatchTI);
  // Collect continue BBs.
  SmallVector<BasicBlock *, 8> LatchPredBBs;
  for (auto itPred = pred_begin(pLatch), endPred = pred_end(pLatch); itPred != endPred; ++itPred) {
    BasicBlock *pPredBB = *itPred;
    LatchPredBBs.push_back(pPredBB);
  }
  DXASSERT_NOMSG(LatchPredBBs.size() >= 1);
  // Insert continue helpers.
  for (size_t i = 0; i < LatchPredBBs.size(); i++) {
    BasicBlock *pPredBB = LatchPredBBs[i];
    BasicBlock *pContinue = SplitEdge(pPredBB, pLatch, "dx.LoopContinue", pLoop, nullptr);
    DXASSERT_LOCALVAR_NOMSG(pContinue, pContinue->getTerminator()->getNumSuccessors() == 1);
    DXASSERT_NOMSG((++pred_begin(pContinue)) == pred_end(pContinue));
  }
}

void ScopeNestedCFG::AnnotateLoopBranches(Loop *pLoop, LoopItem *pLI) {
  // Annotate LB & LE.
  if (pLI->pLE != nullptr) {
    AnnotateBranch(pLI->pLP, BranchKind::LoopBegin);
    AnnotateBranch(pLI->pLE, BranchKind::LoopExit);
    DXASSERT_NOMSG(pLI->pLE->getTerminator()->getNumSuccessors() == 1);
    // Record and annotate loop breaks.
    for (auto itPred = pred_begin(pLI->pLE), endPred = pred_end(pLI->pLE); itPred != endPred; ++itPred) {
      BasicBlock *pPredBB = *itPred;
      DXASSERT_NOMSG(pPredBB->getTerminator()->getNumSuccessors() == 1);
      AnnotateBranch(pPredBB, BranchKind::LoopBreak);
    }
  } else {
    AnnotateBranch(pLI->pLP, BranchKind::LoopNoEnd);
  }
  // Record and annotate loop continues.
  for (auto itPred = pred_begin(pLI->pLL), endPred = pred_end(pLI->pLL); itPred != endPred; ++itPred) {
    BasicBlock *pPredBB = *itPred;
    DXASSERT_NOMSG(pPredBB->getTerminator()->getNumSuccessors() == 1);
    DXASSERT_NOMSG((++pred_begin(pPredBB)) == pred_end(pPredBB));
    AnnotateBranch(pPredBB, BranchKind::LoopContinue);
  }
  // Annotate the loop backedge.
  AnnotateBranch(pLI->pLL, BranchKind::LoopBackEdge);
}
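
// After Phase 1, a sanitized loop has this canonical annotated shape (an
// informal sketch; the names follow the LoopItem fields):
//
//   pLP (LoopBegin) -> pLB -> ...body...
//     body -> dx.LoopContinue (LoopContinue) -> pLL (LoopBackEdge) -> pLB
//     body -> dx.LoopExitHelper (LoopBreak)  -> pLE (LoopExit)     -> after
//
// Every continue and break reaches the latch/exit through its own
// single-predecessor helper block, which is the invariant the acyclic-region
// transformation below relies on.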

//-----------------------------------------------------------------------------
// BasicBlock topological order for acyclic region.
//-----------------------------------------------------------------------------
void ScopeNestedCFG::BlockTopologicalOrderAndReachability::AppendBlock(BasicBlock *pBB,
                                                                       unique_ptr<BitVector> ReachableBBs) {
  unsigned Id = (unsigned)m_BlockState.size();
  auto itp = m_BlockIdMap.insert( {pBB, Id} );
  DXASSERT_NOMSG(itp.second);
  ReachableBBs->set(Id);
  m_BlockState.emplace_back(BasicBlockState(pBB, std::move(ReachableBBs)));
}

unsigned ScopeNestedCFG::BlockTopologicalOrderAndReachability::GetNumBlocks() const {
  DXASSERT_NOMSG(m_BlockState.size() < UINT32_MAX);
  return (unsigned)m_BlockState.size();
}

BasicBlock *ScopeNestedCFG::BlockTopologicalOrderAndReachability::GetBlock(unsigned Id) const {
  return m_BlockState[Id].pBB;
}

unsigned ScopeNestedCFG::BlockTopologicalOrderAndReachability::GetBlockId(BasicBlock *pBB) const {
  const auto it = m_BlockIdMap.find(pBB);
  if (it != m_BlockIdMap.cend())
    return it->second;
  else
    return UINT32_MAX;
}

BitVector *ScopeNestedCFG::BlockTopologicalOrderAndReachability::GetReachableBBs(BasicBlock *pBB) const {
  return GetReachableBBs(GetBlockId(pBB));
}

BitVector *ScopeNestedCFG::BlockTopologicalOrderAndReachability::GetReachableBBs(unsigned Id) const {
  return m_BlockState[Id].ReachableBBs.get();
}

void ScopeNestedCFG::BlockTopologicalOrderAndReachability::dump(raw_ostream &OS) const {
  for (unsigned i = 0; i < GetNumBlocks(); i++) {
    BasicBlock *pBB = GetBlock(i);
    DXASSERT_NOMSG(GetBlockId(pBB) == i);
    OS << i << ": " << pBB->getName() << ", ReachableBBs = { ";
    BitVector *pReachableBBs = GetReachableBBs(i);
    bool bFirst = true;
    for (unsigned j = 0; j < GetNumBlocks(); j++) {
      if (pReachableBBs->test(j)) {
        if (!bFirst) OS << ", ";
        OS << j;
        bFirst = false;
      }
    }
    OS << " }\n";
  }
}

void ScopeNestedCFG::ComputeBlockTopologicalOrderAndReachability(BasicBlock *pEntry,
                                                                 BlockTopologicalOrderAndReachability &BTO) {
  unordered_map<BasicBlock *, unsigned> WaterMarks;
  ComputeBlockTopologicalOrderAndReachabilityRec(pEntry, BTO, WaterMarks);
#if SNCFG_DBG
  dbgs() << "\nBB topological order and reachable BBs rooted at " << pEntry->getName() << ":\n";
  BTO.dump(dbgs());
#endif
}

void ScopeNestedCFG::ComputeBlockTopologicalOrderAndReachabilityRec(BasicBlock *pNode,
                                                                    BlockTopologicalOrderAndReachability &BTO,
                                                                    unordered_map<BasicBlock *, unsigned> &Marks) {
  auto itMarkBB = Marks.find(pNode);
  if (itMarkBB != Marks.end()) {
    DXASSERT(itMarkBB->second == 2, "acyclic component has a cycle");
    return;
  }
  unsigned NumBBs = (unsigned)pNode->getParent()->getBasicBlockList().size();

  // Region terminator.
  if (IsAcyclicRegionTerminator(pNode)) {
    Marks[pNode] = 2; // late watermark
    BTO.AppendBlock(pNode, std::make_unique<BitVector>(NumBBs, false));
    return;
  }

  BasicBlock *pNodeToFollowSuccessors = GetEffectiveNodeToFollowSuccessor(pNode);
  // Loop with no exit.
  if (pNodeToFollowSuccessors == nullptr) {
    Marks[pNode] = 2; // late watermark
    BTO.AppendBlock(pNode, std::make_unique<BitVector>(NumBBs, false));
    return;
  }

  Marks[pNode] = 1; // early watermark
  auto ReachableBBs = std::make_unique<BitVector>(NumBBs, false);
  for (auto itSucc = succ_begin(pNodeToFollowSuccessors), endSucc = succ_end(pNodeToFollowSuccessors); itSucc != endSucc; ++itSucc) {
    BasicBlock *pSuccBB = *itSucc;
    ComputeBlockTopologicalOrderAndReachabilityRec(pSuccBB, BTO, Marks);
    // Union reachable BBs.
    (*ReachableBBs) |= (*BTO.GetReachableBBs(pSuccBB));
  }
  Marks[pNode] = 2; // late watermark
  BTO.AppendBlock(pNode, std::move(ReachableBBs));
}
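
// A worked example of the numbering (informal; IDs are assigned in DFS
// post-order, so a larger ID means earlier in topological order). For the
// diamond E -> {A, B}, A -> M, B -> M, the blocks are appended as M(0), A(1),
// B(2), E(3), and the reachability sets are M = {0}, A = {0,1}, B = {0,2},
// E = {0,1,2,3}: each set is the union of the successors' sets plus the
// block's own ID, which is exactly what the merge-point intersection below
// consumes.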

//-----------------------------------------------------------------------------
// Recovery of scope end points.
//-----------------------------------------------------------------------------
void ScopeNestedCFG::DetermineScopeEndPoints(BasicBlock *pEntry,
                                             bool bRecomputeSwitchScope,
                                             const BlockTopologicalOrderAndReachability &BTO,
                                             const SwitchBreaksMap &SwitchBreaks,
                                             ScopeEndPointsMap &ScopeEndPoints,
                                             ScopeEndPointsMap &DeltaScopeEndPoints) {
  DXASSERT_NOMSG(DeltaScopeEndPoints.empty());

  // 1. Determine sets of reachable merge points and identifiable scope end points.
  MergePointsMap MergePoints;
  BasicBlock *pExit = nullptr;
  BitVector *pReachableBBs = nullptr;
  if (bRecomputeSwitchScope) {
    auto it = ScopeEndPoints.find(pEntry);
    if (it != ScopeEndPoints.end()) {
      pExit = it->second;
    }
    pReachableBBs = BTO.GetReachableBBs(pEntry);
  }
  DetermineReachableMergePoints(pEntry, pExit, bRecomputeSwitchScope, pReachableBBs,
                                BTO, SwitchBreaks, ScopeEndPoints, MergePoints);

  // 2. Construct the partial scope end points map.
  for (auto &itMPI : MergePoints) {
    BasicBlock *pBB = itMPI.first;
    MergePointInfo &MPI = *itMPI.second;
    BasicBlock *pEndBB = nullptr;
    if (MPI.MP != UINT32_MAX) {
      pEndBB = BTO.GetBlock(MPI.MP);
    }
    auto itOldEndPointBB = ScopeEndPoints.find(pBB);
    if (itOldEndPointBB != ScopeEndPoints.end() && itOldEndPointBB->second != pEndBB) {
      DeltaScopeEndPoints[pBB] = itOldEndPointBB->second;
      itOldEndPointBB->second = pEndBB;
    } else {
      ScopeEndPoints[pBB] = pEndBB;
    }
  }

#if SNCFG_DBG
  dbgs() << "\nScope ends:\n";
  for (auto it = ScopeEndPoints.begin(); it != ScopeEndPoints.end(); ++it) {
    BasicBlock *pBegin = it->first;
    BasicBlock *pEnd = it->second;
    dbgs() << pBegin->getName() << ", ID=" << BTO.GetBlockId(pBegin) << " -> ";
    if (pEnd) {
      dbgs() << pEnd->getName() << ", ID=" << BTO.GetBlockId(pEnd) << "\n";
    } else {
      dbgs() << "unreachable\n";
    }
  }
#endif
}

void ScopeNestedCFG::DetermineReachableMergePoints(BasicBlock *pEntry,
                                                   BasicBlock *pExit,
                                                   bool bRecomputeSwitchScope,
                                                   const BitVector *pReachableBBs,
                                                   const BlockTopologicalOrderAndReachability &BTO,
                                                   const SwitchBreaksMap &SwitchBreaks,
                                                   const ScopeEndPointsMap &OldScopeEndPoints,
                                                   MergePointsMap &MergePoints) {
  DXASSERT_NOMSG(MergePoints.empty());
  unsigned MinBBIdx = 0;
  unsigned MaxBBIdx = BTO.GetNumBlocks() - 1;
  if (bRecomputeSwitchScope) {
    MinBBIdx = BTO.GetBlockId(pExit);
    MaxBBIdx = BTO.GetBlockId(pEntry);
  }
  for (unsigned iBB = MinBBIdx; iBB <= MaxBBIdx; iBB++) {
    if (bRecomputeSwitchScope && !pReachableBBs->test(iBB)) {
      // The block does not belong to the current switch region.
      continue;
    }

    BasicBlock *pBB = BTO.GetBlock(iBB);
    MergePoints[pBB] = unique_ptr<MergePointInfo>(new MergePointInfo);
    MergePointInfo &MPI = *MergePoints[pBB];
    BasicBlock *pNodeToFollowSuccessors = GetEffectiveNodeToFollowSuccessor(pBB);
    MPI.MP = UINT32_MAX;
    if (!IsAcyclicRegionTerminator(pBB) &&
        pNodeToFollowSuccessors != nullptr &&
        !IsLoopBackedge(pNodeToFollowSuccessors) &&
        !(bRecomputeSwitchScope && pBB == pExit)) {
      // a. Collect unique successors, excluding the switch break.
      const auto itSwitchBreaks = SwitchBreaks.find(pBB);
      const BasicBlock *pSwitchBreak = (itSwitchBreaks == SwitchBreaks.cend()) ? nullptr : itSwitchBreaks->second;
      vector<BasicBlock *> Successors;
      CollectUniqueSuccessors(pNodeToFollowSuccessors, pSwitchBreak, Successors);

      // b. Partition successors.
      struct Partition {
        set<unsigned> MPIndices;
        unordered_set<BasicBlock *> Blocks;
      };
      vector<Partition> Partitions;
      for (auto pSuccBB : Successors) {
        if (MergePoints.find(pSuccBB) == MergePoints.end()) {
          DXASSERT_NOMSG(bRecomputeSwitchScope && BTO.GetBlockId(pSuccBB) < MinBBIdx);
          MergePoints[pSuccBB] = std::make_unique<MergePointInfo>();
        }
        MergePointInfo &SuccMPI = *MergePoints[pSuccBB];
        // Find a partition for this successor.
        bool bFound = false;
        for (auto &P : Partitions) {
          set<unsigned> Intersection;
          std::set_intersection(P.MPIndices.begin(), P.MPIndices.end(),
                                SuccMPI.CandidateSet.begin(), SuccMPI.CandidateSet.end(),
                                std::inserter(Intersection, Intersection.end()));
          if (!Intersection.empty()) {
            swap(P.MPIndices, Intersection);
            P.Blocks.insert(pSuccBB);
            bFound = true;
            break;
          }
        }
        if (!bFound) {
          // Create a new partition.
          Partition P;
          P.MPIndices = SuccMPI.CandidateSet;
          P.Blocks.insert(pSuccBB);
          Partitions.emplace_back(P);
        }
      }

      // c. Analyze successors.
      if (Partitions.size() == 1) {
        auto &Intersection = Partitions[0].MPIndices;
        if (!Intersection.empty()) {
          MPI.MP = *Intersection.crbegin();
          swap(MPI.CandidateSet, Intersection); // Discard the partition set, as we do not need it anymore.
        } else {
          MPI.MP = UINT32_MAX;
        }
      } else {
        // We do not [yet] know the merge point.
        MPI.MP = UINT32_MAX;
        // For a switch, select the largest partition with at least two elements.
        if (SwitchInst *pSI = dyn_cast<SwitchInst>(pNodeToFollowSuccessors->getTerminator())) {
          size_t MaxPartSize = 0;
          size_t MaxPartIdx = 0;
          for (size_t i = 0; i < Partitions.size(); i++) {
            auto s = Partitions[i].Blocks.size();
            if (s > MaxPartSize) {
              MaxPartSize = s;
              MaxPartIdx = i;
            }
          }
          if (MaxPartSize >= 2) {
            MPI.MP = *Partitions[MaxPartIdx].MPIndices.crbegin();
            swap(MPI.CandidateSet, Partitions[MaxPartIdx].MPIndices); // Discard the partition set, as we do not need it anymore.
          }
          // TODO: consider removing during final testing.
          if (MPI.MP == UINT32_MAX) {
            auto itOldMP = OldScopeEndPoints.find(pBB);
            if (itOldMP != OldScopeEndPoints.end()) {
              MPI.MP = BTO.GetBlockId(itOldMP->second);
              MPI.CandidateSet.insert(MPI.MP);
            }
          }
        }
        if (MPI.MP == UINT32_MAX) {
          // Compute the MP union for upcoming propagation upwards.
          set<unsigned> Union;
          for (auto pSuccBB : Successors) {
            MergePointInfo &SuccMPI = *MergePoints[pSuccBB];
            set<unsigned> TmpSet;
            std::set_union(Union.begin(), Union.end(),
                           SuccMPI.CandidateSet.begin(), SuccMPI.CandidateSet.end(),
                           std::inserter(TmpSet, TmpSet.end()));
            swap(Union, TmpSet);
          }
          swap(MPI.CandidateSet, Union);
        }
      }
    }

    // Add a merge point to the candidate set.
    if (IsMergePoint(pBB)) {
      DXASSERT_NOMSG(m_LoopMap.find(pBB) == m_LoopMap.cend());
      DXASSERT_NOMSG(m_LE2LBMap.find(pBB) == m_LE2LBMap.cend());
      MPI.CandidateSet.insert(iBB);
    }
  }

  // TODO: consider removing during final testing.
  // Compensate the switch end point.
  if (SwitchInst *pSI = dyn_cast<SwitchInst>(pEntry->getTerminator())) {
    auto itOldEP = OldScopeEndPoints.find(pEntry);
    auto itMP = MergePoints.find(pEntry);
    if (itOldEP != OldScopeEndPoints.end()) {
      unsigned OldMP = BTO.GetBlockId(itOldEP->second);
      MergePointInfo &MPI = *itMP->second;
      if (MPI.MP != OldMP) {
        MPI.MP = OldMP;
        MPI.CandidateSet.clear();
        if (MPI.MP != UINT32_MAX) {
          MPI.CandidateSet.insert(MPI.MP);
        }
      }
    }
  }

#if SNCFG_DBG
  dbgs() << "\nScope ends:\n";
  for (auto it = MergePoints.begin(); it != MergePoints.end(); ++it) {
    BasicBlock *pBB = it->first;
    MergePointInfo &MPI = *it->second;
    dbgs() << it->first->getName() << ": ID = " << BTO.GetBlockId(pBB) << ", MP = " << (int)MPI.MP << "\n";
    dbgs() << " CandidateSet = "; DumpIntSet(dbgs(), MPI.CandidateSet); dbgs() << "\n";
  }
#endif
}
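
// A small worked example of the merge-point recovery (informal):
//
//   outer: br %a, label %inner, label %else
//   inner: br %b, label %t1, label %t2      t1 -> M1,  t2 -> M1
//   M1 -> M2                                else -> M2
//
// For 'inner', both successors carry the candidate set {M1, M2}; the
// intersection is {M1, M2}, and its topologically-closest member (the largest
// block ID) is M1, so the inner if-scope ends at M1. For 'outer', the arms
// carry {M1, M2} and {M2}; the intersection is {M2}, so the outer scope ends
// at M2, where control actually re-joins.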

void ScopeNestedCFG::DetermineSwitchBreaks(BasicBlock *pSwitchBegin,
                                           const ScopeEndPointsMap &ScopeEndPoints,
                                           const BlockTopologicalOrderAndReachability &BTO,
                                           SwitchBreaksMap &SwitchBreaks) {
  DXASSERT_NOMSG(SwitchBreaks.empty());
  TerminatorInst *pTI = pSwitchBegin->getTerminator();
  DXASSERT_LOCALVAR_NOMSG(pTI, dyn_cast<SwitchInst>(pTI) != nullptr);
  auto it = ScopeEndPoints.find(pSwitchBegin);
  if (it == ScopeEndPoints.end())
    return;
  BasicBlock *pSwitchEnd = it->second;
  if (pSwitchEnd == nullptr)
    return;

  BitVector *pReachableFromSwitchBegin = BTO.GetReachableBBs(pSwitchBegin);
  for (auto itPred = pred_begin(pSwitchEnd), endPred = pred_end(pSwitchEnd); itPred != endPred; ++itPred) {
    BasicBlock *pPredBB = *itPred;
    unsigned PredId = BTO.GetBlockId(pPredBB);
    // An alternative entry into the acyclic component.
    if (PredId == UINT32_MAX)
      continue;
    // Record this switch break.
    if (pReachableFromSwitchBegin->test(PredId)) {
      SwitchBreaks.insert( {pPredBB, pSwitchEnd} );
    }
  }

#if SNCFG_DBG
  if (!SwitchBreaks.empty()) {
    dbgs() << "\nSwitch breaks:\n";
    for (auto it = SwitchBreaks.begin(); it != SwitchBreaks.end(); ++it) {
      BasicBlock *pSrcBB = it->first;
      BasicBlock *pDstBB = it->second;
      dbgs() << pSrcBB->getName() << " -> " << pDstBB->getName() << "\n";
    }
  }
#endif
}
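
// In source terms (an informal reading): every predecessor of the block where
// the switch scope ends that is reachable from the switch header acts like a
// 'break;'. For
//
//   switch (x) { case 0: ...; break; case 1: ...; break; }
//
// both break edges target the switch end point and are recorded in
// SwitchBreaks; they are then excluded when merge points for scopes nested
// inside the switch are recomputed, so the case bodies do not appear to
// "merge" at the switch end point.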
  1090. //-----------------------------------------------------------------------------
  1091. // Transformation of acyclic region.
  1092. //-----------------------------------------------------------------------------
void ScopeNestedCFG::TransformAcyclicRegion(BasicBlock *pEntry) {
  unordered_map<BasicBlock *, vector<BasicBlock *> > BlockClones;
  unordered_map<BasicBlock *, vector<BasicBlock *> > Edges;
  ValueToValueMapTy RegionValueRemap;
  BlockTopologicalOrderAndReachability BTO;
  ComputeBlockTopologicalOrderAndReachability(pEntry, BTO);

  // Set up entry scope.
  ScopeStackItem &EntryScope = PushScope(pEntry);
  DXASSERT_NOMSG(EntryScope.pScopeBeginBB == pEntry);
  EntryScope.pClonedScopeBeginBB = pEntry;
  EntryScope.pScopeEndBB = nullptr;
  EntryScope.pClonedScopeEndBB = nullptr;
  DXASSERT_NOMSG(EntryScope.SuccIdx == 0);
  EntryScope.ScopeEndPoints = std::make_shared<ScopeEndPointsMap>();
  EntryScope.DeltaScopeEndPoints = std::make_shared<ScopeEndPointsMap>();
  EntryScope.SwitchBreaks = std::make_shared<SwitchBreaksMap>();
  DetermineScopeEndPoints(pEntry, false, BTO, SwitchBreaksMap{},
                          *EntryScope.ScopeEndPoints.get(), *EntryScope.DeltaScopeEndPoints.get());

  while (!m_ScopeStack.empty()) {
    ScopeStackItem Scope = *GetScope();
    PopScope();
    // Assume: (1) the current node is already cloned (if needed),
    //         (2) the current node is already properly connected to its predecessor.
    TerminatorInst *pScopeBeginTI = Scope.pScopeBeginBB->getTerminator();
    BranchKind BeginScopeBranchKind = GetBranchAnnotation(Scope.pScopeBeginBB);

    //
    // I. Process the node.
    //

    // 1. The node is a scope terminator.
    // 1a. Return.
    if (isa<ReturnInst>(pScopeBeginTI)) {
      continue;
    }
    DXASSERT_NOMSG(pScopeBeginTI->getNumSuccessors() > 0);

    // 1b. Break and continue.
    switch (BeginScopeBranchKind) {
    case BranchKind::LoopBreak: {
      // Connect to loop exit.
      TerminatorInst *pClonedScopeBeginTI = Scope.pClonedScopeBeginBB->getTerminator();
      DXASSERT_LOCALVAR_NOMSG(pClonedScopeBeginTI, pClonedScopeBeginTI->getNumSuccessors() == 1);
      DXASSERT_NOMSG(m_LoopMap.find(pEntry) != m_LoopMap.end());
      LoopItem &LI = m_LoopMap[pEntry];
      AddEdge(Scope.pClonedScopeBeginBB, 0, LI.pLE, Edges);
      continue;
    }
    case BranchKind::LoopContinue: {
      // Connect to loop latch.
      TerminatorInst *pClonedScopeBeginTI = Scope.pClonedScopeBeginBB->getTerminator();
      DXASSERT_LOCALVAR_NOMSG(pClonedScopeBeginTI, pClonedScopeBeginTI->getNumSuccessors() == 1);
      DXASSERT_NOMSG(m_LoopMap.find(pEntry) != m_LoopMap.end());
      LoopItem &LI = m_LoopMap[pEntry];
      AddEdge(Scope.pClonedScopeBeginBB, 0, LI.pLL, Edges);
      continue;
    }
    default: ; // Process further.
    }

    // 1c. Loop latch node.
    if (IsLoopBackedge(Scope.pScopeBeginBB)) {
      continue;
    }

    // 2. Clone a nested loop and proceed after the loop.
    if (BeginScopeBranchKind == BranchKind::LoopBegin || BeginScopeBranchKind == BranchKind::LoopNoEnd) {
      // The node is a loop preheader, which has already been cloned, if necessary.
      // Original loop.
      BasicBlock *pPreheader = Scope.pScopeBeginBB;
      DXASSERT_NOMSG(pPreheader->getTerminator()->getNumSuccessors() == 1);
      BasicBlock *pHeader = pPreheader->getTerminator()->getSuccessor(0);
      LoopItem &Loop = m_LoopMap[pHeader];
      // Clone loop.
      BasicBlock *pClonedHeader = CloneLoop(pHeader, Scope.pClonedScopeBeginBB, BlockClones, Edges, RegionValueRemap);
      // Connect cloned preheader to cloned loop.
      AddEdge(Scope.pClonedScopeBeginBB, 0, pClonedHeader, Edges);
      // Push loop-end node onto the stack.
      LoopItem &ClonedLoop = m_LoopMap[pClonedHeader];
      if (Loop.pLE != nullptr) {
        // Loop with a loop-exit node.
        DXASSERT_NOMSG(Loop.pLE->getTerminator()->getNumSuccessors() == 1);
        ScopeStackItem &AfterEndLoopScope = PushScope(Loop.pLE);
        AfterEndLoopScope.pClonedScopeBeginBB = ClonedLoop.pLE;
        AfterEndLoopScope.ScopeEndPoints = Scope.ScopeEndPoints;
        AfterEndLoopScope.DeltaScopeEndPoints = Scope.DeltaScopeEndPoints;
        AfterEndLoopScope.SwitchBreaks = Scope.SwitchBreaks;
      } else {
        // Loop without a loop-exit node.
        DXASSERT_NOMSG(ClonedLoop.pLE == nullptr);
      }
      continue;
    }

    // 3. Classify scope.
    bool bSwitchScope = IsSwitch(pScopeBeginTI);
    bool bIfScope = IsIf(pScopeBeginTI);

    // 4. Open scope.
    if (Scope.SuccIdx == 0 && (bIfScope || bSwitchScope)) {
      if (bSwitchScope) {
        // Detect switch breaks for the switch scope.
        SwitchBreaksMap SwitchBreaks;
        DetermineSwitchBreaks(Scope.pScopeBeginBB, *Scope.ScopeEndPoints.get(), BTO, SwitchBreaks);
        if (!SwitchBreaks.empty()) {
          // After switch breaks are known, recompute scope end points more precisely.
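          // A break edge jumps straight to the switch's merge point; treated
          // as an ordinary edge it would drag the end points of the nested
          // scopes up to the switch end, so the recomputation excludes it.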
          Scope.DeltaScopeEndPoints = std::make_shared<ScopeEndPointsMap>();
          Scope.SwitchBreaks = std::make_shared<SwitchBreaksMap>(SwitchBreaks);
          DetermineScopeEndPoints(Scope.pScopeBeginBB, true, BTO,
                                  *Scope.SwitchBreaks.get(),
                                  *Scope.ScopeEndPoints.get(),
                                  *Scope.DeltaScopeEndPoints.get());
        }
      }

      if (bIfScope) {
        // Refine the if-scope end point.
        auto itEndIfScope = Scope.ScopeEndPoints->find(Scope.pScopeBeginBB);
        DXASSERT_NOMSG(itEndIfScope != Scope.ScopeEndPoints->cend());
        if (itEndIfScope->second == nullptr) {
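          // No merge point was found: both arms end in region terminators
          // (return, loop break/continue, or switch break). If exactly one arm
          // still reaches the parent scope's end point, treat that arm as this
          // if's end so the scopes nest properly.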
          ScopeStackItem *pParentScope = GetScope();
          BasicBlock *pCandidateEndScopeBB = nullptr;
          if (pParentScope != nullptr && pParentScope->pScopeEndBB != nullptr) {
            // Determine which branch has the parent's end-scope node.
            unsigned ParentScopeEndId = BTO.GetBlockId(pParentScope->pScopeEndBB);
            for (unsigned i = 0; i < pScopeBeginTI->getNumSuccessors(); i++) {
              BasicBlock *pSucc = pScopeBeginTI->getSuccessor(i);
              // Skip a switch break.
              auto itSwBreak = Scope.SwitchBreaks->find(pSucc);
              if (itSwBreak != Scope.SwitchBreaks->end()) {
                continue;
              }
              BitVector *pReachableBBs = BTO.GetReachableBBs(pSucc);
              if (pReachableBBs->test(ParentScopeEndId)) {
                if (!pCandidateEndScopeBB) {
                  // Case 1: one of the if's branches terminates only in region terminators.
                  pCandidateEndScopeBB = pSucc;
                } else {
                  // Case 2: both branches terminate only in region terminators (e.g., switch breaks).
                  pCandidateEndScopeBB = nullptr;
                }
              }
            }
            if (pCandidateEndScopeBB) {
              Scope.bRestoreIfScopeEndPoint = true;
              itEndIfScope->second = pCandidateEndScopeBB;
#if SNCFG_DBG
              BasicBlock *pBegin = Scope.pScopeBeginBB;
              BasicBlock *pEnd = pCandidateEndScopeBB;
              dbgs() << "\nAdjusted IF's end: ";
              dbgs() << pBegin->getName() << ", ID=" << BTO.GetBlockId(pBegin) << " -> ";
              dbgs() << pEnd->getName() << ", ID=" << BTO.GetBlockId(pEnd) << "\n";
#endif
            }
          }
        }
      }

      // Determine the scope end and set up helper nodes, if necessary.
      BranchKind ScopeBeginBranchKind = BranchKind::Invalid;
      BranchKind ScopeEndBranchKind = BranchKind::Invalid;
      auto itEndScope = Scope.ScopeEndPoints->find(Scope.pScopeBeginBB);
      if (itEndScope != Scope.ScopeEndPoints->cend() && itEndScope->second != nullptr) {
        Scope.pScopeEndBB = itEndScope->second;
        Scope.pClonedScopeEndBB = BasicBlock::Create(*m_pCtx, bIfScope ? "dx.EndIfScope" : "dx.EndSwitchScope",
                                                     m_pFunc, Scope.pScopeEndBB);
        BranchInst::Create(Scope.pClonedScopeEndBB, Scope.pClonedScopeEndBB);
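        // The helper block branches to itself for now; its real successor is
        // installed during the edge-fixup pass at the end of the transformation.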
        ScopeBeginBranchKind = bIfScope ? BranchKind::IfBegin : BranchKind::SwitchBegin;
        ScopeEndBranchKind = bIfScope ? BranchKind::IfEnd : BranchKind::SwitchEnd;
      } else {
        Scope.pScopeEndBB = nullptr;
        Scope.pClonedScopeEndBB = nullptr;
        ScopeBeginBranchKind = bIfScope ? BranchKind::IfNoEnd : BranchKind::SwitchNoEnd;
      }

      // Annotate scope-begin and scope-end branches.
      DXASSERT_NOMSG(ScopeBeginBranchKind != BranchKind::Invalid);
      AnnotateBranch(Scope.pClonedScopeBeginBB, ScopeBeginBranchKind);
      if (Scope.pClonedScopeEndBB != nullptr) {
        DXASSERT_NOMSG(ScopeEndBranchKind != BranchKind::Invalid);
        AnnotateBranch(Scope.pClonedScopeEndBB, ScopeEndBranchKind);
      }
    }

    // 5. Push unfinished if and switch scopes onto the stack.
    if ((bIfScope || bSwitchScope) &&
        Scope.SuccIdx < pScopeBeginTI->getNumSuccessors()) {
      ScopeStackItem &UnfinishedScope = RePushScope(Scope);
      // Advance successor.
      UnfinishedScope.SuccIdx++;
    }

    // 6. Finalize scope.
    if ((bIfScope || bSwitchScope) && (Scope.SuccIdx == pScopeBeginTI->getNumSuccessors())) {
      if (Scope.pScopeEndBB != nullptr) {
        bool bEndScopeSharedWithParent = false;
        ScopeStackItem *pParentScope = GetScope();
        if (pParentScope != nullptr) {
          if (Scope.pScopeEndBB == pParentScope->pScopeEndBB) {
            bEndScopeSharedWithParent = true;
            if (Scope.pClonedScopeEndBB != nullptr) {
              AddEdge(Scope.pClonedScopeEndBB, 0, pParentScope->pClonedScopeEndBB, Edges);
            }
          }
        }
        if (!bEndScopeSharedWithParent) {
          // Clone the original end-of-scope BB.
          ScopeStackItem &AfterEndOfScopeScope = PushScope(Scope.pScopeEndBB);
          AfterEndOfScopeScope.pClonedScopeBeginBB = CloneNode(Scope.pScopeEndBB, BlockClones, RegionValueRemap);
          AfterEndOfScopeScope.ScopeEndPoints = Scope.ScopeEndPoints;
          AfterEndOfScopeScope.DeltaScopeEndPoints = Scope.DeltaScopeEndPoints;
          AfterEndOfScopeScope.SwitchBreaks = Scope.SwitchBreaks;
          AddEdge(Scope.pClonedScopeEndBB, 0, AfterEndOfScopeScope.pClonedScopeBeginBB, Edges);
        }
      }
      // Restore the original (parent scope) ScopeEndPoints.
      if (bSwitchScope) {
        for (auto &it : *Scope.DeltaScopeEndPoints) {
          BasicBlock *pBB = it.first;
          BasicBlock *pOldMP = it.second;
          (*Scope.ScopeEndPoints)[pBB] = pOldMP;
        }
      }
      if (Scope.bRestoreIfScopeEndPoint) {
        DXASSERT_NOMSG(bIfScope);
        auto itEndIfScope = Scope.ScopeEndPoints->find(Scope.pScopeBeginBB);
        DXASSERT_NOMSG(itEndIfScope != Scope.ScopeEndPoints->cend());
        DXASSERT_NOMSG(itEndIfScope->second != nullptr);
        itEndIfScope->second = nullptr;
      }
      continue;
    }

    //
    // II. Process successors.
    //
    BasicBlock *pSuccBB = pScopeBeginTI->getSuccessor(Scope.SuccIdx);

    // 7. Already processed successor.
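    // A terminator may list the same successor in several slots (e.g., switch
    // cases sharing a target); reuse the clone chosen for the previous slot.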
    if (bIfScope || bSwitchScope) {
      if (pSuccBB == Scope.pPrevSuccBB) {
        DXASSERT_NOMSG(Scope.pClonedPrevSuccBB != nullptr);
        AddEdge(Scope.pClonedScopeBeginBB, Scope.SuccIdx, Scope.pClonedPrevSuccBB, Edges);
        continue;
      }
    }

    // 8. Successor meets end-of-scope.
    bool bEndOfScope = false;
    if (pSuccBB == Scope.pScopeEndBB) {
      // 8a. Successor is the end of the current scope.
      bEndOfScope = true;
      AddEdge(Scope.pClonedScopeBeginBB, Scope.SuccIdx, Scope.pClonedScopeEndBB, Edges);
    } else {
      // 8b. Successor is the end of the parent scope.
      ScopeStackItem *pParentScope = GetScope();
      if (pParentScope != nullptr) {
        auto it = Scope.SwitchBreaks->find(Scope.pScopeBeginBB);
        bool bSwitchBreak = it != Scope.SwitchBreaks->cend();
        if (pSuccBB == pParentScope->pScopeEndBB) {
          bEndOfScope = true;
          if (!bSwitchBreak) {
            AddEdge(Scope.pClonedScopeBeginBB, Scope.SuccIdx, pParentScope->pClonedScopeEndBB, Edges);
          }
        }
        if (bSwitchBreak) {
          if (pSuccBB == it->second) {
            // Switch break.
            bEndOfScope = true;
            ScopeStackItem *pSwitchScope = FindParentScope(ScopeStackItem::Kind::Switch);
            DXASSERT_NOMSG(pSuccBB == pSwitchScope->pScopeEndBB);
            BasicBlock *pSwitchBreakHelper = BasicBlock::Create(*m_pCtx, "dx.SwitchBreak", m_pFunc, pSuccBB);
            BranchInst::Create(pSwitchBreakHelper, pSwitchBreakHelper);
            AnnotateBranch(pSwitchBreakHelper, BranchKind::SwitchBreak);
            AddEdge(Scope.pClonedScopeBeginBB, Scope.SuccIdx, pSwitchBreakHelper, Edges);
            AddEdge(pSwitchBreakHelper, 0, pSwitchScope->pClonedScopeEndBB, Edges);
          }
        }
      }
    }

    // 9. Clone the successor & push its record onto the stack.
    if (!bEndOfScope) {
      BasicBlock *pClonedSucc = CloneNode(pSuccBB, BlockClones, RegionValueRemap);
      if (bIfScope || bSwitchScope) {
        ScopeStackItem *pParentScope = GetScope();
        pParentScope->pPrevSuccBB = pSuccBB;
        pParentScope->pClonedPrevSuccBB = pClonedSucc;
      }
      // Create a new scope to process the successor.
      ScopeStackItem &SuccScope = PushScope(pSuccBB);
      SuccScope.pPrevSuccBB = nullptr;
      SuccScope.pClonedPrevSuccBB = nullptr;
      SuccScope.pClonedScopeBeginBB = pClonedSucc;
      SuccScope.ScopeEndPoints = Scope.ScopeEndPoints;
      SuccScope.DeltaScopeEndPoints = Scope.DeltaScopeEndPoints;
      SuccScope.SwitchBreaks = Scope.SwitchBreaks;
      AddEdge(Scope.pClonedScopeBeginBB, Scope.SuccIdx, SuccScope.pClonedScopeBeginBB, Edges);
    }
  }

  // Fixup edges.
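  // Every cloned block and helper was created with a placeholder self-branch;
  // rewrite each recorded terminator so its successor slots point at the
  // intended targets.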
  for (auto itEdge = Edges.begin(), endEdge = Edges.end(); itEdge != endEdge; ++itEdge) {
    BasicBlock *pBB = itEdge->first;
    vector<BasicBlock *> &Successors = itEdge->second;
    TerminatorInst *pTI = pBB->getTerminator();
    DXASSERT_NOMSG(Successors.size() == pTI->getNumSuccessors());
    for (unsigned i = 0; i < pTI->getNumSuccessors(); ++i) {
      pTI->setSuccessor(i, Successors[i]);
    }
  }
}

ScopeNestedCFG::ScopeStackItem &ScopeNestedCFG::PushScope(BasicBlock *pBB) {
  ScopeStackItem SSI;
  SSI.pScopeBeginBB = pBB;
  TerminatorInst *pTI = pBB->getTerminator();
  if (isa<BranchInst>(pTI)) {
    DXASSERT_NOMSG(!IsLoopBackedge(pBB));
    unsigned NumSucc = pBB->getTerminator()->getNumSuccessors();
    switch (NumSucc) {
    case 1: SSI.ScopeKind = ScopeStackItem::Kind::Fallthrough; break;
    case 2: SSI.ScopeKind = ScopeStackItem::Kind::If; break;
    default: DXASSERT_NOMSG(false);
    }
  } else if (isa<ReturnInst>(pTI)) {
    SSI.ScopeKind = ScopeStackItem::Kind::Return;
  } else if (isa<SwitchInst>(pTI)) {
    SSI.ScopeKind = ScopeStackItem::Kind::Switch;
  } else {
    DXASSERT_NOMSG(false);
  }
  m_ScopeStack.emplace_back(SSI);
  return *GetScope();
}

ScopeNestedCFG::ScopeStackItem &ScopeNestedCFG::RePushScope(const ScopeStackItem &Scope) {
  m_ScopeStack.emplace_back(Scope);
  return *GetScope();
}

ScopeNestedCFG::ScopeStackItem *ScopeNestedCFG::GetScope(unsigned Idx) {
  if (m_ScopeStack.size() > Idx) {
    return &m_ScopeStack[m_ScopeStack.size() - 1 - Idx];
  } else {
    return nullptr;
  }
}

ScopeNestedCFG::ScopeStackItem *ScopeNestedCFG::FindParentScope(ScopeStackItem::Kind ScopeKind) {
  for (auto it = m_ScopeStack.rbegin(); it != m_ScopeStack.rend(); ++it) {
    ScopeStackItem &SSI = *it;
    if (SSI.ScopeKind == ScopeKind)
      return &SSI;
  }
  IFT(DXC_E_SCOPE_NESTED_FAILED);
  return nullptr;
}

void ScopeNestedCFG::PopScope() {
  m_ScopeStack.pop_back();
}
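
// Record that successor slot SuccSlotIdx of pClonedSrcBB's terminator should
// point at pDstBB; the actual setSuccessor calls happen during the edge-fixup
// pass at the end of TransformAcyclicRegion.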
void ScopeNestedCFG::AddEdge(BasicBlock *pClonedSrcBB, unsigned SuccSlotIdx, BasicBlock *pDstBB,
                             unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges) {
  DXASSERT_NOMSG(pDstBB != nullptr);
  TerminatorInst *pTI = pClonedSrcBB->getTerminator();
  vector<BasicBlock *> *pSuccessors;
  auto it = Edges.find(pClonedSrcBB);
  if (it == Edges.end()) {
    Edges[pClonedSrcBB] = vector<BasicBlock *>(pTI->getNumSuccessors());
    pSuccessors = &Edges[pClonedSrcBB];
  } else {
    pSuccessors = &it->second;
  }
  (*pSuccessors)[SuccSlotIdx] = pDstBB;
}
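
// Clone pBB and rewrite the clone's operands against the region-wide value
// remap, so instructions in the clone refer to cloned definitions rather than
// the originals.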
BasicBlock *ScopeNestedCFG::CloneBasicBlockAndFixupValues(const BasicBlock *pBB,
                                                          ValueToValueMapTy &RegionValueRemap,
                                                          const Twine &NameSuffix) {
  // Create a clone.
  ValueToValueMapTy CloneMap;
  BasicBlock *pCloneBB = CloneBasicBlock(pBB, CloneMap, NameSuffix);
  // Merge this block's remapped values into the value remap for the acyclic region.
  for (auto it = CloneMap.begin(), endIt = CloneMap.end(); it != endIt; ++it) {
    RegionValueRemap[it->first] = it->second;
  }
  // Fixup values.
  for (auto itInst = pCloneBB->begin(), endInst = pCloneBB->end(); itInst != endInst; ++itInst) {
    Instruction *pInst = itInst;
    for (unsigned i = 0; i < pInst->getNumOperands(); i++) {
      Value *V = pInst->getOperand(i);
      auto itV = RegionValueRemap.find(V);
      if (itV != RegionValueRemap.end()) {
        // Replace the operand with its cloned counterpart.
        pInst->replaceUsesOfWith(V, itV->second);
      }
    }
  }
  return pCloneBB;
}
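
// The first request for a block returns the original; every subsequent request
// produces a fresh clone. Clones start with all successors pointing at
// themselves and are wired up for real during edge fixup.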
BasicBlock *ScopeNestedCFG::CloneNode(BasicBlock *pBB,
                                      unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
                                      ValueToValueMapTy &RegionValueRemap) {
  auto it = BlockClones.find(pBB);
  if (it == BlockClones.end()) {
    // First time we see this BB.
    vector<BasicBlock *> V;
    V.emplace_back(pBB);
    BlockClones[pBB] = V;
    return pBB;
  }
  // Create a clone.
  BasicBlock *pCloneBB = CloneBasicBlockAndFixupValues(pBB, RegionValueRemap);
  it->second.emplace_back(pCloneBB);
  m_pFunc->getBasicBlockList().insertAfter(pBB, pCloneBB);
  // Temporarily point the successors at the clone itself; the real targets are
  // installed during edge fixup.
  for (unsigned i = 0; i < pCloneBB->getTerminator()->getNumSuccessors(); i++) {
    pCloneBB->getTerminator()->setSuccessor(i, pCloneBB);
  }
  return pCloneBB;
}

BasicBlock *ScopeNestedCFG::CloneLoop(BasicBlock *pHeaderBB,
                                      BasicBlock *pClonedPreHeaderBB,
                                      unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
                                      unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges,
                                      ValueToValueMapTy &RegionValueRemap) {
  // 1. Clone every node reachable from the loop header (not the preheader) up
  //    to the loop exit (if any).
  // 2. Collect the cloned edges along the way.
  // 3. Update the loop map [for this loop only], in case we need to clone the
  //    cloned loop again in the future. The loop shape is sketched below.
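  //
  //   pLP (preheader) --> pLB (header) --> ... body ... --> pLL (latch)
  //                         ^                                   |
  //                         +-----------------------------------+
  //
  //   with an optional exit node pLE reached from inside the loop.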
  DXASSERT_NOMSG(m_LoopMap.find(pHeaderBB) != m_LoopMap.end());
  const LoopItem &LI = m_LoopMap.find(pHeaderBB)->second;
  unordered_set<BasicBlock *> VisitedBlocks;
  LoopItem ClonedLI;
  ClonedLI.pLP = pClonedPreHeaderBB;
  CloneLoopRec(pHeaderBB, nullptr, 0, BlockClones, Edges, VisitedBlocks, LI, ClonedLI, RegionValueRemap);
  m_LoopMap[ClonedLI.pLB] = ClonedLI;
  return ClonedLI.pLB;
}

BasicBlock *ScopeNestedCFG::CloneLoopRec(BasicBlock *pBB,
                                         BasicBlock *pClonePredBB,
                                         unsigned ClonedPredIdx,
                                         unordered_map<BasicBlock *, vector<BasicBlock *> > &BlockClones,
                                         unordered_map<BasicBlock *, vector<BasicBlock *> > &Edges,
                                         unordered_set<BasicBlock *> &VisitedBlocks,
                                         const LoopItem &LI,
                                         LoopItem &ClonedLI,
                                         ValueToValueMapTy &RegionValueRemap) {
  auto itBB = VisitedBlocks.find(pBB);
  if (itBB != VisitedBlocks.end()) {
    BasicBlock *pClonedBB = *BlockClones[*itBB].rbegin();
    // Clone the edge, but do not follow successors.
    if (pClonePredBB != nullptr) {
      AddEdge(pClonePredBB, ClonedPredIdx, pClonedBB, Edges);
    }
    return pClonedBB;
  }
  VisitedBlocks.insert(pBB);
  // Clone myself.
  BasicBlock *pClonedBB = CloneNode(pBB, BlockClones, RegionValueRemap);
  // Add an edge from the predecessor BB to myself.
  if (pClonePredBB != nullptr) {
    AddEdge(pClonePredBB, ClonedPredIdx, pClonedBB, Edges);
  } else {
    ClonedLI.pLB = pClonedBB;
  }
  // Loop exit?
  if (pBB == LI.pLE) {
    ClonedLI.pLE = pClonedBB;
    return pClonedBB;
  }
  // Loop latch?
  if (pBB == LI.pLL) {
    ClonedLI.pLL = pClonedBB;
    AddEdge(ClonedLI.pLL, 0, ClonedLI.pLB, Edges);
  }
  // Process successors.
  TerminatorInst *pTI = pBB->getTerminator();
  BasicBlock *pPrevSuccBB = nullptr;
  BasicBlock *pPrevClonedSuccBB = nullptr;
  for (unsigned SuccIdx = 0; SuccIdx < pTI->getNumSuccessors(); SuccIdx++) {
    BasicBlock *pSuccBB = pTI->getSuccessor(SuccIdx);
    if (pSuccBB != pPrevSuccBB) {
      pPrevClonedSuccBB = CloneLoopRec(pSuccBB, pClonedBB, SuccIdx, BlockClones, Edges,
                                       VisitedBlocks, LI, ClonedLI, RegionValueRemap);
      pPrevSuccBB = pSuccBB;
    } else {
      AddEdge(pClonedBB, SuccIdx, pPrevClonedSuccBB, Edges);
    }
  }
  return pClonedBB;
}

//-----------------------------------------------------------------------------
// Utility functions.
//-----------------------------------------------------------------------------
bool ScopeNestedCFG::IsIf(BasicBlock *pBB) {
  return IsIf(pBB->getTerminator());
}

bool ScopeNestedCFG::IsIf(TerminatorInst *pTI) {
  return pTI->getNumSuccessors() == 2 && isa<BranchInst>(pTI);
}

bool ScopeNestedCFG::IsSwitch(BasicBlock *pBB) {
  return IsSwitch(pBB->getTerminator());
}

bool ScopeNestedCFG::IsSwitch(TerminatorInst *pTI) {
  return isa<SwitchInst>(pTI);
}

Value *ScopeNestedCFG::GetFalse() {
  return Constant::getIntegerValue(IntegerType::get(*m_pCtx, 1), APInt(1, 0));
}

Value *ScopeNestedCFG::GetTrue() {
  return Constant::getIntegerValue(IntegerType::get(*m_pCtx, 1), APInt(1, 1));
}

ConstantInt *ScopeNestedCFG::GetI32Const(int v) {
  return ConstantInt::get(*m_pCtx, APInt(32, v));
}

void ScopeNestedCFG::DumpIntSet(raw_ostream &s, set<unsigned> Set) {
  s << "{ ";
  for (auto it = Set.begin(); it != Set.end(); ++it)
    s << *it << " ";
  s << "}";
}

} // namespace ScopeNestedCFGNS

using namespace ScopeNestedCFGNS;

INITIALIZE_PASS_BEGIN(ScopeNestedCFG, "scopenested", "Scope-nested CFG transformation", false, false)
INITIALIZE_PASS_DEPENDENCY(ReducibilityAnalysis)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(ScopeNestedCFG, "scopenested", "Scope-nested CFG transformation", false, false)

namespace llvm {

FunctionPass *createScopeNestedCFGPass() {
  return new ScopeNestedCFG();
}

} // namespace llvm