DxilLoopUnroll.cpp 38 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129
  1. //===- DxilLoopUnroll.cpp - Special Unroll for Constant Values ------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //===----------------------------------------------------------------------===//
  10. //
  11. // Special loop unroll routine for creating mandatory constant values and
  12. // loops that have exits.
  13. //
  14. // Overview of algorithm:
  15. //
  16. // 1. Identify a set of blocks to unroll.
  17. //
  18. // LLVM's concept of loop excludes exit blocks, which are blocks that no
  19. // longer have a path to the loop latch. However, some exit blocks in HLSL
  20. // also need to be unrolled. For example:
  21. //
  22. // [unroll]
  23. // for (uint i = 0; i < 4; i++)
  24. // {
  25. // if (...)
  26. // {
  27. // // This block here is an exit block, since it's.
  28. // // guaranteed to exit the loop.
  29. // ...
  30. // a[i] = ...; // Indexing requires unroll.
  31. // return;
  32. // }
  33. // }
  34. //
  35. //
  36. // 2. Create LCSSA based on the new loop boundary.
  37. //
  38. // See LCSSA.cpp for more details. It creates trivial PHI nodes for any
  39. // outgoing values of the loop at the exit blocks, so when the loop body
  40. // gets cloned, the outgoing values can be added to those PHI nodes easily.
  41. //
  42. // We are using a modified LCSSA routine here because we are including some
  43. // of the original exit blocks in the unroll.
  44. //
  45. //
  46. // 3. Unroll the loop until we succeed.
  47. //
  48. // Unlike LLVM, we do not try to find a loop count before unrolling.
  49. // Instead, we unroll to find a constant terminal condition. Give up when we
  50. // fail to do so.
  51. //
  52. //
  53. //===----------------------------------------------------------------------===//
  54. #include "llvm/Pass.h"
  55. #include "llvm/Analysis/LoopPass.h"
  56. #include "llvm/Analysis/InstructionSimplify.h"
  57. #include "llvm/Analysis/AssumptionCache.h"
  58. #include "llvm/Analysis/LoopPass.h"
  59. #include "llvm/Analysis/InstructionSimplify.h"
  60. #include "llvm/Analysis/AssumptionCache.h"
  61. #include "llvm/Analysis/ScalarEvolution.h"
  62. #include "llvm/Transforms/Scalar.h"
  63. #include "llvm/Transforms/Utils/Cloning.h"
  64. #include "llvm/Transforms/Utils/Local.h"
  65. #include "llvm/Transforms/Utils/UnrollLoop.h"
  66. #include "llvm/Transforms/Utils/SSAUpdater.h"
  67. #include "llvm/Transforms/Utils/LoopUtils.h"
  68. #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  69. #include "llvm/IR/DiagnosticInfo.h"
  70. #include "llvm/IR/Instructions.h"
  71. #include "llvm/IR/Module.h"
  72. #include "llvm/IR/Verifier.h"
  73. #include "llvm/IR/PredIteratorCache.h"
  74. #include "llvm/IR/IntrinsicInst.h"
  75. #include "llvm/Support/raw_ostream.h"
  76. #include "llvm/Support/Debug.h"
  77. #include "llvm/ADT/SetVector.h"
  78. #include "llvm/IR/LegacyPassManager.h"
  79. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  80. #include "dxc/DXIL/DxilUtil.h"
  81. #include "dxc/DXIL/DxilOperations.h"
  82. #include "dxc/HLSL/HLModule.h"
  83. #include "llvm/Analysis/DxilValueCache.h"
  84. #include "llvm/Analysis/ValueTracking.h"
  85. #include "DxilRemoveUnstructuredLoopExits.h"
  86. using namespace llvm;
  87. using namespace hlsl;
  88. // Copied over from LoopUnroll.cpp - RemapInstruction()
  89. static inline void RemapInstruction(Instruction *I,
  90. ValueToValueMapTy &VMap) {
  91. for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
  92. Value *Op = I->getOperand(op);
  93. ValueToValueMapTy::iterator It = VMap.find(Op);
  94. if (It != VMap.end())
  95. I->setOperand(op, It->second);
  96. }
  97. if (PHINode *PN = dyn_cast<PHINode>(I)) {
  98. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
  99. ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
  100. if (It != VMap.end())
  101. PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
  102. }
  103. }
  104. }
  105. namespace {
  106. class DxilLoopUnroll : public LoopPass {
  107. public:
  108. static char ID;
  109. std::unordered_set<Function *> CleanedUpAlloca;
  110. unsigned MaxIterationAttempt = 0;
  111. bool OnlyWarnOnFail = false;
  112. bool StructurizeLoopExits = false;
  113. DxilLoopUnroll(unsigned MaxIterationAttempt = 1024, bool OnlyWarnOnFail=false, bool StructurizeLoopExits=false) :
  114. LoopPass(ID),
  115. MaxIterationAttempt(MaxIterationAttempt),
  116. OnlyWarnOnFail(OnlyWarnOnFail),
  117. StructurizeLoopExits(StructurizeLoopExits)
  118. {
  119. initializeDxilLoopUnrollPass(*PassRegistry::getPassRegistry());
  120. }
  121. const char *getPassName() const override { return "Dxil Loop Unroll"; }
  122. bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  123. bool IsLoopSafeToClone(Loop *L);
  124. void getAnalysisUsage(AnalysisUsage &AU) const override {
  125. AU.addRequired<LoopInfoWrapperPass>();
  126. AU.addRequired<AssumptionCacheTracker>();
  127. AU.addRequired<DominatorTreeWrapperPass>();
  128. AU.addPreserved<DominatorTreeWrapperPass>();
  129. AU.addRequired<ScalarEvolution>();
  130. AU.addRequired<DxilValueCache>();
  131. AU.addRequiredID(&LCSSAID);
  132. AU.addRequiredID(LoopSimplifyID);
  133. }
  134. // Function overrides that resolve options when used for DxOpt
  135. void applyOptions(PassOptions O) override {
  136. GetPassOptionUnsigned(O, "MaxIterationAttempt", &MaxIterationAttempt, false);
  137. GetPassOptionBool(O, "OnlyWarnOnFail", &OnlyWarnOnFail, false);
  138. }
  139. void dumpConfig(raw_ostream &OS) override {
  140. LoopPass::dumpConfig(OS);
  141. OS << ",MaxIterationAttempt=" << MaxIterationAttempt;
  142. OS << ",OnlyWarnOnFail=" << OnlyWarnOnFail;
  143. }
  144. };
  145. char DxilLoopUnroll::ID;
  146. static void FailLoopUnroll(bool WarnOnly, Function *F, DebugLoc DL, const Twine &Message) {
  147. LLVMContext &Ctx = F->getContext();
  148. DiagnosticSeverity severity = DiagnosticSeverity::DS_Error;
  149. if (WarnOnly)
  150. severity = DiagnosticSeverity::DS_Warning;
  151. Ctx.diagnose(DiagnosticInfoDxil(F, DL.get(), Message, severity));
  152. }
  153. struct LoopIteration {
  154. SmallVector<BasicBlock *, 16> Body;
  155. BasicBlock *Latch = nullptr;
  156. BasicBlock *Header = nullptr;
  157. ValueToValueMapTy VarMap;
  158. SetVector<BasicBlock *> Extended; // Blocks that are included in the clone that are not in the core loop body.
  159. LoopIteration() {}
  160. };
  161. static bool GetConstantI1(Value *V, bool *Val=nullptr) {
  162. if (ConstantInt *C = dyn_cast<ConstantInt>(V)) {
  163. if (V->getType()->isIntegerTy(1)) {
  164. if (Val)
  165. *Val = (bool)C->getLimitedValue();
  166. return true;
  167. }
  168. }
  169. return false;
  170. }
  171. static bool IsMarkedFullUnroll(Loop *L) {
  172. if (MDNode *LoopID = L->getLoopID())
  173. return GetUnrollMetadata(LoopID, "llvm.loop.unroll.full");
  174. return false;
  175. }
  176. static bool IsMarkedUnrollCount(Loop *L, int *OutCount) {
  177. if (MDNode *LoopID = L->getLoopID()) {
  178. if (MDNode *MD = GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
  179. assert(MD->getNumOperands() == 2 &&
  180. "Unroll count hint metadata should have two operands.");
  181. ConstantInt *Val = mdconst::extract<ConstantInt>(MD->getOperand(1));
  182. int Count = Val->getZExtValue();
  183. *OutCount = Count;
  184. return true;
  185. }
  186. }
  187. return false;
  188. }
  189. static bool HasSuccessorsInLoop(BasicBlock *BB, Loop *L) {
  190. for (BasicBlock *Succ : successors(BB)) {
  191. if (L->contains(Succ)) {
  192. return true;
  193. }
  194. }
  195. return false;
  196. }
  197. static void DetachFromSuccessors(BasicBlock *BB) {
  198. SmallVector<BasicBlock *, 16> Successors(succ_begin(BB), succ_end(BB));
  199. for (BasicBlock *Succ : Successors) {
  200. Succ->removePredecessor(BB);
  201. }
  202. }
  203. /// Return true if the specified block is in the list.
  204. static bool isExitBlock(BasicBlock *BB,
  205. const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
  206. for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
  207. if (ExitBlocks[i] == BB)
  208. return true;
  209. return false;
  210. }
  211. // Copied and modified from LCSSA.cpp
  212. static bool processInstruction(SetVector<BasicBlock *> &Body, Loop &L, Instruction &Inst, DominatorTree &DT, // HLSL Change
  213. const SmallVectorImpl<BasicBlock *> &ExitBlocks,
  214. PredIteratorCache &PredCache, LoopInfo *LI) {
  215. SmallVector<Use *, 16> UsesToRewrite;
  216. BasicBlock *InstBB = Inst.getParent();
  217. for (Use &U : Inst.uses()) {
  218. Instruction *User = cast<Instruction>(U.getUser());
  219. BasicBlock *UserBB = User->getParent();
  220. if (PHINode *PN = dyn_cast<PHINode>(User))
  221. UserBB = PN->getIncomingBlock(U);
  222. if (InstBB != UserBB && /*!L.contains(UserBB)*/!Body.count(UserBB)) // HLSL Change
  223. UsesToRewrite.push_back(&U);
  224. }
  225. // If there are no uses outside the loop, exit with no change.
  226. if (UsesToRewrite.empty())
  227. return false;
  228. #if 0 // HLSL Change
  229. ++NumLCSSA; // We are applying the transformation
  230. #endif // HLSL Change
  231. // Invoke instructions are special in that their result value is not available
  232. // along their unwind edge. The code below tests to see whether DomBB
  233. // dominates
  234. // the value, so adjust DomBB to the normal destination block, which is
  235. // effectively where the value is first usable.
  236. BasicBlock *DomBB = Inst.getParent();
  237. if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst))
  238. DomBB = Inv->getNormalDest();
  239. DomTreeNode *DomNode = DT.getNode(DomBB);
  240. SmallVector<PHINode *, 16> AddedPHIs;
  241. SmallVector<PHINode *, 8> PostProcessPHIs;
  242. SSAUpdater SSAUpdate;
  243. SSAUpdate.Initialize(Inst.getType(), Inst.getName());
  244. // Insert the LCSSA phi's into all of the exit blocks dominated by the
  245. // value, and add them to the Phi's map.
  246. for (SmallVectorImpl<BasicBlock *>::const_iterator BBI = ExitBlocks.begin(),
  247. BBE = ExitBlocks.end();
  248. BBI != BBE; ++BBI) {
  249. BasicBlock *ExitBB = *BBI;
  250. if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
  251. continue;
  252. // If we already inserted something for this BB, don't reprocess it.
  253. if (SSAUpdate.HasValueForBlock(ExitBB))
  254. continue;
  255. PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB),
  256. Inst.getName() + ".lcssa", ExitBB->begin());
  257. // Add inputs from inside the loop for this PHI.
  258. for (BasicBlock *Pred : PredCache.get(ExitBB)) {
  259. PN->addIncoming(&Inst, Pred);
  260. // If the exit block has a predecessor not within the loop, arrange for
  261. // the incoming value use corresponding to that predecessor to be
  262. // rewritten in terms of a different LCSSA PHI.
  263. if (/*!L.contains(Pred)*/ !Body.count(Pred)) // HLSL Change
  264. UsesToRewrite.push_back(
  265. &PN->getOperandUse(PN->getOperandNumForIncomingValue(
  266. PN->getNumIncomingValues() - 1)));
  267. }
  268. AddedPHIs.push_back(PN);
  269. // Remember that this phi makes the value alive in this block.
  270. SSAUpdate.AddAvailableValue(ExitBB, PN);
  271. // LoopSimplify might fail to simplify some loops (e.g. when indirect
  272. // branches are involved). In such situations, it might happen that an exit
  273. // for Loop L1 is the header of a disjoint Loop L2. Thus, when we create
  274. // PHIs in such an exit block, we are also inserting PHIs into L2's header.
  275. // This could break LCSSA form for L2 because these inserted PHIs can also
  276. // have uses outside of L2. Remember all PHIs in such situation as to
  277. // revisit than later on. FIXME: Remove this if indirectbr support into
  278. // LoopSimplify gets improved.
  279. if (auto *OtherLoop = LI->getLoopFor(ExitBB))
  280. if (!L.contains(OtherLoop))
  281. PostProcessPHIs.push_back(PN);
  282. }
  283. // Rewrite all uses outside the loop in terms of the new PHIs we just
  284. // inserted.
  285. for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
  286. // If this use is in an exit block, rewrite to use the newly inserted PHI.
  287. // This is required for correctness because SSAUpdate doesn't handle uses in
  288. // the same block. It assumes the PHI we inserted is at the end of the
  289. // block.
  290. Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
  291. BasicBlock *UserBB = User->getParent();
  292. if (PHINode *PN = dyn_cast<PHINode>(User))
  293. UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
  294. if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
  295. // Tell the VHs that the uses changed. This updates SCEV's caches.
  296. if (UsesToRewrite[i]->get()->hasValueHandle())
  297. ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
  298. UsesToRewrite[i]->set(UserBB->begin());
  299. continue;
  300. }
  301. // Otherwise, do full PHI insertion.
  302. SSAUpdate.RewriteUse(*UsesToRewrite[i]);
  303. }
  304. // Post process PHI instructions that were inserted into another disjoint loop
  305. // and update their exits properly.
  306. for (auto *I : PostProcessPHIs) {
  307. if (I->use_empty())
  308. continue;
  309. BasicBlock *PHIBB = I->getParent();
  310. Loop *OtherLoop = LI->getLoopFor(PHIBB);
  311. SmallVector<BasicBlock *, 8> EBs;
  312. OtherLoop->getExitBlocks(EBs);
  313. if (EBs.empty())
  314. continue;
  315. // Recurse and re-process each PHI instruction. FIXME: we should really
  316. // convert this entire thing to a worklist approach where we process a
  317. // vector of instructions...
  318. SetVector<BasicBlock *> OtherLoopBody(OtherLoop->block_begin(), OtherLoop->block_end()); // HLSL Change
  319. processInstruction(OtherLoopBody, *OtherLoop, *I, DT, EBs, PredCache, LI);
  320. }
  321. // Remove PHI nodes that did not have any uses rewritten.
  322. for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
  323. if (AddedPHIs[i]->use_empty())
  324. AddedPHIs[i]->eraseFromParent();
  325. }
  326. return true;
  327. }
  328. // Copied from LCSSA.cpp
  329. static bool blockDominatesAnExit(BasicBlock *BB,
  330. DominatorTree &DT,
  331. const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
  332. DomTreeNode *DomNode = DT.getNode(BB);
  333. for (BasicBlock *Exit : ExitBlocks)
  334. if (DT.dominates(DomNode, DT.getNode(Exit)))
  335. return true;
  336. return false;
  337. }
  338. // Copied from LCSSA.cpp
  339. //
  340. // We need to recreate the LCSSA form since our loop boundary is potentially different from
  341. // the canonical one.
  342. static bool CreateLCSSA(SetVector<BasicBlock *> &Body, const SmallVectorImpl<BasicBlock *> &ExitBlocks, Loop *L, DominatorTree &DT, LoopInfo *LI) {
  343. PredIteratorCache PredCache;
  344. bool Changed = false;
  345. // Look at all the instructions in the loop, checking to see if they have uses
  346. // outside the loop. If so, rewrite those uses.
  347. for (SetVector<BasicBlock *>::iterator BBI = Body.begin(), BBE = Body.end();
  348. BBI != BBE; ++BBI) {
  349. BasicBlock *BB = *BBI;
  350. // For large loops, avoid use-scanning by using dominance information: In
  351. // particular, if a block does not dominate any of the loop exits, then none
  352. // of the values defined in the block could be used outside the loop.
  353. if (!blockDominatesAnExit(BB, DT, ExitBlocks))
  354. continue;
  355. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
  356. // Reject two common cases fast: instructions with no uses (like stores)
  357. // and instructions with one use that is in the same block as this.
  358. if (I->use_empty() ||
  359. (I->hasOneUse() && I->user_back()->getParent() == BB &&
  360. !isa<PHINode>(I->user_back())))
  361. continue;
  362. Changed |= processInstruction(Body, *L, *I, DT, ExitBlocks, PredCache, LI);
  363. }
  364. }
  365. return Changed;
  366. }
  367. static Value *GetGEPPtrOrigin(GEPOperator *GEP) {
  368. Value *Ptr = GEP->getPointerOperand();
  369. while (Ptr) {
  370. if (AllocaInst *AI = dyn_cast<AllocaInst>(Ptr)) {
  371. return AI;
  372. }
  373. else if (GEPOperator *NewGEP = dyn_cast<GEPOperator>(Ptr)) {
  374. Ptr = NewGEP->getPointerOperand();
  375. }
  376. else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
  377. return GV;
  378. }
  379. else {
  380. break;
  381. }
  382. }
  383. return nullptr;
  384. }
  385. // Find all blocks in the loop with instructions that
  386. // would require an unroll to be correct.
  387. //
  388. // For example:
  389. // for (int i = 0; i < 10; i++) {
  390. // gep i
  391. // }
  392. //
  393. static void FindProblemBlocks(BasicBlock *Header, const SmallVectorImpl<BasicBlock *> &BlocksInLoop, std::unordered_set<BasicBlock *> &ProblemBlocks, SetVector<AllocaInst *> &ProblemAllocas) {
  394. SmallVector<Instruction *, 16> WorkList;
  395. std::unordered_set<BasicBlock *> BlocksInLoopSet(BlocksInLoop.begin(), BlocksInLoop.end());
  396. std::unordered_set<Instruction *> InstructionsSeen;
  397. for (Instruction &I : *Header) {
  398. PHINode *PN = dyn_cast<PHINode>(&I);
  399. if (!PN)
  400. break;
  401. WorkList.push_back(PN);
  402. InstructionsSeen.insert(PN);
  403. }
  404. while (WorkList.size()) {
  405. Instruction *I = WorkList.pop_back_val();
  406. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
  407. Type *EltType = GEP->getType()->getPointerElementType();
  408. // NOTE: This is a very convservative in the following conditions:
  409. // - constant global resource arrays with external linkage (these can be
  410. // dynamically accessed)
  411. // - global resource arrays or alloca resource arrays, as long as all
  412. // writes come from the same original resource definition (which can
  413. // also be an array).
  414. //
  415. // We may want to make this more precise in the future if it becomes a
  416. // problem.
  417. //
  418. if (hlsl::dxilutil::IsHLSLObjectType(EltType)) {
  419. if (Value *Ptr = GetGEPPtrOrigin(cast<GEPOperator>(GEP))) {
  420. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
  421. if (!GV->isExternalLinkage(llvm::GlobalValue::ExternalLinkage))
  422. ProblemBlocks.insert(GEP->getParent());
  423. }
  424. else if (AllocaInst *AI = dyn_cast<AllocaInst>(Ptr)) {
  425. ProblemAllocas.insert(AI);
  426. ProblemBlocks.insert(GEP->getParent());
  427. }
  428. }
  429. continue; // Stop Propagating
  430. }
  431. }
  432. for (User *U : I->users()) {
  433. if (Instruction *UserI = dyn_cast<Instruction>(U)) {
  434. if (!InstructionsSeen.count(UserI) &&
  435. BlocksInLoopSet.count(UserI->getParent()))
  436. {
  437. InstructionsSeen.insert(UserI);
  438. WorkList.push_back(UserI);
  439. }
  440. }
  441. }
  442. }
  443. }
  444. // Helper function for getting GEP's const index value
  445. inline static int64_t GetGEPIndex(GEPOperator *GEP, unsigned idx) {
  446. return cast<ConstantInt>(GEP->getOperand(idx + 1))->getSExtValue();
  447. }
  448. // Replace allocas with all constant indices with scalar allocas, then promote
  449. // them to values where possible (mem2reg).
  450. //
  451. // Before loop unroll, we did not have constant indices for arrays and SROA was
  452. // unable to break them into scalars. Now that unroll has potentially given
  453. // them constant values, we need to turn them into scalars.
  454. //
  455. // if "AllowOOBIndex" is true, it turns any out of bound index into 0.
  456. // Otherwise it emits an error and fails compilation.
  457. //
  458. template<typename IteratorT>
  459. static bool BreakUpArrayAllocas(bool AllowOOBIndex, IteratorT ItBegin, IteratorT ItEnd, DominatorTree *DT, AssumptionCache *AC, DxilValueCache *DVC) {
  460. bool Success = true;
  461. SmallVector<AllocaInst *, 8> WorkList(ItBegin, ItEnd);
  462. SmallVector<GEPOperator *, 16> GEPs;
  463. while (WorkList.size()) {
  464. AllocaInst *AI = WorkList.pop_back_val();
  465. Type *AllocaType = AI->getAllocatedType();
  466. // Only deal with array allocas.
  467. if (!AllocaType->isArrayTy())
  468. continue;
  469. unsigned ArraySize = AI->getAllocatedType()->getArrayNumElements();
  470. Type *ElementType = AllocaType->getArrayElementType();
  471. if (!ArraySize)
  472. continue;
  473. GEPs.clear(); // Re-use array
  474. for (User *U : AI->users()) {
  475. if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
  476. // Try to set all GEP operands to constant
  477. if (!GEP->hasAllConstantIndices() && isa<GetElementPtrInst>(GEP)) {
  478. for (unsigned i = 0; i < GEP->getNumIndices(); i++) {
  479. Value *IndexOp = GEP->getOperand(i + 1);
  480. if (isa<Constant>(IndexOp))
  481. continue;
  482. if (Constant *C = DVC->GetConstValue(IndexOp))
  483. GEP->setOperand(i + 1, C);
  484. }
  485. }
  486. if (!GEP->hasAllConstantIndices() || GEP->getNumIndices() < 2 ||
  487. GetGEPIndex(GEP, 0) != 0)
  488. {
  489. GEPs.clear();
  490. break;
  491. }
  492. else {
  493. GEPs.push_back(GEP);
  494. }
  495. continue;
  496. }
  497. // Ignore uses that are only used by lifetime intrinsics.
  498. if (onlyUsedByLifetimeMarkers(U))
  499. continue;
  500. // We've found something that prevents us from safely replacing this alloca.
  501. GEPs.clear();
  502. break;
  503. }
  504. if (!GEPs.size())
  505. continue;
  506. SmallVector<AllocaInst *, 8> ScalarAllocas;
  507. ScalarAllocas.resize(ArraySize);
  508. IRBuilder<> B(AI);
  509. for (GEPOperator *GEP : GEPs) {
  510. int64_t idx = GetGEPIndex(GEP, 1);
  511. GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(GEP);
  512. if (idx < 0 || idx >= ArraySize) {
  513. if (AllowOOBIndex)
  514. idx = 0;
  515. else {
  516. Success = false;
  517. if (GEPInst)
  518. hlsl::dxilutil::EmitErrorOnInstruction(GEPInst, "Array access out of bound.");
  519. continue;
  520. }
  521. }
  522. AllocaInst *ScalarAlloca = ScalarAllocas[idx];
  523. if (!ScalarAlloca) {
  524. ScalarAlloca = B.CreateAlloca(ElementType);
  525. ScalarAllocas[idx] = ScalarAlloca;
  526. if (ElementType->isArrayTy()) {
  527. WorkList.push_back(ScalarAlloca);
  528. }
  529. }
  530. Value *NewPointer = nullptr;
  531. if (ElementType->isArrayTy()) {
  532. SmallVector<Value *, 2> Indices;
  533. Indices.push_back(B.getInt32(0));
  534. for (unsigned i = 2; i < GEP->getNumIndices(); i++) {
  535. Indices.push_back(GEP->getOperand(i + 1));
  536. }
  537. NewPointer = B.CreateGEP(ScalarAlloca, Indices);
  538. } else {
  539. NewPointer = ScalarAlloca;
  540. }
  541. // TODO: Inherit lifetimes start/end locations from AI if available.
  542. GEP->replaceAllUsesWith(NewPointer);
  543. }
  544. if (!ElementType->isArrayTy()) {
  545. ScalarAllocas.erase(std::remove(ScalarAllocas.begin(), ScalarAllocas.end(), nullptr), ScalarAllocas.end());
  546. PromoteMemToReg(ScalarAllocas, *DT, nullptr, AC);
  547. }
  548. }
  549. return Success;
  550. }
  551. static void RecursivelyRemoveLoopFromQueue(LPPassManager &LPM, Loop *L) {
  552. // Copy the sub loops into a separate list because
  553. // the original list may change.
  554. SmallVector<Loop *, 4> SubLoops(L->getSubLoops().begin(), L->getSubLoops().end());
  555. // Must remove all child loops first.
  556. for (Loop *SubL : SubLoops) {
  557. RecursivelyRemoveLoopFromQueue(LPM, SubL);
  558. }
  559. LPM.deleteLoopFromQueue(L);
  560. }
  561. // Mostly copied from Loop::isSafeToClone, but making exception
  562. // for dx.op.barrier.
  563. //
  564. bool DxilLoopUnroll::IsLoopSafeToClone(Loop *L) {
  565. // Return false if any loop blocks contain indirectbrs, or there are any calls
  566. // to noduplicate functions.
  567. for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) {
  568. if (isa<IndirectBrInst>((*I)->getTerminator()))
  569. return false;
  570. if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
  571. if (II->cannotDuplicate())
  572. return false;
  573. for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
  574. if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
  575. if (CI->cannotDuplicate() &&
  576. !hlsl::OP::IsDxilOpFuncCallInst(CI, hlsl::OP::OpCode::Barrier))
  577. {
  578. return false;
  579. }
  580. }
  581. }
  582. }
  583. return true;
  584. }
  585. bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
  586. DebugLoc LoopLoc = L->getStartLoc(); // Debug location for the start of the loop.
  587. Function *F = L->getHeader()->getParent();
  588. ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
  589. DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
  590. bool HasExplicitLoopCount = false;
  591. int ExplicitUnrollCountSigned = 0;
  592. // If the loop is not marked as [unroll], don't do anything.
  593. if (IsMarkedUnrollCount(L, &ExplicitUnrollCountSigned)) {
  594. HasExplicitLoopCount = true;
  595. }
  596. else if (!IsMarkedFullUnroll(L)) {
  597. return false;
  598. }
  599. unsigned ExplicitUnrollCount = 0;
  600. if (HasExplicitLoopCount) {
  601. if (ExplicitUnrollCountSigned < 1) {
  602. FailLoopUnroll(false, F, LoopLoc, "Could not unroll loop. Invalid unroll count.");
  603. return false;
  604. }
  605. ExplicitUnrollCount = (unsigned)ExplicitUnrollCountSigned;
  606. }
  607. if (!IsLoopSafeToClone(L))
  608. return false;
  609. unsigned TripCount = 0;
  610. BasicBlock *ExitingBlock = L->getLoopLatch();
  611. if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
  612. ExitingBlock = L->getExitingBlock();
  613. if (ExitingBlock) {
  614. TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
  615. }
  616. // Analysis passes
  617. DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  618. AssumptionCache *AC =
  619. &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
  620. LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  621. Loop *OuterL = L->getParentLoop();
  622. BasicBlock *Latch = L->getLoopLatch();
  623. BasicBlock *Header = L->getHeader();
  624. BasicBlock *Predecessor = L->getLoopPredecessor();
  625. const DataLayout &DL = F->getParent()->getDataLayout();
  626. // Quit if we don't have a single latch block or predecessor
  627. if (!Latch || !Predecessor) {
  628. return false;
  629. }
  630. // If the loop exit condition is not in the latch, then the loop is not rotated. Give up.
  631. if (!cast<BranchInst>(Latch->getTerminator())->isConditional()) {
  632. return false;
  633. }
  634. SmallVector<BasicBlock *, 16> ExitBlocks;
  635. L->getExitBlocks(ExitBlocks);
  636. std::unordered_set<BasicBlock *> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
  637. SmallVector<BasicBlock *, 16> BlocksInLoop; // Set of blocks including both body and exits
  638. BlocksInLoop.append(L->getBlocks().begin(), L->getBlocks().end());
  639. BlocksInLoop.append(ExitBlocks.begin(), ExitBlocks.end());
  640. // Heuristically find blocks that likely need to be unrolled
  641. SetVector<AllocaInst *> ProblemAllocas;
  642. std::unordered_set<BasicBlock *> ProblemBlocks;
  643. FindProblemBlocks(L->getHeader(), BlocksInLoop, ProblemBlocks, ProblemAllocas);
  644. if (StructurizeLoopExits && hlsl::RemoveUnstructuredLoopExits(L, LI, DT, /* exclude */&ProblemBlocks)) {
  645. // Recompute the loop if we managed to simplify the exit blocks
  646. Latch = L->getLoopLatch();
  647. ExitBlocks.clear();
  648. L->getExitBlocks(ExitBlocks);
  649. ExitBlockSet = std::unordered_set<BasicBlock *>(ExitBlocks.begin(), ExitBlocks.end());
  650. BlocksInLoop.clear();
  651. BlocksInLoop.append(L->getBlocks().begin(), L->getBlocks().end());
  652. BlocksInLoop.append(ExitBlocks.begin(), ExitBlocks.end());
  653. }
  654. // Keep track of the PHI nodes at the header.
  655. SmallVector<PHINode *, 16> PHIs;
  656. for (auto it = Header->begin(); it != Header->end(); it++) {
  657. if (PHINode *PN = dyn_cast<PHINode>(it)) {
  658. PHIs.push_back(PN);
  659. }
  660. else {
  661. break;
  662. }
  663. }
  664. // Quick simplification of PHINode incoming values
  665. for (PHINode *PN : PHIs) {
  666. for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
  667. Value *OldIncomingV = PN->getIncomingValue(i);
  668. if (Instruction *IncomingI = dyn_cast<Instruction>(OldIncomingV)) {
  669. if (Value *NewIncomingV = llvm::SimplifyInstruction(IncomingI, DL)) {
  670. PN->setIncomingValue(i, NewIncomingV);
  671. }
  672. }
  673. }
  674. }
  675. SetVector<BasicBlock *> ToBeCloned; // List of blocks that will be cloned.
  676. for (BasicBlock *BB : L->getBlocks()) // Include the body right away
  677. ToBeCloned.insert(BB);
  678. // Find the exit blocks that also need to be included
  679. // in the unroll.
  680. SmallVector<BasicBlock *, 8> NewExits; // New set of exit blocks as boundaries for LCSSA
  681. SmallVector<BasicBlock *, 8> FakeExits; // Set of blocks created to allow cloning original exit blocks.
  682. for (BasicBlock *BB : ExitBlocks) {
  683. bool CloneThisExitBlock = ProblemBlocks.count(BB);
  684. if (CloneThisExitBlock) {
  685. ToBeCloned.insert(BB);
  686. // If we are cloning this basic block, we must create a new exit
  687. // block for inserting LCSSA PHI nodes.
  688. BasicBlock *FakeExit = BasicBlock::Create(BB->getContext(), "loop.exit.new");
  689. F->getBasicBlockList().insert(BB, FakeExit);
  690. TerminatorInst *OldTerm = BB->getTerminator();
  691. OldTerm->removeFromParent();
  692. FakeExit->getInstList().push_back(OldTerm);
  693. BranchInst::Create(FakeExit, BB);
  694. for (BasicBlock *Succ : successors(FakeExit)) {
  695. for (Instruction &I : *Succ) {
  696. if (PHINode *PN = dyn_cast<PHINode>(&I)) {
  697. for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
  698. if (PN->getIncomingBlock(i) == BB)
  699. PN->setIncomingBlock(i, FakeExit);
  700. }
  701. }
  702. }
  703. }
  704. NewExits.push_back(FakeExit);
  705. FakeExits.push_back(FakeExit);
  706. // Update Dom tree with new exit
  707. if (!DT->getNode(FakeExit))
  708. DT->addNewBlock(FakeExit, BB);
  709. }
  710. else {
  711. // If we are not including this exit block in the unroll,
  712. // use it for LCSSA as normal.
  713. NewExits.push_back(BB);
  714. }
  715. }
  716. // Simplify the PHI nodes that have single incoming value. The original LCSSA form
  717. // (if exists) does not necessarily work for our unroll because we may be unrolling
  718. // from a different boundary.
  719. for (BasicBlock *BB : BlocksInLoop)
  720. hlsl::dxilutil::SimplifyTrivialPHIs(BB);
  721. // Re-establish LCSSA form to get ready for unrolling.
  722. CreateLCSSA(ToBeCloned, NewExits, L, *DT, LI);
  723. SmallVector<std::unique_ptr<LoopIteration>, 16> Iterations; // List of cloned iterations
  724. bool Succeeded = false;
  725. unsigned MaxAttempt = this->MaxIterationAttempt;
  726. // If we were able to figure out the definitive trip count,
  727. // just unroll that many times.
  728. if (TripCount != 0) {
  729. MaxAttempt = TripCount;
  730. }
  731. else if (HasExplicitLoopCount) {
  732. MaxAttempt = ExplicitUnrollCount;
  733. }
  734. for (unsigned IterationI = 0; IterationI < MaxAttempt; IterationI++) {
  735. LoopIteration *PrevIteration = nullptr;
  736. if (Iterations.size())
  737. PrevIteration = Iterations.back().get();
  738. Iterations.push_back(llvm::make_unique<LoopIteration>());
  739. LoopIteration &CurIteration = *Iterations.back().get();
  740. // Clone the blocks.
  741. for (BasicBlock *BB : ToBeCloned) {
  742. BasicBlock *ClonedBB = CloneBasicBlock(BB, CurIteration.VarMap);
  743. CurIteration.VarMap[BB] = ClonedBB;
  744. ClonedBB->insertInto(F, Header);
  745. if (ExitBlockSet.count(BB))
  746. CurIteration.Extended.insert(ClonedBB);
  747. CurIteration.Body.push_back(ClonedBB);
  748. // Identify the special blocks.
  749. if (BB == Latch) {
  750. CurIteration.Latch = ClonedBB;
  751. }
  752. if (BB == Header) {
  753. CurIteration.Header = ClonedBB;
  754. }
  755. }
  756. for (BasicBlock *BB : ToBeCloned) {
  757. BasicBlock *ClonedBB = cast<BasicBlock>(CurIteration.VarMap[BB]);
  758. // If branching to outside of the loop, need to update the
  759. // phi nodes there to include new values.
  760. for (BasicBlock *Succ : successors(ClonedBB)) {
  761. if (ToBeCloned.count(Succ))
  762. continue;
  763. for (Instruction &I : *Succ) {
  764. PHINode *PN = dyn_cast<PHINode>(&I);
  765. if (!PN)
  766. break;
  767. // Find the incoming value for this new block. If there is an entry
  768. // for this block in the map, then it was defined in the loop, use it.
  769. // Otherwise it came from outside the loop.
  770. Value *OldIncoming = PN->getIncomingValueForBlock(BB);
  771. Value *NewIncoming = OldIncoming;
  772. ValueToValueMapTy::iterator Itor = CurIteration.VarMap.find(OldIncoming);
  773. if (Itor != CurIteration.VarMap.end())
  774. NewIncoming = Itor->second;
  775. PN->addIncoming(NewIncoming, ClonedBB);
  776. }
  777. }
  778. }
  779. // Remap the instructions inside of cloned blocks.
  780. for (BasicBlock *BB : CurIteration.Body) {
  781. for (Instruction &I : *BB) {
  782. ::RemapInstruction(&I, CurIteration.VarMap);
  783. }
  784. }
  785. // If this is the first block
  786. if (!PrevIteration) {
  787. // Replace the phi nodes in the clone block with the values coming
  788. // from outside of the loop
  789. for (PHINode *PN : PHIs) {
  790. PHINode *ClonedPN = cast<PHINode>(CurIteration.VarMap[PN]);
  791. Value *ReplacementVal = ClonedPN->getIncomingValueForBlock(Predecessor);
  792. ClonedPN->replaceAllUsesWith(ReplacementVal);
  793. ClonedPN->eraseFromParent();
  794. CurIteration.VarMap[PN] = ReplacementVal;
  795. }
  796. }
  797. else {
  798. // Replace the phi nodes with the value defined INSIDE the previous iteration.
  799. for (PHINode *PN : PHIs) {
  800. PHINode *ClonedPN = cast<PHINode>(CurIteration.VarMap[PN]);
  801. Value *ReplacementVal = PN->getIncomingValueForBlock(Latch);
  802. auto itRep = PrevIteration->VarMap.find(ReplacementVal);
  803. if (itRep != PrevIteration->VarMap.end())
  804. ReplacementVal = itRep->second;
  805. ClonedPN->replaceAllUsesWith(ReplacementVal);
  806. ClonedPN->eraseFromParent();
  807. CurIteration.VarMap[PN] = ReplacementVal;
  808. }
  809. // Make the latch of the previous iteration branch to the header
  810. // of this new iteration.
  811. if (BranchInst *BI = dyn_cast<BranchInst>(PrevIteration->Latch->getTerminator())) {
  812. for (unsigned i = 0; i < BI->getNumSuccessors(); i++) {
  813. if (BI->getSuccessor(i) == PrevIteration->Header) {
  814. BI->setSuccessor(i, CurIteration.Header);
  815. break;
  816. }
  817. }
  818. }
  819. }
  820. // Check exit condition to see if we fully unrolled the loop
  821. if (BranchInst *BI = dyn_cast<BranchInst>(CurIteration.Latch->getTerminator())) {
  822. bool Cond = false;
  823. Value *ConstantCond = BI->getCondition();
  824. if (Value *C = DVC->GetValue(ConstantCond))
  825. ConstantCond = C;
  826. if (GetConstantI1(ConstantCond, &Cond)) {
  827. if (BI->getSuccessor(Cond ? 1 : 0) == CurIteration.Header) {
  828. Succeeded = true;
  829. break;
  830. }
  831. }
  832. }
  833. // We've reached the N defined in [unroll(N)]
  834. if ((HasExplicitLoopCount && IterationI+1 >= ExplicitUnrollCount) ||
  835. (TripCount != 0 && IterationI+1 >= TripCount))
  836. {
  837. Succeeded = true;
  838. BranchInst *BI = cast<BranchInst>(CurIteration.Latch->getTerminator());
  839. BasicBlock *ExitBlock = nullptr;
  840. for (unsigned i = 0; i < BI->getNumSuccessors(); i++) {
  841. BasicBlock *Succ = BI->getSuccessor(i);
  842. if (Succ != CurIteration.Header) {
  843. ExitBlock = Succ;
  844. break;
  845. }
  846. }
  847. BranchInst *NewBI = BranchInst::Create(ExitBlock, BI);
  848. BI->replaceAllUsesWith(NewBI);
  849. BI->eraseFromParent();
  850. break;
  851. }
  852. }
  853. if (Succeeded) {
  854. // We are going to be cleaning them up later. Maker sure
  855. // they're in entry block so deleting loop blocks don't
  856. // kill them too.
  857. for (AllocaInst *AI : ProblemAllocas)
  858. DXASSERT_LOCALVAR(AI, AI->getParent() == &F->getEntryBlock(), "Alloca is not in entry block.");
  859. LoopIteration &FirstIteration = *Iterations.front().get();
  860. // Make the predecessor branch to the first new header.
  861. {
  862. BranchInst *BI = cast<BranchInst>(Predecessor->getTerminator());
  863. for (unsigned i = 0, NumSucc = BI->getNumSuccessors(); i < NumSucc; i++) {
  864. if (BI->getSuccessor(i) == Header) {
  865. BI->setSuccessor(i, FirstIteration.Header);
  866. }
  867. }
  868. }
  869. if (OuterL) {
  870. // Core body blocks need to be added to outer loop
  871. for (size_t i = 0; i < Iterations.size(); i++) {
  872. LoopIteration &Iteration = *Iterations[i].get();
  873. for (BasicBlock *BB : Iteration.Body) {
  874. if (!Iteration.Extended.count(BB)) {
  875. OuterL->addBasicBlockToLoop(BB, *LI);
  876. }
  877. }
  878. }
  879. // Our newly created exit blocks may need to be added to outer loop
  880. for (BasicBlock *BB : FakeExits) {
  881. if (HasSuccessorsInLoop(BB, OuterL))
  882. OuterL->addBasicBlockToLoop(BB, *LI);
  883. }
  884. // Cloned exit blocks may need to be added to outer loop
  885. for (size_t i = 0; i < Iterations.size(); i++) {
  886. LoopIteration &Iteration = *Iterations[i].get();
  887. for (BasicBlock *BB : Iteration.Extended) {
  888. if (HasSuccessorsInLoop(BB, OuterL))
  889. OuterL->addBasicBlockToLoop(BB, *LI);
  890. }
  891. }
  892. }
  893. SE->forgetLoop(L);
  894. // Remove the original blocks that we've cloned from all loops.
  895. for (BasicBlock *BB : ToBeCloned)
  896. LI->removeBlock(BB);
  897. // Remove loop and all child loops from queue.
  898. RecursivelyRemoveLoopFromQueue(LPM, L);
  899. // Remove dead blocks.
  900. for (BasicBlock *BB : ToBeCloned)
  901. DetachFromSuccessors(BB);
  902. for (BasicBlock *BB : ToBeCloned)
  903. BB->dropAllReferences();
  904. for (BasicBlock *BB : ToBeCloned)
  905. BB->eraseFromParent();
  906. // Blocks need to be removed from DomTree. There's no easy way
  907. // to remove them in the right order, so just make DomTree
  908. // recalculate.
  909. DT->recalculate(*F);
  910. if (OuterL) {
  911. // This process may have created multiple back edges for the
  912. // parent loop. Simplify to keep it well-formed.
  913. simplifyLoop(OuterL, DT, LI, this, nullptr, nullptr, AC);
  914. }
  915. // Now that we potentially turned some GEP indices into constants,
  916. // try to clean up their allocas.
  917. if (!BreakUpArrayAllocas(OnlyWarnOnFail /* allow oob index */, ProblemAllocas.begin(), ProblemAllocas.end(), DT, AC, DVC)) {
  918. FailLoopUnroll(false, F, LoopLoc, "Could not unroll loop due to out of bound array access.");
  919. }
  920. return true;
  921. }
  922. // If we were unsuccessful in unrolling the loop
  923. else {
  924. const char *Msg =
  925. "Could not unroll loop. Loop bound could not be deduced at compile time. "
  926. "Use [unroll(n)] to give an explicit count.";
  927. if (OnlyWarnOnFail) {
  928. FailLoopUnroll(true /*warn only*/, F, LoopLoc, Msg);
  929. }
  930. else {
  931. FailLoopUnroll(false /*warn only*/, F, LoopLoc,
  932. Twine(Msg) + Twine(" Use '-HV 2016' to treat this as warning."));
  933. }
  934. // Remove all the cloned blocks
  935. for (std::unique_ptr<LoopIteration> &Ptr : Iterations) {
  936. LoopIteration &Iteration = *Ptr.get();
  937. for (BasicBlock *BB : Iteration.Body)
  938. DetachFromSuccessors(BB);
  939. }
  940. for (std::unique_ptr<LoopIteration> &Ptr : Iterations) {
  941. LoopIteration &Iteration = *Ptr.get();
  942. for (BasicBlock *BB : Iteration.Body)
  943. BB->dropAllReferences();
  944. }
  945. for (std::unique_ptr<LoopIteration> &Ptr : Iterations) {
  946. LoopIteration &Iteration = *Ptr.get();
  947. for (BasicBlock *BB : Iteration.Body)
  948. BB->eraseFromParent();
  949. }
  950. return false;
  951. }
  952. }
  953. }
  954. Pass *llvm::createDxilLoopUnrollPass(unsigned MaxIterationAttempt, bool OnlyWarnOnFail, bool StructurizeLoopExits) {
  955. return new DxilLoopUnroll(MaxIterationAttempt, OnlyWarnOnFail, StructurizeLoopExits);
  956. }
  957. INITIALIZE_PASS_BEGIN(DxilLoopUnroll, "dxil-loop-unroll", "Dxil Unroll loops", false, false)
  958. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  959. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  960. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  961. INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
  962. INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  963. INITIALIZE_PASS_DEPENDENCY(DxilValueCache)
  964. INITIALIZE_PASS_END(DxilLoopUnroll, "dxil-loop-unroll", "Dxil Unroll loops", false, false)