DxilLoopUnroll.cpp 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202
  1. //===- DxilLoopUnroll.cpp - Special Unroll for Constant Values ------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //===----------------------------------------------------------------------===//
  10. //
  11. // Special loop unroll routine for creating mandatory constant values and
  12. // loops that have exits.
  13. //
  14. // Overview of algorithm:
  15. //
  16. // 1. Identify a set of blocks to unroll.
  17. //
  18. // LLVM's concept of loop excludes exit blocks, which are blocks that no
  19. // longer have a path to the loop latch. However, some exit blocks in HLSL
  20. // also need to be unrolled. For example:
  21. //
  22. // [unroll]
  23. // for (uint i = 0; i < 4; i++)
  24. // {
  25. // if (...)
  26. // {
  27. // // This block here is an exit block, since it's.
  28. // // guaranteed to exit the loop.
  29. // ...
  30. // a[i] = ...; // Indexing requires unroll.
  31. // return;
  32. // }
  33. // }
  34. //
  35. //
  36. // 2. Create LCSSA based on the new loop boundary.
  37. //
  38. // See LCSSA.cpp for more details. It creates trivial PHI nodes for any
  39. // outgoing values of the loop at the exit blocks, so when the loop body
  40. // gets cloned, the outgoing values can be added to those PHI nodes easily.
  41. //
  42. // We are using a modified LCSSA routine here because we are including some
  43. // of the original exit blocks in the unroll.
  44. //
  45. //
  46. // 3. Unroll the loop until we succeed.
  47. //
  48. // Unlike LLVM, we do not try to find a loop count before unrolling.
  49. // Instead, we unroll to find a constant terminal condition. Give up when we
  50. // fail to do so.
  51. //
  52. //
  53. //===----------------------------------------------------------------------===//
  54. #include "llvm/Pass.h"
  55. #include "llvm/Analysis/LoopPass.h"
  56. #include "llvm/Analysis/InstructionSimplify.h"
  57. #include "llvm/Analysis/AssumptionCache.h"
  58. #include "llvm/Analysis/LoopPass.h"
  59. #include "llvm/Analysis/InstructionSimplify.h"
  60. #include "llvm/Analysis/AssumptionCache.h"
  61. #include "llvm/Analysis/ScalarEvolution.h"
  62. #include "llvm/Transforms/Scalar.h"
  63. #include "llvm/Transforms/Utils/Cloning.h"
  64. #include "llvm/Transforms/Utils/Local.h"
  65. #include "llvm/Transforms/Utils/UnrollLoop.h"
  66. #include "llvm/Transforms/Utils/SSAUpdater.h"
  67. #include "llvm/Transforms/Utils/LoopUtils.h"
  68. #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  69. #include "llvm/IR/DiagnosticInfo.h"
  70. #include "llvm/IR/Instructions.h"
  71. #include "llvm/IR/Module.h"
  72. #include "llvm/IR/Verifier.h"
  73. #include "llvm/IR/PredIteratorCache.h"
  74. #include "llvm/IR/IntrinsicInst.h"
  75. #include "llvm/Support/raw_ostream.h"
  76. #include "llvm/Support/Debug.h"
  77. #include "llvm/ADT/SetVector.h"
  78. #include "llvm/IR/LegacyPassManager.h"
  79. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  80. #include "dxc/DXIL/DxilUtil.h"
  81. #include "dxc/DXIL/DxilOperations.h"
  82. #include "dxc/HLSL/HLModule.h"
  83. #include "llvm/Analysis/DxilValueCache.h"
  84. #include "llvm/Analysis/ValueTracking.h"
  85. #include "DxilRemoveUnstructuredLoopExits.h"
  86. using namespace llvm;
  87. using namespace hlsl;
  88. namespace {
  89. struct ClonedIteration {
  90. SmallVector<BasicBlock *, 16> Body;
  91. BasicBlock *Latch = nullptr;
  92. BasicBlock *Header = nullptr;
  93. ValueToValueMapTy VarMap;
  94. SetVector<BasicBlock *> Extended; // Blocks that are included in the clone that are not in the core loop body.
  95. ClonedIteration() {}
  96. };
  97. class DxilLoopUnroll : public LoopPass {
  98. public:
  99. static char ID;
  100. std::set<Loop *> LoopsThatFailed;
  101. std::unordered_set<Function *> CleanedUpAlloca;
  102. unsigned MaxIterationAttempt = 0;
  103. bool OnlyWarnOnFail = false;
  104. bool StructurizeLoopExits = false;
  105. DxilLoopUnroll(unsigned MaxIterationAttempt = 1024, bool OnlyWarnOnFail=false, bool StructurizeLoopExits=false) :
  106. LoopPass(ID),
  107. MaxIterationAttempt(MaxIterationAttempt),
  108. OnlyWarnOnFail(OnlyWarnOnFail),
  109. StructurizeLoopExits(StructurizeLoopExits)
  110. {
  111. initializeDxilLoopUnrollPass(*PassRegistry::getPassRegistry());
  112. }
  113. const char *getPassName() const override { return "Dxil Loop Unroll"; }
  114. bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  115. bool doFinalization() override;
  116. bool IsLoopSafeToClone(Loop *L);
  117. void getAnalysisUsage(AnalysisUsage &AU) const override {
  118. AU.addRequired<LoopInfoWrapperPass>();
  119. AU.addRequired<AssumptionCacheTracker>();
  120. AU.addRequired<DominatorTreeWrapperPass>();
  121. AU.addPreserved<DominatorTreeWrapperPass>();
  122. AU.addRequired<ScalarEvolution>();
  123. AU.addRequired<DxilValueCache>();
  124. AU.addRequiredID(&LCSSAID);
  125. AU.addRequiredID(LoopSimplifyID);
  126. }
  127. // Function overrides that resolve options when used for DxOpt
  128. void applyOptions(PassOptions O) override {
  129. GetPassOptionUnsigned(O, "MaxIterationAttempt", &MaxIterationAttempt, false);
  130. GetPassOptionBool(O, "OnlyWarnOnFail", &OnlyWarnOnFail, false);
  131. }
  132. void dumpConfig(raw_ostream &OS) override {
  133. LoopPass::dumpConfig(OS);
  134. OS << ",MaxIterationAttempt=" << MaxIterationAttempt;
  135. OS << ",OnlyWarnOnFail=" << OnlyWarnOnFail;
  136. }
  137. void RecursivelyRemoveLoopOnSuccess(LPPassManager &LPM, Loop *L);
  138. void RecursivelyRecreateSubLoopForIteration(LPPassManager &LPM, LoopInfo *LI, Loop *OuterL, Loop *L, ClonedIteration &Iter, unsigned Depth=0);
  139. };
  140. // Copied over from LoopUnroll.cpp - RemapInstruction()
  141. static inline void DxilLoopUnrollRemapInstruction(Instruction *I,
  142. ValueToValueMapTy &VMap) {
  143. for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
  144. Value *Op = I->getOperand(op);
  145. ValueToValueMapTy::iterator It = VMap.find(Op);
  146. if (It != VMap.end())
  147. I->setOperand(op, It->second);
  148. }
  149. if (PHINode *PN = dyn_cast<PHINode>(I)) {
  150. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
  151. ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
  152. if (It != VMap.end())
  153. PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
  154. }
  155. }
  156. }
  157. char DxilLoopUnroll::ID;
  158. static void FailLoopUnroll(bool WarnOnly, Function *F, DebugLoc DL, const Twine &Message) {
  159. LLVMContext &Ctx = F->getContext();
  160. DiagnosticSeverity severity = DiagnosticSeverity::DS_Error;
  161. if (WarnOnly)
  162. severity = DiagnosticSeverity::DS_Warning;
  163. Ctx.diagnose(DiagnosticInfoDxil(F, DL.get(), Message, severity));
  164. }
  165. static bool GetConstantI1(Value *V, bool *Val=nullptr) {
  166. if (ConstantInt *C = dyn_cast<ConstantInt>(V)) {
  167. if (V->getType()->isIntegerTy(1)) {
  168. if (Val)
  169. *Val = (bool)C->getLimitedValue();
  170. return true;
  171. }
  172. }
  173. return false;
  174. }
  175. static bool IsMarkedFullUnroll(Loop *L) {
  176. if (MDNode *LoopID = L->getLoopID())
  177. return GetUnrollMetadata(LoopID, "llvm.loop.unroll.full");
  178. return false;
  179. }
  180. static bool IsMarkedUnrollCount(Loop *L, int *OutCount) {
  181. if (MDNode *LoopID = L->getLoopID()) {
  182. if (MDNode *MD = GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
  183. assert(MD->getNumOperands() == 2 &&
  184. "Unroll count hint metadata should have two operands.");
  185. ConstantInt *Val = mdconst::extract<ConstantInt>(MD->getOperand(1));
  186. int Count = Val->getZExtValue();
  187. *OutCount = Count;
  188. return true;
  189. }
  190. }
  191. return false;
  192. }
  193. static bool HasSuccessorsInLoop(BasicBlock *BB, Loop *L) {
  194. for (BasicBlock *Succ : successors(BB)) {
  195. if (L->contains(Succ)) {
  196. return true;
  197. }
  198. }
  199. return false;
  200. }
  201. static void DetachFromSuccessors(BasicBlock *BB) {
  202. SmallVector<BasicBlock *, 16> Successors(succ_begin(BB), succ_end(BB));
  203. for (BasicBlock *Succ : Successors) {
  204. Succ->removePredecessor(BB);
  205. }
  206. }
  207. /// Return true if the specified block is in the list.
  208. static bool isExitBlock(BasicBlock *BB,
  209. const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
  210. for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
  211. if (ExitBlocks[i] == BB)
  212. return true;
  213. return false;
  214. }
  215. // Copied and modified from LCSSA.cpp
  216. static bool processInstruction(SetVector<BasicBlock *> &Body, Loop &L, Instruction &Inst, DominatorTree &DT, // HLSL Change
  217. const SmallVectorImpl<BasicBlock *> &ExitBlocks,
  218. PredIteratorCache &PredCache, LoopInfo *LI) {
  219. SmallVector<Use *, 16> UsesToRewrite;
  220. BasicBlock *InstBB = Inst.getParent();
  221. for (Use &U : Inst.uses()) {
  222. Instruction *User = cast<Instruction>(U.getUser());
  223. BasicBlock *UserBB = User->getParent();
  224. if (PHINode *PN = dyn_cast<PHINode>(User))
  225. UserBB = PN->getIncomingBlock(U);
  226. if (InstBB != UserBB && /*!L.contains(UserBB)*/!Body.count(UserBB)) // HLSL Change
  227. UsesToRewrite.push_back(&U);
  228. }
  229. // If there are no uses outside the loop, exit with no change.
  230. if (UsesToRewrite.empty())
  231. return false;
  232. #if 0 // HLSL Change
  233. ++NumLCSSA; // We are applying the transformation
  234. #endif // HLSL Change
  235. // Invoke instructions are special in that their result value is not available
  236. // along their unwind edge. The code below tests to see whether DomBB
  237. // dominates
  238. // the value, so adjust DomBB to the normal destination block, which is
  239. // effectively where the value is first usable.
  240. BasicBlock *DomBB = Inst.getParent();
  241. if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst))
  242. DomBB = Inv->getNormalDest();
  243. DomTreeNode *DomNode = DT.getNode(DomBB);
  244. SmallVector<PHINode *, 16> AddedPHIs;
  245. SmallVector<PHINode *, 8> PostProcessPHIs;
  246. SSAUpdater SSAUpdate;
  247. SSAUpdate.Initialize(Inst.getType(), Inst.getName());
  248. // Insert the LCSSA phi's into all of the exit blocks dominated by the
  249. // value, and add them to the Phi's map.
  250. for (SmallVectorImpl<BasicBlock *>::const_iterator BBI = ExitBlocks.begin(),
  251. BBE = ExitBlocks.end();
  252. BBI != BBE; ++BBI) {
  253. BasicBlock *ExitBB = *BBI;
  254. if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
  255. continue;
  256. // If we already inserted something for this BB, don't reprocess it.
  257. if (SSAUpdate.HasValueForBlock(ExitBB))
  258. continue;
  259. PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB),
  260. Inst.getName() + ".lcssa", ExitBB->begin());
  261. // Add inputs from inside the loop for this PHI.
  262. for (BasicBlock *Pred : PredCache.get(ExitBB)) {
  263. PN->addIncoming(&Inst, Pred);
  264. // If the exit block has a predecessor not within the loop, arrange for
  265. // the incoming value use corresponding to that predecessor to be
  266. // rewritten in terms of a different LCSSA PHI.
  267. if (/*!L.contains(Pred)*/ !Body.count(Pred)) // HLSL Change
  268. UsesToRewrite.push_back(
  269. &PN->getOperandUse(PN->getOperandNumForIncomingValue(
  270. PN->getNumIncomingValues() - 1)));
  271. }
  272. AddedPHIs.push_back(PN);
  273. // Remember that this phi makes the value alive in this block.
  274. SSAUpdate.AddAvailableValue(ExitBB, PN);
  275. // LoopSimplify might fail to simplify some loops (e.g. when indirect
  276. // branches are involved). In such situations, it might happen that an exit
  277. // for Loop L1 is the header of a disjoint Loop L2. Thus, when we create
  278. // PHIs in such an exit block, we are also inserting PHIs into L2's header.
  279. // This could break LCSSA form for L2 because these inserted PHIs can also
  280. // have uses outside of L2. Remember all PHIs in such situation as to
  281. // revisit than later on. FIXME: Remove this if indirectbr support into
  282. // LoopSimplify gets improved.
  283. if (auto *OtherLoop = LI->getLoopFor(ExitBB))
  284. if (!L.contains(OtherLoop))
  285. PostProcessPHIs.push_back(PN);
  286. }
  287. // Rewrite all uses outside the loop in terms of the new PHIs we just
  288. // inserted.
  289. for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
  290. // If this use is in an exit block, rewrite to use the newly inserted PHI.
  291. // This is required for correctness because SSAUpdate doesn't handle uses in
  292. // the same block. It assumes the PHI we inserted is at the end of the
  293. // block.
  294. Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
  295. BasicBlock *UserBB = User->getParent();
  296. if (PHINode *PN = dyn_cast<PHINode>(User))
  297. UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
  298. if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
  299. // Tell the VHs that the uses changed. This updates SCEV's caches.
  300. if (UsesToRewrite[i]->get()->hasValueHandle())
  301. ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
  302. UsesToRewrite[i]->set(UserBB->begin());
  303. continue;
  304. }
  305. // Otherwise, do full PHI insertion.
  306. SSAUpdate.RewriteUse(*UsesToRewrite[i]);
  307. }
  308. // Post process PHI instructions that were inserted into another disjoint loop
  309. // and update their exits properly.
  310. for (auto *I : PostProcessPHIs) {
  311. if (I->use_empty())
  312. continue;
  313. BasicBlock *PHIBB = I->getParent();
  314. Loop *OtherLoop = LI->getLoopFor(PHIBB);
  315. SmallVector<BasicBlock *, 8> EBs;
  316. OtherLoop->getExitBlocks(EBs);
  317. if (EBs.empty())
  318. continue;
  319. // Recurse and re-process each PHI instruction. FIXME: we should really
  320. // convert this entire thing to a worklist approach where we process a
  321. // vector of instructions...
  322. SetVector<BasicBlock *> OtherLoopBody(OtherLoop->block_begin(), OtherLoop->block_end()); // HLSL Change
  323. processInstruction(OtherLoopBody, *OtherLoop, *I, DT, EBs, PredCache, LI);
  324. }
  325. // Remove PHI nodes that did not have any uses rewritten.
  326. for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) {
  327. if (AddedPHIs[i]->use_empty())
  328. AddedPHIs[i]->eraseFromParent();
  329. }
  330. return true;
  331. }
  332. // Copied from LCSSA.cpp
  333. static bool blockDominatesAnExit(BasicBlock *BB,
  334. DominatorTree &DT,
  335. const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
  336. DomTreeNode *DomNode = DT.getNode(BB);
  337. for (BasicBlock *Exit : ExitBlocks)
  338. if (DT.dominates(DomNode, DT.getNode(Exit)))
  339. return true;
  340. return false;
  341. }
  342. // Copied from LCSSA.cpp
  343. //
  344. // We need to recreate the LCSSA form since our loop boundary is potentially different from
  345. // the canonical one.
  346. static bool CreateLCSSA(SetVector<BasicBlock *> &Body, const SmallVectorImpl<BasicBlock *> &ExitBlocks, Loop *L, DominatorTree &DT, LoopInfo *LI) {
  347. PredIteratorCache PredCache;
  348. bool Changed = false;
  349. // Look at all the instructions in the loop, checking to see if they have uses
  350. // outside the loop. If so, rewrite those uses.
  351. for (SetVector<BasicBlock *>::iterator BBI = Body.begin(), BBE = Body.end();
  352. BBI != BBE; ++BBI) {
  353. BasicBlock *BB = *BBI;
  354. // For large loops, avoid use-scanning by using dominance information: In
  355. // particular, if a block does not dominate any of the loop exits, then none
  356. // of the values defined in the block could be used outside the loop.
  357. if (!blockDominatesAnExit(BB, DT, ExitBlocks))
  358. continue;
  359. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
  360. // Reject two common cases fast: instructions with no uses (like stores)
  361. // and instructions with one use that is in the same block as this.
  362. if (I->use_empty() ||
  363. (I->hasOneUse() && I->user_back()->getParent() == BB &&
  364. !isa<PHINode>(I->user_back())))
  365. continue;
  366. Changed |= processInstruction(Body, *L, *I, DT, ExitBlocks, PredCache, LI);
  367. }
  368. }
  369. return Changed;
  370. }
  371. static Value *GetGEPPtrOrigin(GEPOperator *GEP) {
  372. Value *Ptr = GEP->getPointerOperand();
  373. while (Ptr) {
  374. if (AllocaInst *AI = dyn_cast<AllocaInst>(Ptr)) {
  375. return AI;
  376. }
  377. else if (GEPOperator *NewGEP = dyn_cast<GEPOperator>(Ptr)) {
  378. Ptr = NewGEP->getPointerOperand();
  379. }
  380. else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
  381. return GV;
  382. }
  383. else {
  384. break;
  385. }
  386. }
  387. return nullptr;
  388. }
  389. // Find all blocks in the loop with instructions that
  390. // would require an unroll to be correct.
  391. //
  392. // For example:
  393. // for (int i = 0; i < 10; i++) {
  394. // gep i
  395. // }
  396. //
  397. static void FindProblemBlocks(BasicBlock *Header, const SmallVectorImpl<BasicBlock *> &BlocksInLoop, std::unordered_set<BasicBlock *> &ProblemBlocks, SetVector<AllocaInst *> &ProblemAllocas) {
  398. SmallVector<Instruction *, 16> WorkList;
  399. std::unordered_set<BasicBlock *> BlocksInLoopSet(BlocksInLoop.begin(), BlocksInLoop.end());
  400. std::unordered_set<Instruction *> InstructionsSeen;
  401. for (Instruction &I : *Header) {
  402. PHINode *PN = dyn_cast<PHINode>(&I);
  403. if (!PN)
  404. break;
  405. WorkList.push_back(PN);
  406. InstructionsSeen.insert(PN);
  407. }
  408. while (WorkList.size()) {
  409. Instruction *I = WorkList.pop_back_val();
  410. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
  411. Type *EltType = GEP->getType()->getPointerElementType();
  412. // NOTE: This is a very convservative in the following conditions:
  413. // - constant global resource arrays with external linkage (these can be
  414. // dynamically accessed)
  415. // - global resource arrays or alloca resource arrays, as long as all
  416. // writes come from the same original resource definition (which can
  417. // also be an array).
  418. //
  419. // We may want to make this more precise in the future if it becomes a
  420. // problem.
  421. //
  422. if (hlsl::dxilutil::IsHLSLObjectType(EltType)) {
  423. if (Value *Ptr = GetGEPPtrOrigin(cast<GEPOperator>(GEP))) {
  424. if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
  425. if (!GV->isExternalLinkage(llvm::GlobalValue::ExternalLinkage))
  426. ProblemBlocks.insert(GEP->getParent());
  427. }
  428. else if (AllocaInst *AI = dyn_cast<AllocaInst>(Ptr)) {
  429. ProblemAllocas.insert(AI);
  430. ProblemBlocks.insert(GEP->getParent());
  431. }
  432. }
  433. continue; // Stop Propagating
  434. }
  435. }
  436. for (User *U : I->users()) {
  437. if (Instruction *UserI = dyn_cast<Instruction>(U)) {
  438. if (!InstructionsSeen.count(UserI) &&
  439. BlocksInLoopSet.count(UserI->getParent()))
  440. {
  441. InstructionsSeen.insert(UserI);
  442. WorkList.push_back(UserI);
  443. }
  444. }
  445. }
  446. }
  447. }
  448. // Helper function for getting GEP's const index value
  449. inline static int64_t GetGEPIndex(GEPOperator *GEP, unsigned idx) {
  450. return cast<ConstantInt>(GEP->getOperand(idx + 1))->getSExtValue();
  451. }
  452. // Replace allocas with all constant indices with scalar allocas, then promote
  453. // them to values where possible (mem2reg).
  454. //
  455. // Before loop unroll, we did not have constant indices for arrays and SROA was
  456. // unable to break them into scalars. Now that unroll has potentially given
  457. // them constant values, we need to turn them into scalars.
  458. //
  459. // if "AllowOOBIndex" is true, it turns any out of bound index into 0.
  460. // Otherwise it emits an error and fails compilation.
  461. //
  462. template<typename IteratorT>
  463. static bool BreakUpArrayAllocas(bool AllowOOBIndex, IteratorT ItBegin, IteratorT ItEnd, DominatorTree *DT, AssumptionCache *AC, DxilValueCache *DVC) {
  464. bool Success = true;
  465. SmallVector<AllocaInst *, 8> WorkList(ItBegin, ItEnd);
  466. SmallVector<GEPOperator *, 16> GEPs;
  467. while (WorkList.size()) {
  468. AllocaInst *AI = WorkList.pop_back_val();
  469. Type *AllocaType = AI->getAllocatedType();
  470. // Only deal with array allocas.
  471. if (!AllocaType->isArrayTy())
  472. continue;
  473. unsigned ArraySize = AI->getAllocatedType()->getArrayNumElements();
  474. Type *ElementType = AllocaType->getArrayElementType();
  475. if (!ArraySize)
  476. continue;
  477. GEPs.clear(); // Re-use array
  478. for (User *U : AI->users()) {
  479. if (GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
  480. // Try to set all GEP operands to constant
  481. if (!GEP->hasAllConstantIndices() && isa<GetElementPtrInst>(GEP)) {
  482. for (unsigned i = 0; i < GEP->getNumIndices(); i++) {
  483. Value *IndexOp = GEP->getOperand(i + 1);
  484. if (isa<Constant>(IndexOp))
  485. continue;
  486. if (Constant *C = DVC->GetConstValue(IndexOp))
  487. GEP->setOperand(i + 1, C);
  488. }
  489. }
  490. if (!GEP->hasAllConstantIndices() || GEP->getNumIndices() < 2 ||
  491. GetGEPIndex(GEP, 0) != 0)
  492. {
  493. GEPs.clear();
  494. break;
  495. }
  496. else {
  497. GEPs.push_back(GEP);
  498. }
  499. continue;
  500. }
  501. // Ignore uses that are only used by lifetime intrinsics.
  502. if (onlyUsedByLifetimeMarkers(U))
  503. continue;
  504. // We've found something that prevents us from safely replacing this alloca.
  505. GEPs.clear();
  506. break;
  507. }
  508. if (!GEPs.size())
  509. continue;
  510. SmallVector<AllocaInst *, 8> ScalarAllocas;
  511. ScalarAllocas.resize(ArraySize);
  512. IRBuilder<> B(AI);
  513. for (GEPOperator *GEP : GEPs) {
  514. int64_t idx = GetGEPIndex(GEP, 1);
  515. GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(GEP);
  516. if (idx < 0 || idx >= ArraySize) {
  517. if (AllowOOBIndex)
  518. idx = 0;
  519. else {
  520. Success = false;
  521. if (GEPInst)
  522. hlsl::dxilutil::EmitErrorOnInstruction(GEPInst, "Array access out of bound.");
  523. continue;
  524. }
  525. }
  526. AllocaInst *ScalarAlloca = ScalarAllocas[idx];
  527. if (!ScalarAlloca) {
  528. ScalarAlloca = B.CreateAlloca(ElementType);
  529. ScalarAllocas[idx] = ScalarAlloca;
  530. if (ElementType->isArrayTy()) {
  531. WorkList.push_back(ScalarAlloca);
  532. }
  533. }
  534. Value *NewPointer = nullptr;
  535. if (ElementType->isArrayTy()) {
  536. SmallVector<Value *, 2> Indices;
  537. Indices.push_back(B.getInt32(0));
  538. for (unsigned i = 2; i < GEP->getNumIndices(); i++) {
  539. Indices.push_back(GEP->getOperand(i + 1));
  540. }
  541. NewPointer = B.CreateGEP(ScalarAlloca, Indices);
  542. } else {
  543. NewPointer = ScalarAlloca;
  544. }
  545. // TODO: Inherit lifetimes start/end locations from AI if available.
  546. GEP->replaceAllUsesWith(NewPointer);
  547. }
  548. if (!ElementType->isArrayTy()) {
  549. ScalarAllocas.erase(std::remove(ScalarAllocas.begin(), ScalarAllocas.end(), nullptr), ScalarAllocas.end());
  550. PromoteMemToReg(ScalarAllocas, *DT, nullptr, AC);
  551. }
  552. }
  553. return Success;
  554. }
  555. void DxilLoopUnroll::RecursivelyRemoveLoopOnSuccess(LPPassManager &LPM, Loop *L) {
  556. // Copy the sub loops into a separate list because
  557. // the original list may change.
  558. SmallVector<Loop *, 4> SubLoops(L->getSubLoops().begin(), L->getSubLoops().end());
  559. // Must remove all child loops first.
  560. for (Loop *SubL : SubLoops) {
  561. RecursivelyRemoveLoopOnSuccess(LPM, SubL);
  562. }
  563. // Remove any loops/subloops that failed because we are about to
  564. // delete them. This will not prevent them from being retried because
  565. // they would have been recreated for each cloned iteration.
  566. LoopsThatFailed.erase(L);
  567. // Loop is done and about to be deleted, remove it from queue.
  568. LPM.deleteLoopFromQueue(L);
  569. }
  570. // Mostly copied from Loop::isSafeToClone, but making exception
  571. // for dx.op.barrier.
  572. //
  573. bool DxilLoopUnroll::IsLoopSafeToClone(Loop *L) {
  574. // Return false if any loop blocks contain indirectbrs, or there are any calls
  575. // to noduplicate functions.
  576. for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) {
  577. if (isa<IndirectBrInst>((*I)->getTerminator()))
  578. return false;
  579. if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
  580. if (II->cannotDuplicate())
  581. return false;
  582. for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
  583. if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
  584. if (CI->cannotDuplicate() &&
  585. !hlsl::OP::IsDxilOpFuncCallInst(CI, hlsl::OP::OpCode::Barrier))
  586. {
  587. return false;
  588. }
  589. }
  590. }
  591. }
  592. return true;
  593. }
  594. void DxilLoopUnroll::RecursivelyRecreateSubLoopForIteration(LPPassManager &LPM, LoopInfo *LI, Loop *OuterL, Loop *L, ClonedIteration &Iter, unsigned Depth) {
  595. Loop *NewL = new Loop();
  596. // Insert it to queue in a depth first way, otherwise `insertLoopIntoQueue`
  597. // inserts adds parent first.
  598. LPM.insertLoopIntoQueue(NewL);
  599. if (OuterL) {
  600. OuterL->addChildLoop(NewL);
  601. }
  602. else {
  603. LI->addTopLevelLoop(NewL);
  604. }
  605. // First add all the blocks. It's important that we first add them here first
  606. // (Instead of letting the recursive call do the job), since it's important that
  607. // the loop header is added FIRST.
  608. for (auto it = L->block_begin(), end = L->block_end(); it != end; it++) {
  609. BasicBlock *OriginalBB = *it;
  610. BasicBlock *NewBB = cast<BasicBlock>(Iter.VarMap[OriginalBB]);
  611. // Manually call addBlockEntry instead of addBasicBlockToLoop because
  612. // addBasicBlockToLoop also checks and sets the BB -> Loop mapping.
  613. NewL->addBlockEntry(NewBB);
  614. LI->changeLoopFor(NewBB, NewL);
  615. // Now check if the block has been added to outer loops already. This is
  616. // only necessary for the first depth of this call.
  617. if (Depth == 0) {
  618. Loop *OuterL_it = OuterL;
  619. while (OuterL_it) {
  620. OuterL_it->addBlockEntry(NewBB);
  621. OuterL_it = OuterL_it->getParentLoop();
  622. }
  623. }
  624. }
  625. // Construct any sub-loops that exist. The BB -> Loop mapping in LI will be
  626. // rewritten to the sub-loop as needed.
  627. for (Loop *SubL : L->getSubLoops()) {
  628. RecursivelyRecreateSubLoopForIteration(LPM, LI, NewL, SubL, Iter, Depth+1);
  629. }
  630. }
  631. bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
  632. DebugLoc LoopLoc = L->getStartLoc(); // Debug location for the start of the loop.
  633. Function *F = L->getHeader()->getParent();
  634. ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
  635. DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
  636. bool HasExplicitLoopCount = false;
  637. int ExplicitUnrollCountSigned = 0;
  638. // If the loop is not marked as [unroll], don't do anything.
  639. if (IsMarkedUnrollCount(L, &ExplicitUnrollCountSigned)) {
  640. HasExplicitLoopCount = true;
  641. }
  642. else if (!IsMarkedFullUnroll(L)) {
  643. return false;
  644. }
  645. unsigned ExplicitUnrollCount = 0;
  646. if (HasExplicitLoopCount) {
  647. if (ExplicitUnrollCountSigned < 1) {
  648. FailLoopUnroll(false, F, LoopLoc, "Could not unroll loop. Invalid unroll count.");
  649. return false;
  650. }
  651. ExplicitUnrollCount = (unsigned)ExplicitUnrollCountSigned;
  652. }
  653. if (!IsLoopSafeToClone(L))
  654. return false;
  655. unsigned TripCount = 0;
  656. BasicBlock *ExitingBlock = L->getLoopLatch();
  657. if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
  658. ExitingBlock = L->getExitingBlock();
  659. if (ExitingBlock) {
  660. TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
  661. }
  662. // Analysis passes
  663. DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  664. AssumptionCache *AC =
  665. &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
  666. LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  667. Loop *OuterL = L->getParentLoop();
  668. BasicBlock *Latch = L->getLoopLatch();
  669. BasicBlock *Header = L->getHeader();
  670. BasicBlock *Predecessor = L->getLoopPredecessor();
  671. const DataLayout &DL = F->getParent()->getDataLayout();
  672. // Quit if we don't have a single latch block or predecessor
  673. if (!Latch || !Predecessor) {
  674. return false;
  675. }
  676. // If the loop exit condition is not in the latch, then the loop is not rotated. Give up.
  677. if (!cast<BranchInst>(Latch->getTerminator())->isConditional()) {
  678. return false;
  679. }
  680. SmallVector<BasicBlock *, 16> ExitBlocks;
  681. L->getExitBlocks(ExitBlocks);
  682. std::unordered_set<BasicBlock *> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
  683. SmallVector<BasicBlock *, 16> BlocksInLoop; // Set of blocks including both body and exits
  684. BlocksInLoop.append(L->getBlocks().begin(), L->getBlocks().end());
  685. BlocksInLoop.append(ExitBlocks.begin(), ExitBlocks.end());
  686. // Heuristically find blocks that likely need to be unrolled
  687. SetVector<AllocaInst *> ProblemAllocas;
  688. std::unordered_set<BasicBlock *> ProblemBlocks;
  689. FindProblemBlocks(L->getHeader(), BlocksInLoop, ProblemBlocks, ProblemAllocas);
  690. if (StructurizeLoopExits && hlsl::RemoveUnstructuredLoopExits(L, LI, DT, /* exclude */&ProblemBlocks)) {
  691. // Recompute the loop if we managed to simplify the exit blocks
  692. Latch = L->getLoopLatch();
  693. ExitBlocks.clear();
  694. L->getExitBlocks(ExitBlocks);
  695. ExitBlockSet = std::unordered_set<BasicBlock *>(ExitBlocks.begin(), ExitBlocks.end());
  696. BlocksInLoop.clear();
  697. BlocksInLoop.append(L->getBlocks().begin(), L->getBlocks().end());
  698. BlocksInLoop.append(ExitBlocks.begin(), ExitBlocks.end());
  699. }
  700. // Keep track of the PHI nodes at the header.
  701. SmallVector<PHINode *, 16> PHIs;
  702. for (auto it = Header->begin(); it != Header->end(); it++) {
  703. if (PHINode *PN = dyn_cast<PHINode>(it)) {
  704. PHIs.push_back(PN);
  705. }
  706. else {
  707. break;
  708. }
  709. }
  710. // Quick simplification of PHINode incoming values
  711. for (PHINode *PN : PHIs) {
  712. for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
  713. Value *OldIncomingV = PN->getIncomingValue(i);
  714. if (Instruction *IncomingI = dyn_cast<Instruction>(OldIncomingV)) {
  715. if (Value *NewIncomingV = llvm::SimplifyInstruction(IncomingI, DL)) {
  716. PN->setIncomingValue(i, NewIncomingV);
  717. }
  718. }
  719. }
  720. }
  721. SetVector<BasicBlock *> ToBeCloned; // List of blocks that will be cloned.
  722. for (BasicBlock *BB : L->getBlocks()) // Include the body right away
  723. ToBeCloned.insert(BB);
  724. // Find the exit blocks that also need to be included
  725. // in the unroll.
  726. SmallVector<BasicBlock *, 8> NewExits; // New set of exit blocks as boundaries for LCSSA
  727. SmallVector<BasicBlock *, 8> FakeExits; // Set of blocks created to allow cloning original exit blocks.
  728. for (BasicBlock *BB : ExitBlocks) {
  729. bool CloneThisExitBlock = ProblemBlocks.count(BB);
  730. if (CloneThisExitBlock) {
  731. ToBeCloned.insert(BB);
  732. // If we are cloning this basic block, we must create a new exit
  733. // block for inserting LCSSA PHI nodes.
  734. BasicBlock *FakeExit = BasicBlock::Create(BB->getContext(), "loop.exit.new");
  735. F->getBasicBlockList().insert(BB, FakeExit);
  736. TerminatorInst *OldTerm = BB->getTerminator();
  737. OldTerm->removeFromParent();
  738. FakeExit->getInstList().push_back(OldTerm);
  739. BranchInst::Create(FakeExit, BB);
  740. for (BasicBlock *Succ : successors(FakeExit)) {
  741. for (Instruction &I : *Succ) {
  742. if (PHINode *PN = dyn_cast<PHINode>(&I)) {
  743. for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
  744. if (PN->getIncomingBlock(i) == BB)
  745. PN->setIncomingBlock(i, FakeExit);
  746. }
  747. }
  748. }
  749. }
  750. NewExits.push_back(FakeExit);
  751. FakeExits.push_back(FakeExit);
  752. // Update Dom tree with new exit
  753. if (!DT->getNode(FakeExit))
  754. DT->addNewBlock(FakeExit, BB);
  755. }
  756. else {
  757. // If we are not including this exit block in the unroll,
  758. // use it for LCSSA as normal.
  759. NewExits.push_back(BB);
  760. }
  761. }
  762. // Simplify the PHI nodes that have single incoming value. The original LCSSA form
  763. // (if exists) does not necessarily work for our unroll because we may be unrolling
  764. // from a different boundary.
  765. for (BasicBlock *BB : BlocksInLoop)
  766. hlsl::dxilutil::SimplifyTrivialPHIs(BB);
  767. // Re-establish LCSSA form to get ready for unrolling.
  768. CreateLCSSA(ToBeCloned, NewExits, L, *DT, LI);
  769. SmallVector<std::unique_ptr<ClonedIteration>, 16> Iterations; // List of cloned iterations
  770. bool Succeeded = false;
  771. unsigned MaxAttempt = this->MaxIterationAttempt;
  772. // If we were able to figure out the definitive trip count,
  773. // just unroll that many times.
  774. if (TripCount != 0) {
  775. MaxAttempt = TripCount;
  776. }
  777. else if (HasExplicitLoopCount) {
  778. MaxAttempt = ExplicitUnrollCount;
  779. }
  780. for (unsigned IterationI = 0; IterationI < MaxAttempt; IterationI++) {
  781. ClonedIteration *PrevIteration = nullptr;
  782. if (Iterations.size())
  783. PrevIteration = Iterations.back().get();
  784. Iterations.push_back(llvm::make_unique<ClonedIteration>());
  785. ClonedIteration &CurIteration = *Iterations.back().get();
  786. // Clone the blocks.
  787. for (BasicBlock *BB : ToBeCloned) {
  788. BasicBlock *ClonedBB = CloneBasicBlock(BB, CurIteration.VarMap);
  789. CurIteration.VarMap[BB] = ClonedBB;
  790. ClonedBB->insertInto(F, Header);
  791. if (ExitBlockSet.count(BB))
  792. CurIteration.Extended.insert(ClonedBB);
  793. CurIteration.Body.push_back(ClonedBB);
  794. // Identify the special blocks.
  795. if (BB == Latch) {
  796. CurIteration.Latch = ClonedBB;
  797. }
  798. if (BB == Header) {
  799. CurIteration.Header = ClonedBB;
  800. }
  801. }
  802. for (BasicBlock *BB : ToBeCloned) {
  803. BasicBlock *ClonedBB = cast<BasicBlock>(CurIteration.VarMap[BB]);
  804. // If branching to outside of the loop, need to update the
  805. // phi nodes there to include new values.
  806. for (BasicBlock *Succ : successors(ClonedBB)) {
  807. if (ToBeCloned.count(Succ))
  808. continue;
  809. for (Instruction &I : *Succ) {
  810. PHINode *PN = dyn_cast<PHINode>(&I);
  811. if (!PN)
  812. break;
  813. // Find the incoming value for this new block. If there is an entry
  814. // for this block in the map, then it was defined in the loop, use it.
  815. // Otherwise it came from outside the loop.
  816. Value *OldIncoming = PN->getIncomingValueForBlock(BB);
  817. Value *NewIncoming = OldIncoming;
  818. ValueToValueMapTy::iterator Itor = CurIteration.VarMap.find(OldIncoming);
  819. if (Itor != CurIteration.VarMap.end())
  820. NewIncoming = Itor->second;
  821. PN->addIncoming(NewIncoming, ClonedBB);
  822. }
  823. }
  824. }
  825. // Remap the instructions inside of cloned blocks.
  826. for (BasicBlock *BB : CurIteration.Body) {
  827. for (Instruction &I : *BB) {
  828. DxilLoopUnrollRemapInstruction(&I, CurIteration.VarMap);
  829. }
  830. }
  831. // If this is the first block
  832. if (!PrevIteration) {
  833. // Replace the phi nodes in the clone block with the values coming
  834. // from outside of the loop
  835. for (PHINode *PN : PHIs) {
  836. PHINode *ClonedPN = cast<PHINode>(CurIteration.VarMap[PN]);
  837. Value *ReplacementVal = ClonedPN->getIncomingValueForBlock(Predecessor);
  838. ClonedPN->replaceAllUsesWith(ReplacementVal);
  839. ClonedPN->eraseFromParent();
  840. CurIteration.VarMap[PN] = ReplacementVal;
  841. }
  842. }
  843. else {
  844. // Replace the phi nodes with the value defined INSIDE the previous iteration.
  845. for (PHINode *PN : PHIs) {
  846. PHINode *ClonedPN = cast<PHINode>(CurIteration.VarMap[PN]);
  847. Value *ReplacementVal = PN->getIncomingValueForBlock(Latch);
  848. auto itRep = PrevIteration->VarMap.find(ReplacementVal);
  849. if (itRep != PrevIteration->VarMap.end())
  850. ReplacementVal = itRep->second;
  851. ClonedPN->replaceAllUsesWith(ReplacementVal);
  852. ClonedPN->eraseFromParent();
  853. CurIteration.VarMap[PN] = ReplacementVal;
  854. }
  855. // Make the latch of the previous iteration branch to the header
  856. // of this new iteration.
  857. if (BranchInst *BI = dyn_cast<BranchInst>(PrevIteration->Latch->getTerminator())) {
  858. for (unsigned i = 0; i < BI->getNumSuccessors(); i++) {
  859. if (BI->getSuccessor(i) == PrevIteration->Header) {
  860. BI->setSuccessor(i, CurIteration.Header);
  861. break;
  862. }
  863. }
  864. }
  865. }
  866. // Check exit condition to see if we fully unrolled the loop
  867. if (BranchInst *BI = dyn_cast<BranchInst>(CurIteration.Latch->getTerminator())) {
  868. bool Cond = false;
  869. Value *ConstantCond = BI->getCondition();
  870. if (Value *C = DVC->GetValue(ConstantCond))
  871. ConstantCond = C;
  872. if (GetConstantI1(ConstantCond, &Cond)) {
  873. if (BI->getSuccessor(Cond ? 1 : 0) == CurIteration.Header) {
  874. Succeeded = true;
  875. break;
  876. }
  877. }
  878. }
  879. // We've reached the N defined in [unroll(N)]
  880. if ((HasExplicitLoopCount && IterationI+1 >= ExplicitUnrollCount) ||
  881. (TripCount != 0 && IterationI+1 >= TripCount))
  882. {
  883. Succeeded = true;
  884. BranchInst *BI = cast<BranchInst>(CurIteration.Latch->getTerminator());
  885. BasicBlock *ExitBlock = nullptr;
  886. for (unsigned i = 0; i < BI->getNumSuccessors(); i++) {
  887. BasicBlock *Succ = BI->getSuccessor(i);
  888. if (Succ != CurIteration.Header) {
  889. ExitBlock = Succ;
  890. break;
  891. }
  892. }
  893. BranchInst *NewBI = BranchInst::Create(ExitBlock, BI);
  894. BI->replaceAllUsesWith(NewBI);
  895. BI->eraseFromParent();
  896. break;
  897. }
  898. }
  899. if (Succeeded) {
  900. // Now that we successfully unrolled the loop L, if there were any sub loops in L,
  901. // we have to recreate all the sub-loops for each iteration of L that we cloned.
  902. for (std::unique_ptr<ClonedIteration> &IterPtr : Iterations) {
  903. for (Loop *SubL : L->getSubLoops())
  904. RecursivelyRecreateSubLoopForIteration(LPM, LI, OuterL, SubL, *IterPtr);
  905. }
  906. // We are going to be cleaning them up later. Maker sure
  907. // they're in entry block so deleting loop blocks don't
  908. // kill them too.
  909. for (AllocaInst *AI : ProblemAllocas)
  910. DXASSERT_LOCALVAR(AI, AI->getParent() == &F->getEntryBlock(), "Alloca is not in entry block.");
  911. ClonedIteration &FirstIteration = *Iterations.front().get();
  912. // Make the predecessor branch to the first new header.
  913. {
  914. BranchInst *BI = cast<BranchInst>(Predecessor->getTerminator());
  915. for (unsigned i = 0, NumSucc = BI->getNumSuccessors(); i < NumSucc; i++) {
  916. if (BI->getSuccessor(i) == Header) {
  917. BI->setSuccessor(i, FirstIteration.Header);
  918. }
  919. }
  920. }
  921. if (OuterL) {
  922. // Core body blocks need to be added to outer loop
  923. for (size_t i = 0; i < Iterations.size(); i++) {
  924. ClonedIteration &Iteration = *Iterations[i].get();
  925. for (BasicBlock *BB : Iteration.Body) {
  926. if (!Iteration.Extended.count(BB) &&
  927. !OuterL->contains(BB))
  928. {
  929. OuterL->addBasicBlockToLoop(BB, *LI);
  930. }
  931. }
  932. }
  933. // Our newly created exit blocks may need to be added to outer loop
  934. for (BasicBlock *BB : FakeExits) {
  935. if (HasSuccessorsInLoop(BB, OuterL))
  936. OuterL->addBasicBlockToLoop(BB, *LI);
  937. }
  938. // Cloned exit blocks may need to be added to outer loop
  939. for (size_t i = 0; i < Iterations.size(); i++) {
  940. ClonedIteration &Iteration = *Iterations[i].get();
  941. for (BasicBlock *BB : Iteration.Extended) {
  942. if (HasSuccessorsInLoop(BB, OuterL))
  943. OuterL->addBasicBlockToLoop(BB, *LI);
  944. }
  945. }
  946. }
  947. SE->forgetLoop(L);
  948. // Remove the original blocks that we've cloned from all loops.
  949. for (BasicBlock *BB : ToBeCloned)
  950. LI->removeBlock(BB);
  951. // Remove loop and all child loops from queue.
  952. RecursivelyRemoveLoopOnSuccess(LPM, L);
  953. // Remove dead blocks.
  954. for (BasicBlock *BB : ToBeCloned)
  955. DetachFromSuccessors(BB);
  956. for (BasicBlock *BB : ToBeCloned)
  957. BB->dropAllReferences();
  958. for (BasicBlock *BB : ToBeCloned)
  959. BB->eraseFromParent();
  960. // Blocks need to be removed from DomTree. There's no easy way
  961. // to remove them in the right order, so just make DomTree
  962. // recalculate.
  963. DT->recalculate(*F);
  964. if (OuterL) {
  965. // This process may have created multiple back edges for the
  966. // parent loop. Simplify to keep it well-formed.
  967. simplifyLoop(OuterL, DT, LI, this, nullptr, nullptr, AC);
  968. }
  969. // Now that we potentially turned some GEP indices into constants,
  970. // try to clean up their allocas.
  971. if (!BreakUpArrayAllocas(OnlyWarnOnFail /* allow oob index */, ProblemAllocas.begin(), ProblemAllocas.end(), DT, AC, DVC)) {
  972. FailLoopUnroll(false, F, LoopLoc, "Could not unroll loop due to out of bound array access.");
  973. }
  974. return true;
  975. }
  976. // If we were unsuccessful in unrolling the loop
  977. else {
  978. // Mark loop as failed.
  979. LoopsThatFailed.insert(L);
  980. // Remove all the cloned blocks
  981. for (std::unique_ptr<ClonedIteration> &Ptr : Iterations) {
  982. ClonedIteration &Iteration = *Ptr.get();
  983. for (BasicBlock *BB : Iteration.Body)
  984. DetachFromSuccessors(BB);
  985. }
  986. for (std::unique_ptr<ClonedIteration> &Ptr : Iterations) {
  987. ClonedIteration &Iteration = *Ptr.get();
  988. for (BasicBlock *BB : Iteration.Body)
  989. BB->dropAllReferences();
  990. }
  991. for (std::unique_ptr<ClonedIteration> &Ptr : Iterations) {
  992. ClonedIteration &Iteration = *Ptr.get();
  993. for (BasicBlock *BB : Iteration.Body)
  994. BB->eraseFromParent();
  995. }
  996. return false;
  997. }
  998. }
  999. bool DxilLoopUnroll::doFinalization() {
  1000. const char *Msg =
  1001. "Could not unroll loop. Loop bound could not be deduced at compile time. "
  1002. "Use [unroll(n)] to give an explicit count.";
  1003. if (LoopsThatFailed.size()) {
  1004. for (Loop *L : LoopsThatFailed) {
  1005. Function *F = L->getHeader()->getParent();
  1006. DebugLoc LoopLoc = L->getStartLoc(); // Debug location for the start of the loop.
  1007. if (OnlyWarnOnFail) {
  1008. FailLoopUnroll(true /*warn only*/, F, LoopLoc, Msg);
  1009. }
  1010. else {
  1011. FailLoopUnroll(false /*warn only*/, F, LoopLoc,
  1012. Twine(Msg) + Twine(" Use '-HV 2016' to treat this as warning."));
  1013. }
  1014. }
  1015. }
  1016. return false;
  1017. }
  1018. }
  1019. Pass *llvm::createDxilLoopUnrollPass(unsigned MaxIterationAttempt, bool OnlyWarnOnFail, bool StructurizeLoopExits) {
  1020. return new DxilLoopUnroll(MaxIterationAttempt, OnlyWarnOnFail, StructurizeLoopExits);
  1021. }
  1022. INITIALIZE_PASS_BEGIN(DxilLoopUnroll, "dxil-loop-unroll", "Dxil Unroll loops", false, false)
  1023. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  1024. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  1025. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  1026. INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
  1027. INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  1028. INITIALIZE_PASS_DEPENDENCY(DxilValueCache)
  1029. INITIALIZE_PASS_END(DxilLoopUnroll, "dxil-loop-unroll", "Dxil Unroll loops", false, false)