Browse Source

Hoist exits out of loops when unrolling to make code structured. (#3103)

Adam Yang 5 years ago
parent
commit
5d6674ad0a

+ 18 - 0
include/dxc/HLSL/DxilNoops.h

@@ -0,0 +1,18 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilNoops.h                                                               //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "llvm/ADT/StringRef.h"
+
+// Well-known IR symbol names shared between the noop/preserve passes and the
+// loop exit structurizer.
+namespace hlsl {
+// Name of the void noop function declared by the noop passes.
+static const llvm::StringRef kNoopName = "dx.noop";
+// Prefix of the per-type preserve function names (the type is appended).
+static const llvm::StringRef kPreservePrefix = "dx.preserve.";
+// Name of the internal global variable used when lowering noops.
+static const llvm::StringRef kNothingName = "dx.nothing.a";
+// Name of the internal global variable used to build the preserve condition.
+static const llvm::StringRef kPreserveName = "dx.preserve.value.a";
+} // namespace hlsl

+ 1 - 0
include/llvm/Transforms/IPO/PassManagerBuilder.h

@@ -133,6 +133,7 @@ public:
   bool HLSLResMayAlias = false; // HLSL Change
   unsigned ScanLimit = 0; // HLSL Change
   bool EnableGVN = true; // HLSL Change
+  // Defaults to off so builders that never set it do not read an
+  // indeterminate value.
+  bool StructurizeLoopExitsForUnroll = false; // HLSL Change
 
 private:
   /// ExtensionList - This is list of all of the extensions that are registered.

+ 4 - 2
include/llvm/Transforms/Scalar.h

@@ -129,7 +129,7 @@ void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
 Pass *createDxilConditionalMem2RegPass(bool NoOpt);
 void initializeDxilConditionalMem2RegPass(PassRegistry&);
 
-Pass *createDxilLoopUnrollPass(unsigned MaxIterationAttempt, bool OnlyWarnOnFail);
+Pass *createDxilLoopUnrollPass(unsigned MaxIterationAttempt, bool OnlyWarnOnFail, bool StructurizeLoopExits);
 void initializeDxilLoopUnrollPass(PassRegistry&);
 
 Pass *createDxilEraseDeadRegionPass();
@@ -261,7 +261,9 @@ Pass *createLoopInstSimplifyPass();
 // LoopUnroll - This pass is a simple loop unrolling pass.
 //
 Pass *createLoopUnrollPass(int Threshold = -1, int Count = -1,
-                           int AllowPartial = -1, int Runtime = -1);
+                           int AllowPartial = -1, int Runtime = -1,
+                           bool StructurizeLoopExits = false // HLSL Change
+                          );
 // Create an unrolling pass for full unrolling only.
 Pass *createSimpleLoopUnrollPass();
 

+ 10 - 16
lib/HLSL/DxilNoops.cpp

@@ -97,23 +97,17 @@
 #include "llvm/Support/raw_os_ostream.h"
 #include "dxc/DXIL/DxilMetadataHelper.h"
 #include "dxc/DXIL/DxilConstants.h"
+#include "dxc/HLSL/DxilNoops.h"
 #include "llvm/Analysis/DxilValueCache.h"
 
 #include <unordered_set>
 
 using namespace llvm;
 
-namespace {
-StringRef kNoopName = "dx.noop";
-StringRef kPreservePrefix = "dx.preserve.";
-StringRef kNothingName = "dx.nothing.a";
-StringRef kPreserveName = "dx.preserve.value.a";
-}
-
 static Function *GetOrCreateNoopF(Module &M) {
   LLVMContext &Ctx = M.getContext();
   FunctionType *FT = FunctionType::get(Type::getVoidTy(Ctx), false);
-  Function *NoopF = cast<Function>(M.getOrInsertFunction(::kNoopName, FT));
+  Function *NoopF = cast<Function>(M.getOrInsertFunction(hlsl::kNoopName, FT));
   NoopF->addFnAttr(Attribute::AttrKind::Convergent);
   return NoopF;
 }
@@ -199,7 +193,7 @@ static Value *GetOrCreatePreserveCond(Function *F) {
   assert(!F->isDeclaration());
 
   Module *M = F->getParent();
-  GlobalVariable *GV = M->getGlobalVariable(kPreserveName, true);
+  GlobalVariable *GV = M->getGlobalVariable(hlsl::kPreserveName, true);
   if (!GV) {
     Type *i32Ty = Type::getInt32Ty(M->getContext());
     Type *i32ArrayTy = ArrayType::get(i32Ty, 1);
@@ -210,7 +204,7 @@ static Value *GetOrCreatePreserveCond(Function *F) {
     GV = new GlobalVariable(*M,
       i32ArrayTy, true,
       llvm::GlobalValue::InternalLinkage,
-      InitialValue, kPreserveName);
+      InitialValue, hlsl::kPreserveName);
   }
 
   for (User *U : GV->users()) {
@@ -237,7 +231,7 @@ static Value *GetOrCreatePreserveCond(Function *F) {
 
 
 static Function *GetOrCreatePreserveF(Module *M, Type *Ty) {
-  std::string str = kPreservePrefix;
+  std::string str = hlsl::kPreservePrefix;
   raw_string_ostream os(str);
   Ty->print(os);
   os.flush();
@@ -445,7 +439,7 @@ public:
       if (!F->isDeclaration())
         continue;
 
-      if (F->getName().startswith(kPreservePrefix)) {
+      if (F->getName().startswith(hlsl::kPreservePrefix)) {
         for (auto uit = F->user_begin(), end = F->user_end(); uit != end;) {
           User *U = *(uit++);
           CallInst *CI = cast<CallInst>(U);
@@ -559,7 +553,7 @@ public:
   Instruction *GetFinalNoopInst(Module &M, Instruction *InsertBefore) {
     Type *i32Ty = Type::getInt32Ty(M.getContext());
     if (!NothingGV) {
-      NothingGV = M.getGlobalVariable(kNothingName);
+      NothingGV = M.getGlobalVariable(hlsl::kNothingName);
       if (!NothingGV) {
         Type *i32ArrayTy = ArrayType::get(i32Ty, 1);
 
@@ -569,7 +563,7 @@ public:
         NothingGV = new GlobalVariable(M,
           i32ArrayTy, true,
           llvm::GlobalValue::InternalLinkage,
-          InitialValue, kNothingName);
+          InitialValue, hlsl::kNothingName);
       }
     }
 
@@ -590,7 +584,7 @@ char DxilFinalizePreserves::ID;
 bool DxilFinalizePreserves::LowerPreserves(Module &M) {
   bool Changed = false;
 
-  GlobalVariable *GV = M.getGlobalVariable(kPreserveName, true);
+  GlobalVariable *GV = M.getGlobalVariable(hlsl::kPreserveName, true);
   if (GV) {
     for (User *U : GV->users()) {
       GEPOperator *Gep = cast<GEPOperator>(U);
@@ -626,7 +620,7 @@ bool DxilFinalizePreserves::LowerNoops(Module &M) {
   for (Function &F : M) {
     if (!F.isDeclaration())
       continue;
-    if (F.getName() == kNoopName) {
+    if (F.getName() == hlsl::kNoopName) {
       NoopF = &F;
     }
   }

+ 11 - 6
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -207,7 +207,7 @@ void PassManagerBuilder::populateFunctionPassManager(
 }
 
 // HLSL Change Starts
-static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, bool OnlyWarnOnUnrollFail, hlsl::HLSLExtensionsCodegenHelper *ExtHelper, legacy::PassManagerBase &MPM) {
+static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, bool OnlyWarnOnUnrollFail, bool StructurizeLoopExitsForUnroll, hlsl::HLSLExtensionsCodegenHelper *ExtHelper, legacy::PassManagerBase &MPM) {
 
   // Don't do any lowering if we're targeting high-level.
   if (HLSLHighLevel) {
@@ -292,12 +292,12 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, bool OnlyWarnOn
   // struct members.
   // Needs to happen before resources are lowered and before HL
   // module is gone.
-  MPM.add(createDxilLoopUnrollPass(1024, OnlyWarnOnUnrollFail));
+  MPM.add(createDxilLoopUnrollPass(1024, OnlyWarnOnUnrollFail, StructurizeLoopExitsForUnroll));
 
   // Default unroll pass. This is purely for optimizing loops without
   // attributes.
   if (OptLevel > 2) {
-    MPM.add(createLoopUnrollPass());
+    MPM.add(createLoopUnrollPass(-1, -1, -1, -1, StructurizeLoopExitsForUnroll));
   }
 
   if (!NoOpt)
@@ -351,7 +351,12 @@ void PassManagerBuilder::populateModulePassManager(
     addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
 
     // HLSL Change Begins.
-    addHLSLPasses(HLSLHighLevel, OptLevel, this->HLSLOnlyWarnOnUnrollFail, HLSLExtensionsCodeGen, MPM);
+    addHLSLPasses(HLSLHighLevel, OptLevel,
+      this->HLSLOnlyWarnOnUnrollFail,
+      this->StructurizeLoopExitsForUnroll,
+      this->HLSLExtensionsCodeGen,
+      MPM);
+
     if (!HLSLHighLevel) {
       MPM.add(createDxilConvergentClearPass());
       MPM.add(createMultiDimArrayToOneDimArrayPass());
@@ -386,7 +391,7 @@ void PassManagerBuilder::populateModulePassManager(
     delete Inliner;
     Inliner = nullptr;
   }
-  addHLSLPasses(HLSLHighLevel, OptLevel, this->HLSLOnlyWarnOnUnrollFail, HLSLExtensionsCodeGen, MPM); // HLSL Change
+  addHLSLPasses(HLSLHighLevel, OptLevel, this->HLSLOnlyWarnOnUnrollFail, this->StructurizeLoopExitsForUnroll, HLSLExtensionsCodeGen, MPM); // HLSL Change
   // HLSL Change Ends
 
   // Add LibraryInfo if we have some.
@@ -601,7 +606,7 @@ void PassManagerBuilder::populateModulePassManager(
   MPM.add(createInstructionCombiningPass());
 
   if (!DisableUnrollLoops) {
-    MPM.add(createLoopUnrollPass());    // Unroll small loops
+    MPM.add(createLoopUnrollPass(/* HLSL Change begin */-1, -1, -1, -1, this->StructurizeLoopExitsForUnroll /* HLSL Change end */));    // Unroll small loops
 
     // LoopUnroll may generate some redundency to cleanup.
     MPM.add(createInstructionCombiningPass());

+ 1 - 0
lib/Transforms/Scalar/CMakeLists.txt

@@ -51,6 +51,7 @@ add_llvm_library(LLVMScalarOpts
   DxilFixConstArrayInitializer.cpp # HLSL Change
   DxilEliminateVector.cpp # HLSL Change
   DxilConditionalMem2Reg.cpp # HLSL Change
+  DxilRemoveUnstructuredLoopExits.cpp # HLSL Change
   Scalarizer.cpp
   SeparateConstOffsetFromGEP.cpp
   SimplifyCFGPass.cpp

+ 24 - 4
lib/Transforms/Scalar/DxilLoopUnroll.cpp

@@ -84,6 +84,8 @@
 #include "dxc/HLSL/HLModule.h"
 #include "llvm/Analysis/DxilValueCache.h"
 
+#include "DxilRemoveUnstructuredLoopExits.h"
+
 using namespace llvm;
 using namespace hlsl;
 
@@ -116,11 +118,13 @@ public:
   std::unordered_set<Function *> CleanedUpAlloca;
   unsigned MaxIterationAttempt = 0;
   bool OnlyWarnOnFail = false;
+  bool StructurizeLoopExits = false;
 
-  DxilLoopUnroll(unsigned MaxIterationAttempt = 1024, bool OnlyWarnOnFail=false) :
+  DxilLoopUnroll(unsigned MaxIterationAttempt = 1024, bool OnlyWarnOnFail=false, bool StructurizeLoopExits=false) :
     LoopPass(ID),
     MaxIterationAttempt(MaxIterationAttempt),
-    OnlyWarnOnFail(OnlyWarnOnFail)
+    OnlyWarnOnFail(OnlyWarnOnFail),
+    StructurizeLoopExits(StructurizeLoopExits)
   {
     initializeDxilLoopUnrollPass(*PassRegistry::getPassRegistry());
   }
@@ -133,6 +137,7 @@ public:
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addRequired<ScalarEvolution>();
     AU.addRequired<DxilValueCache>();
+    AU.addRequiredID(&LCSSAID);
     AU.addRequiredID(LoopSimplifyID);
   }
 
@@ -671,6 +676,7 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
     TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
   }
 
+
   // Analysis passes
   DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   AssumptionCache *AC =
@@ -706,6 +712,19 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   std::unordered_set<BasicBlock *> ProblemBlocks;
   FindProblemBlocks(L->getHeader(), BlocksInLoop, ProblemBlocks, ProblemAllocas);
 
+  if (StructurizeLoopExits && hlsl::RemoveUnstructuredLoopExits(L, LI, DT, /* exclude */&ProblemBlocks)) {
+    // Recompute the loop if we managed to simplify the exit blocks
+
+    Latch = L->getLoopLatch();
+    ExitBlocks.clear();
+    L->getExitBlocks(ExitBlocks);
+    ExitBlockSet = std::unordered_set<BasicBlock *>(ExitBlocks.begin(), ExitBlocks.end());
+
+    BlocksInLoop.clear();
+    BlocksInLoop.append(L->getBlocks().begin(), L->getBlocks().end());
+    BlocksInLoop.append(ExitBlocks.begin(), ExitBlocks.end());
+  }
+
   // Keep track of the PHI nodes at the header.
   SmallVector<PHINode *, 16> PHIs;
   for (auto it = Header->begin(); it != Header->end(); it++) {
@@ -1057,8 +1076,8 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
 }
 
-Pass *llvm::createDxilLoopUnrollPass(unsigned MaxIterationAttempt, bool OnlyWarnOnFail) {
-  return new DxilLoopUnroll(MaxIterationAttempt, OnlyWarnOnFail);
+Pass *llvm::createDxilLoopUnrollPass(unsigned MaxIterationAttempt, bool OnlyWarnOnFail, bool StructurizeLoopExits) {
+  return new DxilLoopUnroll(MaxIterationAttempt, OnlyWarnOnFail, StructurizeLoopExits);
 }
 
 INITIALIZE_PASS_BEGIN(DxilLoopUnroll, "dxil-loop-unroll", "Dxil Unroll loops", false, false)
@@ -1070,3 +1089,4 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_DEPENDENCY(DxilValueCache)
 INITIALIZE_PASS_END(DxilLoopUnroll, "dxil-loop-unroll", "Dxil Unroll loops", false, false)
 
+

+ 477 - 0
lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp

@@ -0,0 +1,477 @@
+//===- DxilRemoveUnstructuredLoopExits.cpp - Make unrolled loops structured ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Loops that look like the following become unstructured when unrolled:
+//
+//      for(;;) {
+//        if (a) {
+//          if (b) {
+//            exit_code_0;
+//            break;       // Unstructured loop exit
+//          }
+//
+//          code_0;
+//
+//          if (c) {
+//            if (d) {
+//              exit_code_1;
+//              break;    // Unstructured loop exit
+//            }
+//            code_1;
+//          }
+//
+//          code_2;
+//
+//          ...
+//        }
+//
+//        code_3;
+//
+//        if (exit)
+//          break;
+//      }
+//      
+//
+// This pass transforms the loop into the following form:
+//
+//      bool broke_0 = false;
+//      bool broke_1 = false;
+//
+//      for(;;) {
+//        if (a) {
+//          if (b) {
+//            broke_0 = true;       // Break flag
+//          }
+//
+//          if (!broke_0) {
+//            code_0;
+//          }
+//
+//          if (!broke_0) {
+//            if (c) {
+//              if (d) {
+//                broke_1 = true;   // Break flag
+//              }
+//              if (!broke_1) {
+//                code_1;
+//              }
+//            }
+//
+//            if (!broke_1) {
+//              code_2;
+//            }
+//          }
+//
+//          ...
+//        }
+//
+//        if (broke_0) {
+//          break;
+//        }
+//
+//        if (broke_1) {
+//          break;
+//        }
+//
+//        code_3;
+//
+//        if (exit)
+//          break;
+//      }
+//
+//      if (broke_0) {
+//        exit_code_0;
+//      }
+//
+//      if (broke_1) {
+//        exit_code_1;
+//      }
+//
+// Essentially it hoists the exit branch out of the loop.
+//
+// This function should be called any time before a loop is unrolled to
+// avoid generating unstructured code.
+//
+// There are several limitations at the moment:
+//
+//   - if code_0, code_1, etc has any loops in there, this transform
+//     does not take place. Since the values that flow out of the conditions
+//     are phi of undef, I do not want to risk the loops not exiting.
+//
+//   - code_0, code_1, etc, become conditional only when there are
+//     side effects in there. This doesn't impact code correctness,
+//     but the code will execute for one iteration even if the exit condition
+//     is met.
+//
+//   - If exit_code uses values that are defined inside the loop at a point
+//     that does not dominate the loop latch, this transformation does not
+//     take place.
+//
+// These limitations can be fixed in the future as needed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SetVector.h"
+#include "dxc/HLSL/DxilNoops.h"
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "DxilRemoveUnstructuredLoopExits.h"
+
+using namespace llvm;
+
+// Returns true when `inst` is a direct call to the function named
+// hlsl::kNoopName; any other instruction (or an indirect call) yields false.
+static bool IsNoop(Instruction *inst) {
+  CallInst *call = dyn_cast<CallInst>(inst);
+  if (!call)
+    return false;
+
+  Function *callee = call->getCalledFunction();
+  if (!callee)
+    return false;
+
+  return callee->getName() == hlsl::kNoopName;
+}
+
+// Returns the successor of `exiting_block` that lies outside of loop `L`.
+//
+// Preconditions (asserted): `exiting_block` belongs to `L` and is terminated
+// by a conditional branch with exactly one successor outside the loop.
+static BasicBlock *GetExitBlockForExitingBlock(Loop *L, BasicBlock *exiting_block) {
+  assert(L->contains(exiting_block));
+  // cast<> (rather than an unchecked dyn_cast<>) so that a non-branch
+  // terminator trips a clear assertion instead of a null dereference.
+  BranchInst *br = cast<BranchInst>(exiting_block->getTerminator());
+  assert(br->isConditional());
+  BasicBlock *result = L->contains(br->getSuccessor(0)) ? br->getSuccessor(1) : br->getSuccessor(0);
+  assert(!L->contains(result));
+  return result;
+}
+
+// Tries to hoist the single loop exit taken from `exiting_block` out of the
+// loop body, so that the loop leaves through the latch's exit block instead.
+//
+// Returns false and leaves the CFG unchanged when:
+//   - the exit block has more than one predecessor,
+//   - the exit block is already the latch's exit block,
+//   - a value flowing into a phi of the exit block does not dominate the latch,
+//   - propagating the exit condition reaches a block that LoopInfo does not
+//     attribute directly to `L` (e.g. an inner loop).
+//
+// On success the exiting branch becomes unconditional, blocks on the way to
+// the latch that touch memory are gated behind the propagated exit flag, the
+// latch is split so that it branches to the latch exit when the flag is set,
+// and the latch exit is split so that it forwards to the original exit block.
+// The dominator tree is recalculated and true is returned.
+//
+// `exiting_block` must be terminated by a conditional branch and must not be
+// the loop latch; the latch itself must end in a conditional branch.
+static bool RemoveUnstructuredLoopExitsIteration(BasicBlock *exiting_block, Loop *L, LoopInfo *LI, DominatorTree *DT) {
+
+  LLVMContext &ctx = L->getHeader()->getContext();
+  Type *i1Ty = Type::getInt1Ty(ctx);
+
+  BasicBlock *exit_block = GetExitBlockForExitingBlock(L, exiting_block);
+
+  // If there's more than one predecessors for this exit block, don't risk it.
+  if (!exit_block->getSinglePredecessor())
+    return false;
+
+  {
+    BasicBlock *latch = L->getLoopLatch();
+    BasicBlock *latch_exit = GetExitBlockForExitingBlock(L, latch);
+
+    // If the latch and the exiting block go to the same place, then we probably already fixed this exit.
+    if (exit_block == latch_exit) {
+      return false;
+    }
+
+    for (Instruction &I : *exit_block) {
+      if (PHINode *phi = dyn_cast<PHINode>(&I)) {
+        // If there are values flowing out of the loop into the exit_block,
+        // if any of those values do not dominate the latch, they would need
+        // to be propagated to the latch, which we don't do right now.
+        //
+        if (Instruction *value = dyn_cast<Instruction>(phi->getIncomingValueForBlock(exiting_block))) {
+          if (!DT->dominates(value, latch)) {
+            return false;
+          }
+        }
+      }
+      else {
+        break;
+      }
+    }
+  }
+
+  BranchInst *exiting_br = cast<BranchInst>(exiting_block->getTerminator());
+  Value *exit_cond = exiting_br->getCondition();
+
+  // Everything below treats the propagated flag as "true == leave the loop".
+  // When the exit is the false successor of the branch, the raw condition has
+  // the opposite meaning and must be negated first.
+  Instruction *negated_exit_cond = nullptr;
+  if (exiting_br->getSuccessor(0) != exit_block) {
+    negated_exit_cond = BinaryOperator::CreateNot(exit_cond, "dx.struct_exit.not_cond", exiting_br);
+    exit_cond = negated_exit_cond;
+  }
+
+  Value *exit_cond_dominates_latch = nullptr;
+  BasicBlock *new_exiting_block = nullptr;
+  SmallVector<std::pair<BasicBlock *, Value *>, 4> blocks_with_side_effect;
+  bool give_up = false;
+  std::unordered_map<BasicBlock *, PHINode *> cached_phis;
+
+  // Use a worklist to propagate the exit condition from within the block
+  {
+    Value *false_value = ConstantInt::getFalse(i1Ty);
+
+    struct Propagate_Data {
+      BasicBlock *bb;
+      Value *exit_cond;
+    };
+
+    std::unordered_set<BasicBlock *> seen;
+    SmallVector<Propagate_Data, 4> work_list;
+
+    work_list.push_back({ exiting_block, exit_cond, });
+    seen.insert(exiting_block);
+
+    BasicBlock *latch = L->getLoopLatch();
+
+    for (unsigned i = 0; i < work_list.size(); i++) {
+      Propagate_Data data = work_list[i];
+
+      BasicBlock *bb = data.bb;
+
+      // Don't continue to propagate when we hit the latch
+      if (bb == latch && DT->dominates(bb, latch)) {
+        exit_cond_dominates_latch = data.exit_cond;
+        new_exiting_block = bb;
+        continue;
+      }
+
+      // Do not include the exiting block itself in this calculation
+      if (i != 0) {
+        // If this block is part of an inner loop... Give up for now.
+        if (LI->getLoopFor(data.bb) != L) {
+          give_up = true;
+        }
+        // Otherwise just remember the blocks that touch memory; they get
+        // gated behind the exit flag later on.
+        else {
+          for (Instruction &I : *bb) {
+            if (I.mayReadOrWriteMemory() && !IsNoop(&I)) {
+              blocks_with_side_effect.push_back({ bb, data.exit_cond });
+              break;
+            }
+          }
+        }
+      } // If this is not the first iteration
+
+      for (BasicBlock *succ : llvm::successors(bb)) {
+        if (!L->contains(succ))
+          continue;
+
+        // One flag phi per block; it starts out as "false from everywhere".
+        PHINode *phi = cached_phis[succ];
+        if (!phi) {
+          phi = PHINode::Create(i1Ty, 2, "dx.struct_exit.exit_cond", &*succ->begin());
+          for (BasicBlock *pred : llvm::predecessors(succ)) {
+            phi->addIncoming(false_value, pred);
+          }
+          cached_phis[succ] = phi;
+        }
+
+        // Forward this block's flag along the edge bb -> succ.
+        for (unsigned idx = 0; idx < phi->getNumIncomingValues(); idx++) {
+          if (phi->getIncomingBlock(idx) == bb) {
+            phi->setIncomingValue(idx, data.exit_cond);
+            break;
+          }
+        }
+
+        if (!seen.count(succ)) {
+          work_list.push_back({ succ, phi, });
+          seen.insert(succ);
+        }
+
+      } // for each succ
+    } // for each in worklist
+  } // propagate the exit condition
+
+  if (give_up) {
+    // Undo: the flag phis may reference each other, so drop all operands
+    // first and only then erase them.
+    for (auto &entry : cached_phis) {
+      if (entry.second)
+        entry.second->dropAllReferences();
+    }
+    for (auto &entry : cached_phis) {
+      if (entry.second)
+        entry.second->eraseFromParent();
+    }
+    // The negation was only used by the phis erased above.
+    if (negated_exit_cond)
+      negated_exit_cond->eraseFromParent();
+    return false;
+  }
+
+  // Make the exiting block not exit.
+  {
+    BasicBlock *non_exiting_block = exiting_br->getSuccessor(exiting_br->getSuccessor(0) == exit_block ? 1 : 0);
+    BranchInst::Create(non_exiting_block, exiting_block);
+    exiting_br->eraseFromParent();
+    exiting_br = nullptr;
+  }
+
+  // If bb has side effect, split it into 3 basic blocks, where its body is
+  // gated behind if (!exit_cond)
+  for (const std::pair<BasicBlock *, Value *> &data : blocks_with_side_effect) {
+    BasicBlock *bb = data.first;
+    Value *gate_cond = data.second;
+
+    BasicBlock *body = bb->splitBasicBlock(bb->getFirstNonPHI());
+    body->setName("dx.struct_exit.cond_body");
+    BasicBlock *end = body->splitBasicBlock(body->getTerminator());
+    end->setName("dx.struct_exit.cond_end");
+
+    bb->getTerminator()->eraseFromParent();
+    BranchInst::Create(end, body, gate_cond, bb);
+
+    for (Instruction &inst : *body) {
+      // Snapshot the users living outside of 'body' first: rewriting a use
+      // while walking inst.users() would move the use onto another value's
+      // use list and corrupt the iteration.
+      SmallVector<Instruction *, 8> outside_users;
+      for (User *user : inst.users()) {
+        Instruction *user_inst = dyn_cast<Instruction>(user);
+        if (user_inst && user_inst->getParent() != body)
+          outside_users.push_back(user_inst);
+      }
+
+      if (outside_users.empty())
+        continue;
+
+      // The value is undefined when the body was skipped.
+      PHINode *phi = PHINode::Create(inst.getType(), 2, "", &*end->begin());
+      phi->addIncoming(UndefValue::get(inst.getType()), bb);
+      phi->addIncoming(&inst, body);
+
+      // A user may appear more than once in the snapshot; the repeated
+      // replacement is then a no-op.
+      for (Instruction *user_inst : outside_users)
+        user_inst->replaceUsesOfWith(&inst, phi);
+    } // For each inst in body
+
+    L->addBasicBlockToLoop(body, *LI);
+    L->addBasicBlockToLoop(end, *LI);
+
+  } // For each bb with side effect
+
+  assert(exit_cond_dominates_latch);
+  assert(new_exiting_block);
+
+  // Split the block where we're now exiting from, and branch to latch exit
+  BasicBlock *latch_exit = GetExitBlockForExitingBlock(L, L->getLoopLatch());
+  // Copy the name: setName() below releases the storage a StringRef obtained
+  // from getName() would still be pointing at.
+  std::string old_name = new_exiting_block->getName().str();
+  BasicBlock *new_not_exiting_block = new_exiting_block->splitBasicBlock(new_exiting_block->getFirstNonPHI());
+  new_exiting_block->setName("dx.struct_exit.new_exiting");
+  new_not_exiting_block->setName(old_name);
+  L->addBasicBlockToLoop(new_not_exiting_block, *LI);
+
+  new_exiting_block->getTerminator()->eraseFromParent();
+  BranchInst::Create(latch_exit, new_not_exiting_block, exit_cond_dominates_latch, new_exiting_block);
+
+  // Split the latch exit, since it's going to branch to the real exit block
+  BasicBlock *post_exit_location = latch_exit->splitBasicBlock(latch_exit->getFirstNonPHI());
+  // If latch exit is part of an outer loop, add its split in there too.
+  if (Loop *outer_loop = LI->getLoopFor(latch_exit)) {
+    outer_loop->addBasicBlockToLoop(post_exit_location, *LI);
+  }
+  // If the original exit block is part of an outer loop, then latch exit (which is the
+  // new exit block) must be part of it, since all blocks that branch to within
+  // a loop must be part of that loop structure.
+  else if (Loop *outer_loop = LI->getLoopFor(exit_block)) {
+    outer_loop->addBasicBlockToLoop(latch_exit, *LI);
+  }
+
+  // Since now new exiting block is branching to latch exit, its phis need to be updated.
+  for (Instruction &inst : *latch_exit) {
+    PHINode *phi = dyn_cast<PHINode>(&inst);
+    if (!phi)
+      break;
+    phi->addIncoming(UndefValue::get(phi->getType()), new_exiting_block);
+  }
+
+  unsigned latch_exit_num_predecessors = 0;
+  for (BasicBlock *pred : llvm::predecessors(latch_exit)) {
+    (void)pred;
+    latch_exit_num_predecessors++;
+  }
+
+  // Make exit condition visible
+  PHINode *exit_cond_lcssa = PHINode::Create(exit_cond_dominates_latch->getType(), latch_exit_num_predecessors, "dx.struct_exit.exit_cond_lcssa", latch_exit->begin());
+  for (BasicBlock *pred : llvm::predecessors(latch_exit)) {
+    if (pred == new_exiting_block) {
+      exit_cond_lcssa->addIncoming(exit_cond_dominates_latch, pred);
+    }
+    else {
+      exit_cond_lcssa->addIncoming(ConstantInt::getFalse(exit_cond_lcssa->getType()), pred);
+    }
+  }
+
+  // Take the exit outside the loop.
+  latch_exit->getTerminator()->eraseFromParent();
+  BranchInst::Create(exit_block, post_exit_location, exit_cond_lcssa, latch_exit);
+
+  // Fix the phi's in the real exit block, and insert phis in the latch exit to maintain
+  // lcssa form.
+  for (Instruction &inst : *exit_block) {
+    PHINode *phi = dyn_cast<PHINode>(&inst);
+    if (!phi)
+      break;
+
+    for (unsigned i = 0; i < phi->getNumIncomingValues(); i++) {
+      if (phi->getIncomingBlock(i) == exiting_block) {
+        phi->setIncomingBlock(i, latch_exit);
+
+        PHINode *lcssa_phi = PHINode::Create(phi->getType(), latch_exit_num_predecessors, "dx.struct_exit.lcssa_phi", latch_exit->begin());
+        for (BasicBlock *pred : llvm::predecessors(latch_exit)) {
+          if (pred == new_exiting_block) {
+            lcssa_phi->addIncoming(phi->getIncomingValue(i), new_exiting_block);
+          }
+          else {
+            lcssa_phi->addIncoming(UndefValue::get(lcssa_phi->getType()), pred);
+          }
+        }
+
+        phi->setIncomingValue(i, lcssa_phi);
+      }
+    }
+  }
+
+  DT->recalculate(*L->getHeader()->getParent());
+  assert(L->isLCSSAForm(*DT));
+
+  return true;
+}
+
+// Repeatedly hoists non-latch loop exits of `L` out of the loop body until no
+// further exit can be transformed.
+//
+// Does nothing and returns false when the loop is not in LCSSA form, has no
+// single latch, or its latch does not end in a conditional branch. Exiting
+// blocks that do not end in a conditional branch are left alone, as are
+// exiting blocks whose *exit* block is listed in `exclude_set` (may be null).
+//
+// Returns true if at least one exit was hoisted.
+bool hlsl::RemoveUnstructuredLoopExits(llvm::Loop *L, llvm::LoopInfo *LI, llvm::DominatorTree *DT, std::unordered_set<llvm::BasicBlock *> *exclude_set) {
+  
+  bool changed = false;
+
+  if (!L->isLCSSAForm(*DT))
+    return false;
+
+  // Give up if there's not a single latch
+  BasicBlock *initial_latch = L->getLoopLatch();
+  if (!initial_latch)
+    return false;
+
+  // Give up if loop is not rotated somehow. dyn_cast<> rather than cast<>:
+  // a latch ending in something other than a branch is not an error, it is
+  // just a loop shape this transform does not handle.
+  BranchInst *latch_br = dyn_cast<BranchInst>(initial_latch->getTerminator());
+  if (!latch_br || !latch_br->isConditional())
+    return false;
+
+  for (;;) {
+    // Recompute exiting block every time, since they could change between
+    // iterations
+    llvm::SmallVector<BasicBlock *, 4> exiting_blocks;
+    L->getExitingBlocks(exiting_blocks);
+
+    bool local_changed = false;
+    for (BasicBlock *exiting_block : exiting_blocks) {
+      // The latch is re-queried because a successful iteration splits it.
+      if (L->getLoopLatch() == exiting_block)
+        continue;
+
+      // The helpers below assume a conditional branch; skip exits taken via
+      // any other kind of terminator instead of crashing on them.
+      BranchInst *exiting_br = dyn_cast<BranchInst>(exiting_block->getTerminator());
+      if (!exiting_br || !exiting_br->isConditional())
+        continue;
+
+      if (exclude_set && exclude_set->count(GetExitBlockForExitingBlock(L, exiting_block)))
+        continue;
+
+      // As soon as we got a success, break and start a new iteration, since
+      // exiting blocks could have changed.
+      local_changed = RemoveUnstructuredLoopExitsIteration(exiting_block, L, LI, DT);
+      if (local_changed) {
+        break;
+      }
+    }
+
+    changed |= local_changed;
+    if (!local_changed) {
+      break;
+    }
+  }
+
+  return changed;
+}
+

+ 24 - 0
lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.h

@@ -0,0 +1,24 @@
+//===- DxilRemoveUnstructuredLoopExits.h - Make unrolled loops structured ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <unordered_set>
+
+namespace llvm {
+  class Loop;
+  class LoopInfo;
+  class DominatorTree;
+  class BasicBlock;
+}
+
+namespace hlsl {
+
+  // Hoists non-latch exits of loop L out of the loop body so that the loop
+  // stays structured when it is unrolled. Returns true if the loop was
+  // changed; in that case the dominator tree has been recalculated.
+  //
+  // exclude_set is a list of *EXIT BLOCKS* to exclude (NOTE: not *exiting* blocks)
+  bool RemoveUnstructuredLoopExits(llvm::Loop *L, llvm::LoopInfo *LI, llvm::DominatorTree *DT, std::unordered_set<llvm::BasicBlock *> *exclude_set = nullptr);
+}
+

+ 12 - 3
lib/Transforms/Scalar/LoopUnrollPass.cpp

@@ -31,6 +31,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "DxilRemoveUnstructuredLoopExits.h" // HLSL Change
 #include <climits>
 
 using namespace llvm;
@@ -100,7 +101,7 @@ namespace {
   class LoopUnroll : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
+    LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1, /*HLSL change*/bool StructurizeLoopExits=false) : LoopPass(ID) {
       CurrentThreshold = (T == -1) ? unsigned(UnrollThreshold) : unsigned(T);
       CurrentPercentDynamicCostSavedThreshold =
           UnrollPercentDynamicCostSavedThreshold;
@@ -120,6 +121,8 @@ namespace {
       UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0);
 
       initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+
+      this->StructurizeLoopExits = StructurizeLoopExits; // HLSL Change
     }
 
     /// A magic value for use with the Threshold parameter to indicate
@@ -142,6 +145,8 @@ namespace {
     bool CurrentAllowPartial;
     bool CurrentRuntime;
 
+    bool StructurizeLoopExits; // HLSL Change
+
     // Flags for whether the 'current' settings are user-specified.
     bool UserCount;
     bool UserThreshold;
@@ -164,6 +169,7 @@ namespace {
       AU.addRequiredID(LCSSAID);
       AU.addPreservedID(LCSSAID);
       AU.addRequired<ScalarEvolution>();
+      AU.addRequired<DominatorTreeWrapperPass>(); // HLSL Change
       AU.addPreserved<ScalarEvolution>();
       AU.addRequired<TargetTransformInfoWrapperPass>();
       // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
@@ -262,8 +268,8 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
 
 Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
-                                 int Runtime) {
-  return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
+                                 int Runtime, /* HLSL Change */ bool StructurizeLoopExits) {
+  return new LoopUnroll(Threshold, Count, AllowPartial, Runtime, /* HLSL Change */ StructurizeLoopExits);
 }
 
 Pass *llvm::createSimpleLoopUnrollPass() {
@@ -933,6 +939,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
     return false;
   }
 
+  if (StructurizeLoopExits) // HLSL Change
+    hlsl::RemoveUnstructuredLoopExits(L, LI, &getAnalysis<DominatorTreeWrapperPass>().getDomTree()); // HLSL Change
+
   // Unroll the loop.
   if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
                   TripMultiple, LI, this, &LPM, &AC))

+ 5 - 0
tools/clang/lib/CodeGen/BackendUtil.cpp

@@ -335,6 +335,11 @@ void EmitAssemblyHelper::CreatePasses() {
 
   PMBuilder.EnableGVN = !CodeGenOpts.HLSLOptimizationToggles.count("gvn") ||
                         CodeGenOpts.HLSLOptimizationToggles.find("gvn")->second;
+
+  PMBuilder.StructurizeLoopExitsForUnroll =
+                        CodeGenOpts.HLSLOptimizationToggles.count("structurize-loop-exits-for-unroll") &&
+                        CodeGenOpts.HLSLOptimizationToggles.find("structurize-loop-exits-for-unroll")->second;
+
   // HLSL Change - end
 
   PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime;

+ 23 - 0
tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/_readme.txt

@@ -0,0 +1,23 @@
+There is a confirmation-bias problem when testing with debug info enabled using FileCheck.
+Say your test file contains:
+
+  // RUN: %dxc -E main -T vs_6_0 -Zi %s | FileCheck %s
+  // CHECK: foo
+  void main() {}
+
+Due to -Zi, the !dx.source.contents metadata will be present and contain a string
+with the original source file. This means that the generated output contains your
+"// CHECK: foo", and hence the "foo" itself, so the check will succeed by default!
+
+The current workaround is to include the following in your test to explicitly match
+the quoted source file:
+
+  // Exclude quoted source file (see readme)
+  // CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+
+This will match a metadata string containing \0A (newline), which should only appear
+in the quoted source file. The pattern cannot match its own occurrence inside the
+quoted source file: the regex text does not satisfy the regex, and the escaped form
+in which it is stored inside the metadata string satisfies it even less.
+
+Note that if you see a failure on that line, it means that something else before that
+CHECK failed to match.

+ 62 - 0
tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/struct_exit.hlsl

@@ -0,0 +1,62 @@
+// RUN: %dxc -E main -Zi -O3 -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s | FileCheck %s
+// RUN: %dxc -E main -Zi -Od -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s -DFORCE_UNROLL | FileCheck %s
+// RUN: %dxc -E main -Zi -T ps_6_0 %s -opt-enable structurize-loop-exits-for-unroll -DFORCE_UNROLL | FileCheck %s
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// CHECK: dx.struct_exit.cond_body
+// CHECK: store float
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// CHECK: dx.struct_exit.cond_body
+// CHECK: store float
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// CHECK: dx.struct_exit.cond_body
+// CHECK: store float
+
+#ifdef FORCE_UNROLL
+#define UNROLL [unroll]
+#else
+#define UNROLL
+#endif
+
+Texture2D tex0;
+RWTexture1D<float> uav0;
+RWTexture1D<float> uav1;
+
+const uint idx;
+
+[RootSignature("CBV(b0), DescriptorTable(SRV(t0)), DescriptorTable(UAV(u0), UAV(u1))")]
+float main(uint a : A, uint b : B, uint c : C) : SV_Target { // entry point; a, b and c feed every branch condition below
+
+  float ret = 0;
+  float array[] = {1.0, 2.0, 3.0,};
+
+  UNROLL for(uint i = 1; i <= 3; i++) { // three iterations; UNROLL expands to [unroll] only when FORCE_UNROLL is defined
+
+    if ((a * i) & c) {
+      ret += sin(i * b); // check for sin; the FileCheck patterns at the top of the file look for three unary-op calls, presumably one per unrolled iteration
+
+      if ((a * i) & b) {
+
+        if ((c | a) & b) {
+          // loop exit here: an early return from inside nested conditionals, preceded by a UAV write
+          uav0[i] += a;
+          return 1;
+        }
+
+        array[(idx + i) % 5] += a; // check that this side-effect is bounded within exit cond. NOTE(review): the index can reach 4 but array has 3 elements - presumably deliberate to force dynamic indexing; confirm
+      }
+    }
+  }
+
+  return ret + array[0]; // result when no iteration took the early return
+}
+
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+

+ 53 - 0
tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/struct_exit_disable.hlsl

@@ -0,0 +1,53 @@
+// RUN: %dxc -E main -Zi -O3 -T ps_6_0 %s | FileCheck %s
+// RUN: %dxc -E main -Zi -Od -T ps_6_0 %s -DFORCE_UNROLL | FileCheck %s
+// RUN: %dxc -E main -Zi -T ps_6_0 %s -DFORCE_UNROLL | FileCheck %s
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// CHECK-NOT: dx.struct_exit
+
+#ifdef FORCE_UNROLL
+#define UNROLL [unroll]
+#else
+#define UNROLL
+#endif
+
+Texture2D tex0;
+RWTexture1D<float> uav0;
+RWTexture1D<float> uav1;
+
+const uint idx;
+
+[RootSignature("CBV(b0), DescriptorTable(SRV(t0)), DescriptorTable(UAV(u0), UAV(u1))")]
+float main(uint a : A, uint b : B, uint c : C) : SV_Target { // entry point; this file is compiled without the structurize-loop-exits-for-unroll option (see the command lines at the top of the file)
+
+  float ret = 0;
+  float array[] = {1.0, 2.0, 3.0,};
+
+  UNROLL for(uint i = 1; i <= 3; i++) { // three iterations; UNROLL expands to [unroll] only when FORCE_UNROLL is defined
+
+    if ((a * i) & c) {
+      ret += sin(i * b); // check for sin; the FileCheck patterns at the top of the file look for three unary-op calls, presumably one per unrolled iteration
+
+      if ((a * i) & b) {
+
+        if ((c | a) & b) {
+          // loop exit here: an early return from inside nested conditionals, preceded by a UAV write
+          uav0[i] += a;
+          return 1;
+        }
+
+        array[(idx + i) % 5] += a; // check that this side-effect is bounded within exit cond. NOTE(review): the index can reach 4 but array has 3 elements - presumably deliberate to force dynamic indexing; confirm
+      }
+    }
+  }
+
+  return ret + array[0]; // result when no iteration took the early return
+}
+
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+

+ 63 - 0
tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/struct_exit_exit_value.hlsl

@@ -0,0 +1,63 @@
+// RUN: %dxc -Zi -E main -O3 -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s | FileCheck %s
+// RUN: %dxc -Zi -E main -Od -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s -DFORCE_UNROLL | FileCheck %s
+// RUN: %dxc -Zi -E main -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s -DFORCE_UNROLL | FileCheck %s
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// Make sure we didn't transform
+// CHECK-NOT: dx.struct_exit
+
+#ifdef FORCE_UNROLL
+#define UNROLL [unroll]
+#else
+#define UNROLL
+#endif
+
+Texture2D tex0;
+RWTexture1D<float> uav0;
+RWTexture1D<float> uav1;
+
+const uint idx;
+
+[RootSignature("CBV(b0), DescriptorTable(SRV(t0)), DescriptorTable(UAV(u0), UAV(u1))")]
+float main(uint a : A, uint b : B, uint c : C) : SV_Target { // entry point; the early exit below consumes a value computed inside the loop body
+
+  float ret = 0;
+  float array[] = {1.0, 2.0, 3.0,};
+
+  UNROLL for(uint i = 1; i <= 3; i++) { // three iterations; UNROLL expands to [unroll] only when FORCE_UNROLL is defined
+
+    if ((a * i) & c) {
+      ret += sin(i * b); // check for sin; the FileCheck patterns at the top of the file look for three unary-op calls, presumably one per unrolled iteration
+
+      if ((a * i) & b) {
+
+        int offset = 0; // This value doesn't dominate latch, is loop dependent,
+                        // and therefore must be propagated through to loop latch
+                        // so the hoisted loop exit can use it.
+                        //
+                        // We don't do this right now, so the transformation shouldn't
+                        // happen.
+        if (i % 2 == 0) { // makes offset differ between iterations (1 on even i, otherwise 0)
+          offset = 1;
+        }
+
+        if ((c | a) & b) {
+          // loop exit here; the UAV index below depends on the per-iteration offset
+          uav0[i + offset] += a;
+          return 1;
+        }
+
+        array[(idx + i) % 5] += a; // NOTE(review): the index can reach 4 but array has 3 elements - presumably deliberate to force dynamic indexing; confirm
+      }
+    }
+  }
+
+  return ret + array[0]; // result when no iteration took the early return
+}
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+

+ 58 - 0
tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/struct_exit_inner_loop.hlsl

@@ -0,0 +1,58 @@
+// RUN: %dxc -Zi -E main -O3 -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s -DFORCE_UNROLL | FileCheck %s
+// RUN: %dxc -Zi -E main -Od -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s -DFORCE_UNROLL | FileCheck %s
+// RUN: %dxc -Zi -E main -T ps_6_0 %s -opt-enable structurize-loop-exits-for-unroll -DFORCE_UNROLL | FileCheck %s
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// Make sure we didn't transform
+// CHECK-NOT: dx.struct_exit
+
+#ifdef FORCE_UNROLL
+#define UNROLL [unroll]
+#else
+#define UNROLL
+#endif
+
+Texture2D tex0;
+RWTexture1D<float> uav0;
+RWTexture1D<float> uav1;
+
+const uint idx;
+
+[RootSignature("CBV(b0), DescriptorTable(SRV(t0)), DescriptorTable(UAV(u0), UAV(u1))")]
+float main(uint a : A, uint b : B, uint c : C) : SV_Target { // entry point; the outer loop body contains a nested [loop] after the early exit
+
+  float ret = 0;
+  float array[] = {1.0, 2.0, 3.0,};
+
+  UNROLL for(uint i = 1; i <= 3; i++) { // three iterations; UNROLL expands to [unroll] only when FORCE_UNROLL is defined
+
+    if ((a * i) & c) {
+      ret += sin(i * b); // check for sin; the FileCheck patterns at the top of the file look for three unary-op calls, presumably one per unrolled iteration
+
+      if ((a * i) & b) {
+        if ((c | a) & b) {
+          // loop exit here: an early return from inside nested conditionals, preceded by a UAV write
+          uav0[i] += a;
+          return 1;
+        }
+
+        // This loop should prevent the struct_exit transformation
+        // from happening.
+        [loop] for (uint j = 0; j < 5; j++) { // five iterations, each writing one uav1 element
+          uav1[j] += j*c;
+        }
+
+        array[(idx + i) % 5] += a; // NOTE(review): the index can reach 4 but array has 3 elements - presumably deliberate to force dynamic indexing; confirm
+      }
+    }
+  }
+
+  return ret + array[0]; // result when no iteration took the early return
+}
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+

+ 67 - 0
tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/struct_exit_multi.hlsl

@@ -0,0 +1,67 @@
+// RUN: %dxc -E main -Zi -O3 -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s | FileCheck %s
+// RUN: %dxc -E main -Zi -Od -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s -DFORCE_UNROLL | FileCheck %s
+// RUN: %dxc -E main -Zi -T ps_6_0 %s -opt-enable structurize-loop-exits-for-unroll -DFORCE_UNROLL | FileCheck %s
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// CHECK: dx.struct_exit.cond_body
+// CHECK: store float
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// CHECK: dx.struct_exit.cond_body
+// CHECK: store float
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// CHECK: dx.struct_exit.cond_body
+// CHECK: store float
+
+#ifdef FORCE_UNROLL
+#define UNROLL [unroll]
+#else
+#define UNROLL
+#endif
+
+Texture2D tex0;
+RWTexture1D<float> uav0;
+RWTexture1D<float> uav1;
+
+const uint idx;
+
+[RootSignature("CBV(b0), DescriptorTable(SRV(t0)), DescriptorTable(UAV(u0), UAV(u1))")]
+float main(uint a : A, uint b : B, uint c : C) : SV_Target { // entry point; the loop body has two distinct early exits with different return values
+
+  float ret = 0;
+  float array[] = {1.0, 2.0, 3.0,};
+
+  UNROLL for(uint i = 1; i <= 3; i++) { // three iterations; UNROLL expands to [unroll] only when FORCE_UNROLL is defined
+
+    if ((a * i) & c) {
+      ret += sin(i * b); // check for sin; the FileCheck patterns at the top of the file look for three unary-op calls, presumably one per unrolled iteration
+
+      if ((a * i) & b) {
+
+        if ((c | i) & b) { // first exit condition; depends on the loop counter i
+          // loop exit here: first early return, preceded by a UAV write
+          uav0[i] += a;
+          return 1;
+        }
+
+        if ((c | i) & a) { // second exit condition; only reached when the first exit was not taken
+          // loop exit here; second early return, writing a different UAV element and returning a different value
+          uav0[i*2] += b;
+          return 2;
+        }
+
+        array[(idx + i) % 5] += a; // check that this side-effect is bounded within exit cond. NOTE(review): the index can reach 4 but array has 3 elements - presumably deliberate to force dynamic indexing; confirm
+      }
+    }
+  }
+
+  return ret + array[0]; // result when no iteration took either early return
+}
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+

+ 63 - 0
tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/struct_exit_outer_loop.hlsl

@@ -0,0 +1,63 @@
+// RUN: %dxc -E main -Zi -O3 -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s | FileCheck %s
+// RUN: %dxc -E main -Zi -Od -T ps_6_0 -opt-enable structurize-loop-exits-for-unroll %s -DFORCE_UNROLL | FileCheck %s
+// RUN: %dxc -E main -Zi -T ps_6_0 %s -opt-enable structurize-loop-exits-for-unroll -DFORCE_UNROLL | FileCheck %s
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// CHECK: dx.struct_exit.cond_body
+// CHECK: store float
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// CHECK: dx.struct_exit.cond_body
+// CHECK: store float
+
+// CHECK: %{{.+}} = call float @dx.op.unary.f32(i32 13
+
+// CHECK: dx.struct_exit.cond_body
+// CHECK: store float
+
+#ifdef FORCE_UNROLL
+#define UNROLL [unroll]
+#else
+#define UNROLL
+#endif
+
+Texture2D tex0;
+RWTexture1D<float> uav0;
+RWTexture1D<float> uav1;
+
+const uint idx;
+
+[RootSignature("CBV(b0), DescriptorTable(SRV(t0)), DescriptorTable(UAV(u0), UAV(u1))")]
+float main(uint a : A, uint b : B, uint c : C) : SV_Target { // entry point; the loop with the early exit is nested inside an outer [loop]
+
+  float ret = 0;
+  float array[] = {1.0, 2.0, 3.0,};
+
+  [loop] for (uint j = 0; j < 2; j++) { // outer loop, two iterations; j is not used by the body
+    UNROLL for(uint i = 1; i <= 3; i++) { // three iterations; UNROLL expands to [unroll] only when FORCE_UNROLL is defined
+
+      if ((a * i) & c) {
+        ret += sin(i * b); // check for sin; the FileCheck patterns at the top of the file look for three unary-op calls, presumably one per unrolled inner iteration
+
+        if ((a * i) & b) {
+
+          if ((c | a) & b) {
+            // loop exit here: the return leaves both the inner and the outer loop, after a UAV write
+            uav0[i] += a;
+            return 1;
+          }
+
+          array[(idx + i) % 5] += a; // check that this side-effect is bounded within exit cond. NOTE(review): the index can reach 4 but array has 3 elements - presumably deliberate to force dynamic indexing; confirm
+        }
+      }
+    }
+  }
+
+  return ret + array[0]; // result when no iteration took the early return
+}
+
+// Exclude quoted source file (see readme)
+// CHECK-LABEL: {{!"[^"]*\\0A[^"]*"}}
+