Jelajahi Sumber

Added -opt-disable sink to disable instruction sinking in instcombine (#5050)

In some cases, sinking instructions can significantly extend register lifetimes. Added the option "-opt-disable sink", in the same vein as "-opt-disable gvn", so shader authors have some degree of control over this behaviour.
Adam Yang 2 tahun lalu
induk
melakukan
129024cd18

+ 1 - 1
include/dxc/HLSL/DxilGenerationPass.h

@@ -117,7 +117,7 @@ void initializeDxilValidateWaveSensitivityPass(llvm::PassRegistry&);
 FunctionPass *createCleanupDxBreakPass();
 void initializeCleanupDxBreakPass(llvm::PassRegistry&);
 
-FunctionPass *createDxilLoopDeletionPass();
+FunctionPass *createDxilLoopDeletionPass(bool NoSink);
 void initializeDxilLoopDeletionPass(llvm::PassRegistry &);
 
 ModulePass *createHLLegalizeParameter();

+ 2 - 0
include/llvm/Transforms/IPO/PassManagerBuilder.h

@@ -137,6 +137,8 @@ public:
   bool HLSLEnableLifetimeMarkers = false; // HLSL Change
   bool HLSLEnableDebugNops = false; // HLSL Change
   bool HLSLEarlyInlining = true; // HLSL Change
+  bool HLSLNoSink = false; // HLSL Change
+  void addHLSLPasses(legacy::PassManagerBase &MPM); // HLSL Change
 
 private:
   /// ExtensionList - This is list of all of the extensions that are registered.

+ 1 - 0
include/llvm/Transforms/Scalar.h

@@ -214,6 +214,7 @@ Pass *createIndVarSimplifyPass();
 //    %Z = add int 2, %X
 //
 FunctionPass *createInstructionCombiningPass();
+FunctionPass *createInstructionCombiningPass(bool HLSLSkipSinkSelect); // HLSL Change
 
 //===----------------------------------------------------------------------===//
 //

+ 14 - 4
lib/HLSL/DxilLoopDeletion.cpp

@@ -16,26 +16,36 @@
 #include "llvm/IR/Function.h"
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
 namespace {
   class DxilLoopDeletion : public FunctionPass {
   public:
+    bool m_HLSLNoSink = false;
     static char ID; // Pass ID, replacement for typeid
-    DxilLoopDeletion() : FunctionPass(ID) {
+    DxilLoopDeletion(bool NoSink=false) : FunctionPass(ID), m_HLSLNoSink(NoSink) {
     }
 
     bool runOnFunction(Function &F) override;
 
+    void applyOptions(PassOptions O) override {
+      GetPassOptionBool(O, "NoSink", &m_HLSLNoSink, /*defaultValue*/false);
+    }
+    void dumpConfig(raw_ostream &OS) override {
+      FunctionPass::dumpConfig(OS);
+      OS << ",NoSink=" << m_HLSLNoSink;
+    }
+
   };
 }
 
 char DxilLoopDeletion::ID = 0;
 INITIALIZE_PASS(DxilLoopDeletion, "dxil-loop-deletion",
-                "Delete dead loops", false, false)
+                "Dxil Delete dead loops", false, false)
 
-FunctionPass *llvm::createDxilLoopDeletionPass() { return new DxilLoopDeletion(); }
+FunctionPass *llvm::createDxilLoopDeletionPass(bool NoSink) { return new DxilLoopDeletion(NoSink); }
 
 bool DxilLoopDeletion::runOnFunction(Function &F) {
   // Run loop simplify first to make sure loop invariant is moved so loop
@@ -48,7 +58,7 @@ bool DxilLoopDeletion::runOnFunction(Function &F) {
   legacy::FunctionPassManager SimplifyPM(F.getParent());
   SimplifyPM.add(createCFGSimplificationPass());
   SimplifyPM.add(createDeadCodeEliminationPass());
-  SimplifyPM.add(createInstructionCombiningPass());
+  SimplifyPM.add(createInstructionCombiningPass(/*HLSL No sink*/m_HLSLNoSink));
 
   const unsigned kMaxIteration = 3;
   unsigned i=0;

+ 14 - 19
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -207,7 +207,7 @@ void PassManagerBuilder::populateFunctionPassManager(
 }
 
 // HLSL Change Starts
-static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, bool OnlyWarnOnUnrollFail, bool StructurizeLoopExitsForUnroll, bool EnableLifetimeMarkers, hlsl::HLSLExtensionsCodegenHelper *ExtHelper, legacy::PassManagerBase &MPM) {
+void PassManagerBuilder::addHLSLPasses(legacy::PassManagerBase &MPM) {
 
   // Don't do any lowering if we're targeting high-level.
   if (HLSLHighLevel) {
@@ -268,7 +268,7 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, bool OnlyWarnOn
     // Clean up inefficiencies that can cause unnecessary live values related to
     // lifetime marker cleanup blocks. This is the earliest possible location
     // without interfering with HLSL-specific lowering.
-    if (EnableLifetimeMarkers) {
+    if (HLSLEnableLifetimeMarkers) {
       MPM.add(createSROAPass());
       MPM.add(createSimplifyInstPass());
       MPM.add(createJumpThreadingPass());
@@ -287,7 +287,7 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, bool OnlyWarnOn
   // Verify no undef resource again after promotion
   MPM.add(createInvalidateUndefResourcesPass());
 
-  MPM.add(createDxilGenerationPass(NoOpt, ExtHelper));
+  MPM.add(createDxilGenerationPass(NoOpt, this->HLSLExtensionsCodeGen));
 
   // Propagate precise attribute.
   MPM.add(createDxilPrecisePropagatePass());
@@ -306,7 +306,7 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, bool OnlyWarnOn
   // struct members.
   // Needs to happen before resources are lowered and before HL
   // module is gone.
-  MPM.add(createDxilLoopUnrollPass(1024, OnlyWarnOnUnrollFail, StructurizeLoopExitsForUnroll));
+  MPM.add(createDxilLoopUnrollPass(1024, HLSLOnlyWarnOnUnrollFail, StructurizeLoopExitsForUnroll));
 
   // Default unroll pass. This is purely for optimizing loops without
   // attributes.
@@ -365,12 +365,7 @@ void PassManagerBuilder::populateModulePassManager(
     addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
 
     // HLSL Change Begins.
-    addHLSLPasses(HLSLHighLevel, OptLevel,
-      this->HLSLOnlyWarnOnUnrollFail,
-      this->StructurizeLoopExitsForUnroll,
-      this->HLSLEnableLifetimeMarkers,
-      this->HLSLExtensionsCodeGen,
-      MPM);
+    addHLSLPasses(MPM);
 
     if (!HLSLHighLevel) {
       MPM.add(createDxilConvergentClearPass());
@@ -414,7 +409,7 @@ void PassManagerBuilder::populateModulePassManager(
     MPM.add(Inliner);
     Inliner = nullptr;
   }
-  addHLSLPasses(HLSLHighLevel, OptLevel, this->HLSLOnlyWarnOnUnrollFail, this->StructurizeLoopExitsForUnroll, this->HLSLEnableLifetimeMarkers, HLSLExtensionsCodeGen, MPM); // HLSL Change
+  addHLSLPasses(MPM);
   // HLSL Change Ends
 
   // Add LibraryInfo if we have some.
@@ -431,7 +426,7 @@ void PassManagerBuilder::populateModulePassManager(
 
     MPM.add(createDeadArgEliminationPass());  // Dead argument elimination
 
-    MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+    MPM.add(createInstructionCombiningPass(HLSLNoSink));// Clean up after IPCP & DAE
     addExtensionsToPM(EP_Peephole, MPM);
     MPM.add(createCFGSimplificationPass());   // Clean up after IPCP & DAE
   }
@@ -462,7 +457,7 @@ void PassManagerBuilder::populateModulePassManager(
   // HLSL Change. MPM.add(createJumpThreadingPass());         // Thread jumps.
   MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
-  MPM.add(createInstructionCombiningPass());  // Combine silly seq's
+  MPM.add(createInstructionCombiningPass(HLSLNoSink));  // Combine silly seq's
   addExtensionsToPM(EP_Peephole, MPM);
   // HLSL Change Begins.
   // HLSL does not allow recursive functions.
@@ -475,7 +470,7 @@ void PassManagerBuilder::populateModulePassManager(
   // HLSL Change - disable LICM in frontend for not consider register pressure.
   //MPM.add(createLICMPass());                  // Hoist loop invariants
   //MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); // HLSL Change - may move barrier inside divergent if.
-  MPM.add(createInstructionCombiningPass());
+  MPM.add(createInstructionCombiningPass(HLSLNoSink));
   MPM.add(createIndVarSimplifyPass());        // Canonicalize indvars
   // HLSL Change Begins
   // Don't allow loop idiom pass which may insert memset/memcpy thereby breaking the dxil
@@ -517,7 +512,7 @@ void PassManagerBuilder::populateModulePassManager(
 
   // Run instcombine after redundancy elimination to exploit opportunities
   // opened up by them.
-  MPM.add(createInstructionCombiningPass());
+  MPM.add(createInstructionCombiningPass(HLSLNoSink));
   addExtensionsToPM(EP_Peephole, MPM);
   // HLSL Change. MPM.add(createJumpThreadingPass());         // Thread jumps
   MPM.add(createCorrelatedValuePropagationPass());
@@ -557,7 +552,7 @@ void PassManagerBuilder::populateModulePassManager(
 
   MPM.add(createAggressiveDCEPass());         // Delete dead instructions
   MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
-  MPM.add(createInstructionCombiningPass());  // Clean up after everything.
+  MPM.add(createInstructionCombiningPass(HLSLNoSink));  // Clean up after everything.
   addExtensionsToPM(EP_Peephole, MPM);
 
   // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
@@ -587,7 +582,7 @@ void PassManagerBuilder::populateModulePassManager(
   // on -O1 and no #pragma is found). Would be good to have these two passes
   // as function calls, so that we can only pass them when the vectorizer
   // changed the code.
-  MPM.add(createInstructionCombiningPass());
+  MPM.add(createInstructionCombiningPass(HLSLNoSink));
 #if HLSL_VECTORIZATION_ENABLED // HLSL Change - don't build vectorization passes
   if (OptLevel > 1 && ExtraVectorizerPasses) {
     // At higher optimization levels, try to clean up any runtime overlap and
@@ -631,14 +626,14 @@ void PassManagerBuilder::populateModulePassManager(
 
   addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createCFGSimplificationPass());
-  MPM.add(createDxilLoopDeletionPass()); // HLSL Change - try to delete loop again.
+  MPM.add(createDxilLoopDeletionPass(HLSLNoSink)); // HLSL Change - try to delete loop again.
   //MPM.add(createInstructionCombiningPass()); // HLSL Change - pass is included in above
 
   if (!DisableUnrollLoops) {
     MPM.add(createLoopUnrollPass(/* HLSL Change begin */-1, -1, -1, -1, this->StructurizeLoopExitsForUnroll /* HLSL Change end */));    // Unroll small loops
 
     // LoopUnroll may generate some redundancy to clean up.
-    MPM.add(createInstructionCombiningPass());
+    MPM.add(createInstructionCombiningPass(HLSLNoSink));
 
     // Runtime unrolling will introduce runtime check in loop prologue. If the
     // unrolled loop is a inner loop, then the prologue will be inside the

+ 2 - 0
lib/Transforms/InstCombine/InstCombineInternal.h

@@ -174,6 +174,8 @@ public:
   typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
   BuilderTy *Builder;
 
+  bool m_HLSLNoSinks = false; // HLSL Change
+
 private:
   // Mode in which we are running the combiner.
   const bool MinimizeSize;

+ 27 - 3
lib/Transforms/InstCombine/InstructionCombining.cpp

@@ -2738,6 +2738,7 @@ bool InstCombiner::run() {
         // only has us as a predecessors (we'd have to split the critical edge
         // otherwise), we can keep going.
         if (UserIsSuccessor && UserParent->getSinglePredecessor()) {
+          if (!m_HLSLNoSinks) // HLSL Change
           // Okay, the CFG is simple enough, try to sink this instruction.
           if (TryToSinkInstruction(I, UserParent)) {
             MadeIRChange = true;
@@ -2979,7 +2980,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
 }
 
 static bool
-combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
+combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, bool HLSLNoSink/*HLSL Change*/,
                                 AliasAnalysis *AA, AssumptionCache &AC,
                                 TargetLibraryInfo &TLI, DominatorTree &DT,
                                 LoopInfo *LI = nullptr) {
@@ -3009,6 +3010,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
 
     InstCombiner IC(Worklist, &Builder, MinimizeSize,
                     AA, &AC, &TLI, &DT, DL, LI);
+    IC.m_HLSLNoSinks = HLSLNoSink;
     if (IC.run())
       Changed = true;
 
@@ -3028,7 +3030,7 @@ PreservedAnalyses InstCombinePass::run(Function &F,
   auto *LI = AM->getCachedResult<LoopAnalysis>(F);
 
   // FIXME: The AliasAnalysis is not yet supported in the new pass manager
-  if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, LI))
+  if (!combineInstructionsOverFunction(F, Worklist, /*HLSLNoSink*/false, nullptr, AC, TLI, DT, LI))
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
 
@@ -3054,8 +3056,25 @@ public:
     initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
   }
 
+  // HLSL Change - begin
+  // When true, runOnFunction tells the combiner not to sink instructions
+  // into successor blocks.
+  bool m_HLSLNoSink = false;
+  // Builds the pass with an explicit sinking policy. Marked explicit so a
+  // plain bool can never convert to a pass implicitly; the flag is set in the
+  // initializer list rather than assigned in the body.
+  explicit InstructionCombiningPass(bool HLSLNoSink)
+      : FunctionPass(ID), m_HLSLNoSink(HLSLNoSink) {
+    initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  // Reads the "NoSink" pass option into m_HLSLNoSink; false is supplied as
+  // the default value.
+  void applyOptions(PassOptions O) override {
+    GetPassOptionBool(O, "NoSink", &m_HLSLNoSink, /*defaultValue*/false);
+  }
+  // Emits the base-class configuration, then appends ",NoSink=" followed by
+  // the current flag value.
+  void dumpConfig(raw_ostream &OS) override {
+    FunctionPass::dumpConfig(OS);
+    OS << ",NoSink=" << m_HLSLNoSink;
+  }
+  // HLSL Change - end
+
   void getAnalysisUsage(AnalysisUsage &AU) const override;
   bool runOnFunction(Function &F) override;
+
 };
 }
 
@@ -3082,7 +3101,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
   auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
   auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
 
-  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, LI);
+  return combineInstructionsOverFunction(F, Worklist, m_HLSLNoSink /*HLSL Change*/, AA, AC, TLI, DT, LI);
 }
 
 char InstructionCombiningPass::ID = 0;
@@ -3107,3 +3126,8 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
 FunctionPass *llvm::createInstructionCombiningPass() {
   return new InstructionCombiningPass();
 }
+// HLSL Change - begin
+FunctionPass *llvm::createInstructionCombiningPass(bool HLSLNoSink) {
+  return new InstructionCombiningPass(HLSLNoSink);
+}
+// HLSL Change - end

+ 3 - 0
tools/clang/lib/CodeGen/BackendUtil.cpp

@@ -340,6 +340,9 @@ void EmitAssemblyHelper::CreatePasses() {
   PMBuilder.EnableGVN = !CodeGenOpts.HLSLOptimizationToggles.count("gvn") ||
                         CodeGenOpts.HLSLOptimizationToggles.find("gvn")->second;
 
+  PMBuilder.HLSLNoSink = CodeGenOpts.HLSLOptimizationToggles.count("sink") &&
+                         !CodeGenOpts.HLSLOptimizationToggles.find("sink")->second;
+
   PMBuilder.StructurizeLoopExitsForUnroll =
                         !CodeGenOpts.HLSLOptimizationToggles.count("structurize-loop-exits-for-unroll") ||
                         CodeGenOpts.HLSLOptimizationToggles.find("structurize-loop-exits-for-unroll")->second;

+ 77 - 0
tools/clang/test/HLSLFileCheck/passes/llvm/instcombine/dead_loop_nosink.ll

@@ -0,0 +1,77 @@
+; RUN: %opt %s -dxil-loop-deletion,NoSink=1 -S | FileCheck %s
+
+; dxil-loop-deletion runs instcombine internally. Check the no sink flag is set correctly.
+; Use NoSink=1 to turn off instruction sinking in instcombine
+
+; CHECK: @main
+
+; Make sure loop is deleted.
+; CHECK-NOT: loop:
+
+; CHECK: %sel0 =
+
+; CHECK: if.1:
+; CHECK: %sel1 =
+
+; CHECK: if.2:
+; CHECK: %sel2 =
+
+; CHECK: if.3:
+; CHECK: %sel3 =
+
+; CHECK: if.4:
+; CHECK: %sel4 =
+
+; CHECK: if.5:
+; CHECK: %sel5 =
+
+; CHECK: if.6:
+; CHECK: %sel6 =
+
+define float @main(
+  float %a0_, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6,
+  i1 %cond0, i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4, i1 %cond5, i1 %cond6,
+  i1 %br0, i1 %br1, i1 %br2, i1 %br3, i1 %br4, i1 %br5, i1 %br6,
+  i32 %loop_bound)
+{
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+  %loop_cond = icmp slt i32 %i, %loop_bound
+  %i.inc = add i32 %i, 1
+  br i1 %loop_cond, label %loop, label %if.0
+
+if.0:
+  %sel0 = select i1 %cond0, float 0.0, float %a0
+  br i1 %br0, label %if.1, label %if.end
+
+if.1:
+  %sel1 = select i1 %cond1, float %sel0, float %a1
+  br i1 %br1, label %if.2, label %if.end
+
+if.2:
+  %sel2 = select i1 %cond2, float %sel1, float %a2
+  br i1 %br2, label %if.3, label %if.end
+
+if.3:
+  %sel3 = select i1 %cond3, float %sel2, float %a3
+  br i1 %br3, label %if.4, label %if.end
+
+if.4:
+  %sel4 = select i1 %cond4, float %sel3, float %a4
+  br i1 %br4, label %if.5, label %if.end
+
+if.5:
+  %sel5 = select i1 %cond5, float %sel4, float %a5
+  br i1 %br5, label %if.6, label %if.end
+
+if.6:
+  %sel6 = select i1 %cond6, float %sel5, float %a6
+  br label %if.end
+
+if.end:
+  %val = phi float [ %sel6, %if.6 ], [ 0.0, %if.0 ], [ 0.0, %if.1 ], [ 0.0, %if.2 ], [ 0.0, %if.3 ], [ 0.0, %if.4 ], [ 0.0, %if.5 ]
+  ret float %val
+}

+ 38 - 0
tools/clang/test/HLSLFileCheck/passes/llvm/instcombine/nosink.hlsl

@@ -0,0 +1,38 @@
+// RUN: %dxc %s /T ps_6_0 -opt-disable sink | FileCheck %s
+
+// Make sure the selects are NOT sunk into a single block
+
+cbuffer cb {
+    bool cond[6];
+    bool br[6];
+    float a[6];
+}
+
+float main() : SV_Target {
+    float val = 0;
+    float ret = 0;
+    // CHECK: br
+    if (br[0]) {
+        // CHECK: select
+        val = cond[0] ? 0 : a[0];
+        // CHECK: br
+        if (br[1]) {
+            // CHECK: select
+            val = cond[1] ? val : a[1];
+            // CHECK: br
+            if (br[2]) {
+                // CHECK: select
+                val = cond[2] ? val : a[2];
+                // CHECK: br
+                if (br[3]) {
+                    // CHECK: select
+                    val = cond[3] ? val : a[3];
+                    ret = val;
+                }
+            }
+        }
+    }
+    // CHECK: phi
+
+    return ret;
+}

+ 66 - 0
tools/clang/test/HLSLFileCheck/passes/llvm/instcombine/nosink.ll

@@ -0,0 +1,66 @@
+; RUN: %opt %s -instcombine,NoSink=1 -S | FileCheck %s
+
+; Use NoSink=1 to turn off instruction sinking in instcombine
+
+; CHECK: @main
+; CHECK: if.0:
+; CHECK: %sel0 =
+
+; CHECK: if.1:
+; CHECK: %sel1 =
+
+; CHECK: if.2:
+; CHECK: %sel2 =
+
+; CHECK: if.3:
+; CHECK: %sel3 =
+
+; CHECK: if.4:
+; CHECK: %sel4 =
+
+; CHECK: if.5:
+; CHECK: %sel5 =
+
+; CHECK: if.6:
+; CHECK: %sel6 =
+
+define float @main(
+  float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6,
+  i1 %cond0, i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4, i1 %cond5, i1 %cond6,
+  i1 %br0, i1 %br1, i1 %br2, i1 %br3, i1 %br4, i1 %br5, i1 %br6)
+{
+entry:
+  br i1 %br0, label %if.0, label %if.end
+
+if.0:
+  %sel0 = select i1 %cond0, float 0.0, float %a0
+  br i1 %br0, label %if.1, label %if.end
+
+if.1:
+  %sel1 = select i1 %cond1, float %sel0, float %a1
+  br i1 %br1, label %if.2, label %if.end
+
+if.2:
+  %sel2 = select i1 %cond2, float %sel1, float %a2
+  br i1 %br2, label %if.3, label %if.end
+
+if.3:
+  %sel3 = select i1 %cond3, float %sel2, float %a3
+  br i1 %br3, label %if.4, label %if.end
+
+if.4:
+  %sel4 = select i1 %cond4, float %sel3, float %a4
+  br i1 %br4, label %if.5, label %if.end
+
+if.5:
+  %sel5 = select i1 %cond5, float %sel4, float %a5
+  br i1 %br5, label %if.6, label %if.end
+
+if.6:
+  %sel6 = select i1 %cond6, float %sel5, float %a6
+  br label %if.end
+
+if.end:
+  %val = phi float [ %sel6, %if.6 ], [ 0.0, %if.0 ], [ 0.0, %if.1 ], [ 0.0, %if.2 ], [ 0.0, %if.3 ], [ 0.0, %if.4 ], [ 0.0, %if.5 ], [ 0.0, %entry ]
+  ret float %val
+}

+ 39 - 0
tools/clang/test/HLSLFileCheck/passes/llvm/instcombine/sink.hlsl

@@ -0,0 +1,39 @@
+// RUN: %dxc %s /T ps_6_0 -opt-enable sink | FileCheck %s
+// RUN: %dxc %s /T ps_6_0                  | FileCheck %s
+
+// Make sure the selects are sunk into a single block
+
+cbuffer cb {
+    bool cond[6];
+    bool br[6];
+    float a[6];
+}
+
+float main() : SV_Target {
+    float val = 0;
+    float ret = 0;
+    // CHECK: br
+    if (br[0]) {
+        val = cond[0] ? 0 : a[0];
+        // CHECK: br
+        if (br[1]) {
+            val = cond[1] ? val : a[1];
+            // CHECK: br
+            if (br[2]) {
+                val = cond[2] ? val : a[2];
+                // CHECK: br
+                if (br[3]) {
+                    // CHECK: select
+                    // CHECK: select
+                    // CHECK: select
+                    // CHECK: select
+                    val = cond[3] ? val : a[3];
+                    ret = val;
+                }
+            }
+        }
+    }
+    // CHECK: phi
+
+    return ret;
+}

+ 55 - 0
tools/clang/test/HLSLFileCheck/passes/llvm/instcombine/sink.ll

@@ -0,0 +1,55 @@
+; RUN: %opt %s -instcombine -S | FileCheck %s
+
+; Check that sel0 - sel6 are sunk into if.6
+
+; CHECK: @main
+
+; CHECK: if.6:
+; CHECK-NEXT: %sel0 =
+; CHECK-NEXT: %sel1 =
+; CHECK-NEXT: %sel2 =
+; CHECK-NEXT: %sel3 =
+; CHECK-NEXT: %sel4 =
+; CHECK-NEXT: %sel5 =
+; CHECK-NEXT: %sel6 =
+
+define float @main(
+  float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6,
+  i1 %cond0, i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4, i1 %cond5, i1 %cond6,
+  i1 %br0, i1 %br1, i1 %br2, i1 %br3, i1 %br4, i1 %br5, i1 %br6)
+{
+entry:
+  br i1 %br0, label %if.0, label %if.end
+
+if.0:
+  %sel0 = select i1 %cond0, float 0.0, float %a0
+  br i1 %br0, label %if.1, label %if.end
+
+if.1:
+  %sel1 = select i1 %cond1, float %sel0, float %a1
+  br i1 %br1, label %if.2, label %if.end
+
+if.2:
+  %sel2 = select i1 %cond2, float %sel1, float %a2
+  br i1 %br2, label %if.3, label %if.end
+
+if.3:
+  %sel3 = select i1 %cond3, float %sel2, float %a3
+  br i1 %br3, label %if.4, label %if.end
+
+if.4:
+  %sel4 = select i1 %cond4, float %sel3, float %a4
+  br i1 %br4, label %if.5, label %if.end
+
+if.5:
+  %sel5 = select i1 %cond5, float %sel4, float %a5
+  br i1 %br5, label %if.6, label %if.end
+
+if.6:
+  %sel6 = select i1 %cond6, float %sel5, float %a6
+  br label %if.end
+
+if.end:
+  %val = phi float [ %sel6, %if.6 ], [ 0.0, %if.0 ], [ 0.0, %if.1 ], [ 0.0, %if.2 ], [ 0.0, %if.3 ], [ 0.0, %if.4 ], [ 0.0, %if.5 ], [ 0.0, %entry ]
+  ret float %val
+}

+ 35 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -180,6 +180,7 @@ public:
   TEST_METHOD(CompileWhenIncludeEmptyThenOK)
 
   TEST_METHOD(CompileWhenODumpThenPassConfig)
+  TEST_METHOD(CompileWhenODumpThenCheckNoSink)
   TEST_METHOD(CompileWhenODumpThenOptimizerMatch)
   TEST_METHOD(CompileWhenVdThenProducesDxilContainer)
 
@@ -2963,6 +2964,40 @@ TEST_F(CompilerTest, CompileWhenIncludeEmptyThenOK) {
 
 static const char EmptyCompute[] = "[numthreads(8,8,1)] void main() { }";
 
+// Compiles an empty compute shader with -Odump, once without and once with
+// "-opt-disable sink", and verifies that the dumped pass list carries the
+// matching NoSink value on both instcombine and dxil-loop-deletion.
+TEST_F(CompilerTest, CompileWhenODumpThenCheckNoSink) {
+  // One scenario: the compiler arguments and the pass strings that must
+  // appear in the -Odump output.
+  struct Check {
+    std::vector<const WCHAR *> Args;
+    std::vector<const WCHAR *> Passes;
+  };
+
+  Check Checks[] = {
+    { {L"-Odump"},                      {L"-instcombine,NoSink=0",L"-dxil-loop-deletion,NoSink=0"} },
+    { {L"-Odump",L"-opt-disable sink"}, {L"-instcombine,NoSink=1",L"-dxil-loop-deletion,NoSink=1"} },
+  };
+
+  for (Check &C : Checks) {
+
+    CComPtr<IDxcCompiler> pCompiler;
+    CComPtr<IDxcOperationResult> pResult;
+    CComPtr<IDxcBlobEncoding> pSource;
+
+    VERIFY_SUCCEEDED(CreateCompiler(&pCompiler));
+    CreateBlobFromText(EmptyCompute, &pSource);
+
+    // The argument count is narrowed explicitly instead of relying on an
+    // implicit size_t conversion at the call site.
+    VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"source.hlsl", L"main",
+      L"cs_6_0", C.Args.data(), static_cast<UINT32>(C.Args.size()),
+      nullptr, 0, nullptr, &pResult));
+
+    VerifyOperationSucceeded(pResult);
+    CComPtr<IDxcBlob> pResultBlob;
+    VERIFY_SUCCEEDED(pResult->GetResult(&pResultBlob));
+    wstring passes = BlobToWide(pResultBlob);
+
+    // Every expected pass string must occur somewhere in the dumped list.
+    for (const WCHAR *pPattern : C.Passes) {
+      VERIFY_ARE_NOT_EQUAL(wstring::npos, passes.find(pPattern));
+    }
+  }
+}
+
 TEST_F(CompilerTest, CompileWhenODumpThenPassConfig) {
   CComPtr<IDxcCompiler> pCompiler;
   CComPtr<IDxcOperationResult> pResult;

+ 6 - 1
utils/hct/hctdb.py

@@ -2218,7 +2218,9 @@ class db_dxil(object):
         add_pass('deadargelim', 'DAE', 'Dead Argument Elimination', [])
         # Should we get rid of this, or invest in bugpoint support?
         add_pass('deadarghaX0r', 'DAH', 'Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)', [])
-        add_pass('instcombine', 'InstructionCombiningPass', 'Combine redundant instructions', [])
+        add_pass('instcombine', 'InstructionCombiningPass', 'Combine redundant instructions', [
+            {'n':'NoSink', 't':'bool', 'c':1},
+        ])
         add_pass('prune-eh', 'PruneEH', 'Remove unused exception handling info', [])
         add_pass('functionattrs', 'FunctionAttrs', 'Deduce function attributes', [])
         # add_pass('argpromotion', 'ArgPromotion', "Promote 'by reference' arguments to scalars", [
@@ -2248,6 +2250,9 @@ class db_dxil(object):
         add_pass('dxil-remove-dead-blocks', 'DxilRemoveDeadBlocks', 'DxilRemoveDeadBlocks', [])
         add_pass('dxil-o0-legalize', 'DxilNoOptLegalize', 'DXIL No-Opt Legalize', [])
         add_pass('dxil-o0-simplify-inst', 'DxilNoOptSimplifyInstructions', 'DXIL No-Opt Simplify Inst', [])
+        add_pass('dxil-loop-deletion', 'DxilLoopDeletion', "Dxil Delete dead loops", [
+            {'n':'NoSink', 't':'bool', 'c':1},
+        ])
         add_pass('loop-deletion', 'LoopDeletion', "Delete dead loops", [])
         add_pass('loop-interchange', 'LoopInterchange', 'Interchanges loops for cache reuse', [])
         add_pass('loop-unroll', 'LoopUnroll', 'Unroll loops', [