Ver Fonte

Loop count for all trivial form loops. Mem2Reg only when necessary. (#2250)


* Comments and small adjustments

* Deleting loops from scalar evolution correctly

* Better error/warning message
Adam Yang há 6 anos atrás
pai
commit
e32833cac7

+ 1 - 0
include/llvm/InitializePasses.h

@@ -261,6 +261,7 @@ void initializeResourceToHandlePass(PassRegistry&);
 void initializeSROA_SSAUp_HLSLPass(PassRegistry&);
 void initializeHoistConstantArrayPass(PassRegistry&);
 void initializeDxilLoopUnrollPass(PassRegistry&);
+void initializeDxilConditionalMem2RegPass(PassRegistry&);
 void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
 // HLSL Change Ends
 void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);

+ 3 - 0
include/llvm/Transforms/Scalar.h

@@ -134,6 +134,9 @@ void initializeSROA_Parameter_HLSLPass(PassRegistry&);
 Pass *createDxilFixConstArrayInitializerPass();
 void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
 
+Pass *createDxilConditionalMem2RegPass(bool NoOpt);
+void initializeDxilConditionalMem2RegPass(PassRegistry&);
+
 Pass *createDxilLoopUnrollPass(unsigned MaxIterationAttempt);
 void initializeDxilLoopUnrollPass(PassRegistry&);
 //===----------------------------------------------------------------------===//

+ 6 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -87,6 +87,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilAllocateResourcesForLibPass(Registry);
     initializeDxilCleanupAddrSpaceCastPass(Registry);
     initializeDxilCondenseResourcesPass(Registry);
+    initializeDxilConditionalMem2RegPass(Registry);
     initializeDxilConvergentClearPass(Registry);
     initializeDxilConvergentMarkPass(Registry);
     initializeDxilDeadFunctionEliminationPass(Registry);
@@ -190,6 +191,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   static const LPCSTR ArgPromotionArgs[] = { "maxElements" };
   static const LPCSTR CFGSimplifyPassArgs[] = { "Threshold", "Ftor", "bonus-inst-threshold" };
   static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "force-early-z", "add-pixel-cost", "rt-width", "sv-position-index", "num-pixels" };
+  static const LPCSTR DxilConditionalMem2RegArgs[] = { "NoOpt" };
   static const LPCSTR DxilDebugInstrumentationArgs[] = { "UAVSize", "parameter0", "parameter1", "parameter2" };
   static const LPCSTR DxilGenerationPassArgs[] = { "NotOptimized" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "mod-mode", "constant-red", "constant-green", "constant-blue", "constant-alpha" };
@@ -223,6 +225,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   if (strcmp(passName, "argpromotion") == 0) return ArrayRef<LPCSTR>(ArgPromotionArgs, _countof(ArgPromotionArgs));
   if (strcmp(passName, "simplifycfg") == 0) return ArrayRef<LPCSTR>(CFGSimplifyPassArgs, _countof(CFGSimplifyPassArgs));
   if (strcmp(passName, "hlsl-dxil-add-pixel-hit-instrmentation") == 0) return ArrayRef<LPCSTR>(DxilAddPixelHitInstrumentationArgs, _countof(DxilAddPixelHitInstrumentationArgs));
+  if (strcmp(passName, "dxil-cond-mem2reg") == 0) return ArrayRef<LPCSTR>(DxilConditionalMem2RegArgs, _countof(DxilConditionalMem2RegArgs));
   if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
@@ -263,6 +266,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   static const LPCSTR ArgPromotionArgs[] = { "None" };
   static const LPCSTR CFGSimplifyPassArgs[] = { "None", "None", "Control the number of bonus instructions (default = 1)" };
   static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "None", "None", "None", "None", "None" };
+  static const LPCSTR DxilConditionalMem2RegArgs[] = { "None" };
   static const LPCSTR DxilDebugInstrumentationArgs[] = { "None", "None", "None", "None" };
   static const LPCSTR DxilGenerationPassArgs[] = { "None" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "None", "None", "None", "None", "None" };
@@ -296,6 +300,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   if (strcmp(passName, "argpromotion") == 0) return ArrayRef<LPCSTR>(ArgPromotionArgs, _countof(ArgPromotionArgs));
   if (strcmp(passName, "simplifycfg") == 0) return ArrayRef<LPCSTR>(CFGSimplifyPassArgs, _countof(CFGSimplifyPassArgs));
   if (strcmp(passName, "hlsl-dxil-add-pixel-hit-instrmentation") == 0) return ArrayRef<LPCSTR>(DxilAddPixelHitInstrumentationArgs, _countof(DxilAddPixelHitInstrumentationArgs));
+  if (strcmp(passName, "dxil-cond-mem2reg") == 0) return ArrayRef<LPCSTR>(DxilConditionalMem2RegArgs, _countof(DxilConditionalMem2RegArgs));
   if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
@@ -340,6 +345,7 @@ static bool IsPassOptionName(StringRef S) {
     ||  S.equals("InlineThreshold")
     ||  S.equals("InsertLifetime")
     ||  S.equals("MaxHeaderSize")
+    ||  S.equals("NoOpt")
     ||  S.equals("NotOptimized")
     ||  S.equals("Os")
     ||  S.equals("ReplaceAllVectors")

+ 6 - 4
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -252,10 +252,12 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   // Change dynamic indexing vector to array.
   MPM.add(createDynamicIndexingVectorToArrayPass(NoOpt));
 
-  if (!NoOpt) {
-    // mem2reg
-    MPM.add(createPromoteMemoryToRegisterPass());
+  // mem2reg
+  // Special Mem2Reg pass that only happens if optimization is
+  // enabled or loop unroll is needed.
+  MPM.add(createDxilConditionalMem2RegPass(NoOpt));
 
+  if (!NoOpt) {
     MPM.add(createDxilConvergentMarkPass());
   }
 
@@ -269,7 +271,7 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   // Needs to happen before resources are lowered and before HL
   // module is gone.
   MPM.add(createLoopRotatePass());
-  MPM.add(createDxilLoopUnrollPass(/*MaxIterationAttempt*/ 128));
+  MPM.add(createDxilLoopUnrollPass(1024));
 
   // Default unroll pass. This is purely for optimizing loops without
   // attributes.

+ 122 - 15
lib/Transforms/Scalar/DxilLoopUnroll.cpp

@@ -60,6 +60,7 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -74,6 +75,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/IR/LegacyPassManager.h"
 
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/HLSL/HLModule.h"
@@ -110,7 +112,7 @@ public:
   std::unordered_set<Function *> CleanedUpAlloca;
   unsigned MaxIterationAttempt = 0;
 
-  DxilLoopUnroll(unsigned MaxIterationAttempt = 128) :
+  DxilLoopUnroll(unsigned MaxIterationAttempt = 1024) :
     LoopPass(ID),
     MaxIterationAttempt(MaxIterationAttempt)
   {
@@ -120,16 +122,17 @@ public:
   bool runOnLoop(Loop *L, LPPassManager &LPM) override;
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<LoopInfoWrapperPass>();
-    AU.addRequiredID(LoopSimplifyID);
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addRequired<ScalarEvolution>();
+    AU.addRequiredID(LoopSimplifyID);
   }
 };
 
 char DxilLoopUnroll::ID;
 
-static void FailLoopUnroll(bool WarnOnly, LLVMContext &Ctx, DebugLoc DL, const char *Message) {
+static void FailLoopUnroll(bool WarnOnly, LLVMContext &Ctx, DebugLoc DL, const Twine &Message) {
   if (WarnOnly) {
     if (DL)
       Ctx.emitWarning(hlsl::dxilutil::FormatMessageAtLocation(DL, Message));
@@ -684,6 +687,7 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   DebugLoc LoopLoc = L->getStartLoc(); // Debug location for the start of the loop.
   Function *F = L->getHeader()->getParent();
+  ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
 
   bool HasExplicitLoopCount = false;
   int ExplicitUnrollCountSigned = 0;
@@ -714,6 +718,18 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
     FxcCompatMode = HM.GetHLOptions().bFXCCompatMode;
   }
 
+  unsigned TripCount = 0;
+  unsigned TripMultiple = 0;
+  bool HasTripCount = false;
+  BasicBlock *ExitingBlock = L->getLoopLatch();
+  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
+    ExitingBlock = L->getExitingBlock();
+  if (ExitingBlock) {
+    TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+    TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+    HasTripCount = TripMultiple != 1 || TripCount == 1;
+  }
+
   // Analysis passes
   DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   AssumptionCache *AC =
@@ -736,12 +752,6 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
     return false;
   }
 
-  // Promote alloca's
-  if (!CleanedUpAlloca.count(F)) {
-    CleanedUpAlloca.insert(F);
-    Mem2Reg(*F, *DT, *AC);
-  }
-
   SmallVector<BasicBlock *, 16> ExitBlocks;
   L->getExitBlocks(ExitBlocks);
   std::unordered_set<BasicBlock *> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
@@ -839,9 +849,15 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   SmallVector<std::unique_ptr<LoopIteration>, 16> Iterations; // List of cloned iterations
   bool Succeeded = false;
 
-  if (HasExplicitLoopCount) {
-    this->MaxIterationAttempt = std::max(this->MaxIterationAttempt, ExplicitUnrollCount);
+  // If we were able to figure out the definitive trip count,
+  // just unroll that many times.
+  if (HasTripCount) {
+    this->MaxIterationAttempt = TripCount;
   }
+  else if (HasExplicitLoopCount) {
+    this->MaxIterationAttempt = ExplicitUnrollCount;
+  }
+
   for (unsigned IterationI = 0; IterationI < this->MaxIterationAttempt; IterationI++) {
 
     LoopIteration *PrevIteration = nullptr;
@@ -957,7 +973,9 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
     }
 
     // We've reached the N defined in [unroll(N)]
-    if (HasExplicitLoopCount && IterationI+1 >= ExplicitUnrollCount) {
+    if ((HasExplicitLoopCount && IterationI+1 >= ExplicitUnrollCount) ||
+      (HasTripCount && IterationI+1 >= TripCount))
+    {
       Succeeded = true;
       BranchInst *BI = cast<BranchInst>(CurIteration.Latch->getTerminator());
 
@@ -1024,6 +1042,8 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
       }
     }
 
+    SE->forgetLoop(L);
+
     // Remove the original blocks that we've cloned from all loops.
     for (BasicBlock *BB : ToBeCloned)
       LI->removeBlock(BB);
@@ -1061,9 +1081,16 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   // If we were unsuccessful in unrolling the loop
   else {
-    FailLoopUnroll(FxcCompatMode /*warn only*/, F->getContext(), LoopLoc,
-      "Could not unroll loop. Loop bound could not be deduced at compile time. "
-      "To give an explicit unroll bound, use unroll(n).");
+    const char *Msg =
+        "Could not unroll loop. Loop bound could not be deduced at compile time. "
+        "Use [unroll(n)] to give an explicit count.";
+    if (FxcCompatMode) {
+      FailLoopUnroll(true /*warn only*/, F->getContext(), LoopLoc, Msg);
+    }
+    else {
+      FailLoopUnroll(false /*warn only*/, F->getContext(), LoopLoc,
+        Twine(Msg) + Twine(" Use '-HV 2016' to treat this as warning."));
+    }
 
     // Remove all the cloned blocks
     for (std::unique_ptr<LoopIteration> &Ptr : Iterations) {
@@ -1088,8 +1115,88 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
 }
 
+// Conditional Mem2Reg pass
+//
+// In order to figure out loop bounds to unroll, we must first run mem2reg pass
+// on the function, but we don't want to run mem2reg on functions that don't
+// have to be unrolled when /Od is given. This pass considers all these
+// conditions and runs mem2reg on functions only when needed:
+//   - NoOpt == false: mem2reg always runs.
+//   - NoOpt == true:  mem2reg runs only if some loop in the function (at any
+//                     nesting depth) is marked for unrolling.
+//
+class DxilConditionalMem2Reg : public FunctionPass {
+public:
+  static char ID;
+
+  // Function overrides that resolve options when used for DxOpt
+  // Reads the "NoOpt" pass option into NoOpt (defaults to false when absent).
+  void applyOptions(PassOptions O) override {
+    GetPassOptionBool(O, "NoOpt", &NoOpt, false);
+  }
+  // Appends this pass's NoOpt setting after the base-class configuration.
+  void dumpConfig(raw_ostream &OS) override {
+    FunctionPass::dumpConfig(OS);
+    OS << ",NoOpt=" << NoOpt;
+  }
+
+  // When true, promotion is skipped unless the function contains a loop
+  // marked for unrolling.
+  bool NoOpt = false;
+  explicit DxilConditionalMem2Reg(bool NoOpt=false) : FunctionPass(ID), NoOpt(NoOpt)
+  {
+    initializeDxilConditionalMem2RegPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<AssumptionCacheTracker>();
+    AU.addRequiredID(LoopSimplifyID);
+    AU.setPreservesCFG();
+  }
+
+  // Returns true if L, or any loop nested inside it, is marked for unrolling
+  // (either full unroll or unroll with an explicit count).
+  static bool HasLoopsMarkedUnrollRecursive(Loop *L) {
+    int Count = 0; // Required out-parameter; the value itself is not used here.
+    if (IsMarkedFullUnroll(L) || IsMarkedUnrollCount(L, &Count)) {
+      return true;
+    }
+    for (Loop *ChildLoop : *L) {
+      if (HasLoopsMarkedUnrollRecursive(ChildLoop))
+        return true;
+    }
+    return false;
+  }
+
+  // Runs Mem2Reg on F when required (see the class comment) and returns
+  // whether the function was changed.
+  bool runOnFunction(Function &F) override {
+    LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    AssumptionCache *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+    // With optimizations enabled we always promote. Under NoOpt we promote
+    // only if any loop in this function is marked for unrolling.
+    bool NeedPromote = !NoOpt;
+    if (!NeedPromote) {
+      for (Loop *L : *LI) {
+        if (HasLoopsMarkedUnrollRecursive(L)) {
+          NeedPromote = true;
+          break;
+        }
+      }
+    }
+
+    if (!NeedPromote)
+      return false;
+
+    return Mem2Reg(F, *DT, *AC);
+  }
+};
+char DxilConditionalMem2Reg::ID;
+
+// Factory for the conditional mem2reg pass. NoOpt is forwarded to the
+// DxilConditionalMem2Reg constructor. The returned pass is heap-allocated;
+// presumably ownership passes to whichever pass manager it is added to.
+Pass *llvm::createDxilConditionalMem2RegPass(bool NoOpt) {
+  return new DxilConditionalMem2Reg(NoOpt);
+}
 // Factory for the DXIL loop unroll pass. MaxIterationAttempt is forwarded to
 // the DxilLoopUnroll constructor. The returned pass is heap-allocated;
 // presumably ownership passes to whichever pass manager it is added to.
 Pass *llvm::createDxilLoopUnrollPass(unsigned MaxIterationAttempt) {
   return new DxilLoopUnroll(MaxIterationAttempt);
 }
 
+INITIALIZE_PASS(DxilConditionalMem2Reg, "dxil-cond-mem2reg", "Dxil Conditional Mem2Reg", false, false)
 INITIALIZE_PASS(DxilLoopUnroll, "dxil-loop-unroll", "Dxil Unroll loops", false, false)

+ 2 - 2
tools/clang/test/CodeGenHLSL/batch/declarations/precise/matrix.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s | XFail GitHub #2080
+// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s
 
 // Test that precise modifier on a matrix has an effect.
 
@@ -16,4 +16,4 @@ float2x2 main(float2x2 m : IN) : OUT
 {
   precise float2x2 result = m * m;
   return result;
-}
+}

+ 38 - 0
tools/clang/test/CodeGenHLSL/batch/unroll/big_step.hlsl

@@ -0,0 +1,38 @@
+// RUN: %dxc -Od -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: @main
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+
+// CHECK-NOT: @dx.op.unary.f32(i32 13
+
+// Confirm that loops with a step greater than 1 can be unrolled
+
+[RootSignature("")]
+float main(float y : Y) : SV_Target {
+  float x = 0;
+
+  static const uint kLoopCount = 512;
+
+  // i advances by 32 from 0 while i < 512, so the body runs 512 / 32 = 16
+  // times, matching the 16 CHECK lines at the top of this file.
+  [unroll]
+  for (uint i = 0; i < kLoopCount; i += 32) {
+    x = sin(x * x + y);
+  }
+  return x;
+}

+ 33 - 0
tools/clang/test/CodeGenHLSL/batch/unroll/big_step_non_trivial.hlsl

@@ -0,0 +1,33 @@
+// RUN: %dxc -Od -E main -T ps_6_0 %s | FileCheck %s
+// CHECK: @main
+
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+// CHECK: @dx.op.unary.f32(i32 13
+
+// CHECK-NOT: @dx.op.unary.f32(i32 13
+
+// Confirm that loops with fairly complex exit conditions
+// should be able to be unrolled
+
+[RootSignature("")]
+float main(float y : Y) : SV_Target {
+  float x = 0;
+
+  static const uint kLoopCount = 512;
+
+  int j = 10;
+  // Two exit conditions, and the induction variable is modified in the body:
+  //   - i nets +8 per iteration (-8 in the body, +16 in the increment; the
+  //     first -8 wraps the unsigned value, and the +16 wraps it back to 8).
+  //   - j counts down from 10 and the loop stops once j reaches 2.
+  // The j condition is hit first (i is only 64 by then), so the body runs
+  // 8 times, matching the 8 CHECK lines at the top of this file.
+  [unroll]
+  for (uint i = 0; i < kLoopCount && j > 2; i += 16) {
+    x = sin(x * x + y);
+    i -= 8;
+    j -= 1;
+  }
+  return x;
+}

+ 1 - 0
tools/clang/test/CodeGenHLSL/batch/unroll/fail.hlsl

@@ -1,5 +1,6 @@
 // RUN: %dxc -Od -E main -T ps_6_0 %s | FileCheck %s
 // CHECK-DAG: Could not unroll loop.
+// CHECK-DAG: -HV 2016
 // CHECK-NOT: @main
 
 // Check that the compilation fails due to unable to

+ 3 - 3
tools/clang/test/CodeGenHLSL/batch/unroll/large_count.hlsl

@@ -1,7 +1,7 @@
 // RUN: %dxc -Od -E main -T ps_6_0 %s | FileCheck %s
-// CHECK: Could not unroll loop
-// CHECK: To give an explicit unroll bound, use unroll(n)
-// CHECK-NOT: @main
+// CHECK: @main
+
+// Confirm that simple loops should be able to be unrolled
 
 [RootSignature("")]
 float main(float y : Y) : SV_Target {

+ 3 - 1
tools/clang/test/CodeGenHLSL/batch/unroll/warning.hlsl

@@ -1,7 +1,9 @@
-// RUN: %dxc -HV 2016 -Od -E main -T ps_6_0 %s | FileCheck %s
+// RUN: %dxc /HV 2016 -Od -E main -T ps_6_0 %s | FileCheck %s
 // CHECK-DAG: warning: Could not unroll loop.
+// CHECK-NOT: -HV 2016
 // CHECK-NOT: @main
 
+// Check that the warning doesn't mention HV 2016
 // Check that the compilation fails because the loop bound
 // cannot be found.
 

+ 3 - 0
utils/hct/hctdb.py

@@ -1525,6 +1525,9 @@ class db_dxil(object):
             {'n':'force-ssa-updater', 'i':'ForceSSAUpdater', 't':'bool', 'd':'Force the pass to not use DomTree and mem2reg, insteadforming SSA values through the SSAUpdater infrastructure.'},
             {'n':'sroa-random-shuffle-slices', 'i':'SROARandomShuffleSlices', 't':'bool', 'd':'Enable randomly shuffling the slices to help uncover instability in their order.'},
             {'n':'sroa-strict-inbounds', 'i':'SROAStrictInbounds', 't':'bool', 'd':'Experiment with completely strict handling of inbounds GEPs.'}])
+        add_pass("dxil-cond-mem2reg", "DxilConditionalMem2Reg", "Dxil Conditional Mem2Reg", [
+                {'n':'NoOpt', 't':'bool', 'c':1},
+            ])
         add_pass('scalarrepl', 'SROA_DT', 'Scalar Replacement of Aggregates (DT)', [
             {'n':'Threshold', 't':'int', 'c':1},
             {'n':'StructMemberThreshold', 't':'int', 'c':1},