浏览代码

Moved unroll to after dxilgen and scalarizer. (#3094)

Adam Yang 5 年之前
父节点
当前提交
0e77209223

+ 1 - 0
include/llvm/Transforms/IPO/PassManagerBuilder.h

@@ -128,6 +128,7 @@ public:
   bool PrepareForLTO;
   bool HLSLHighLevel = false; // HLSL Change
   bool HLSLAllowPreserveValues = false; // HLSL Change
+  bool HLSLOnlyWarnOnUnrollFail = false; // HLSL Change
   hlsl::HLSLExtensionsCodegenHelper *HLSLExtensionsCodeGen = nullptr; // HLSL Change
   bool HLSLResMayAlias = false; // HLSL Change
   unsigned ScanLimit = 0; // HLSL Change

+ 1 - 1
include/llvm/Transforms/Scalar.h

@@ -129,7 +129,7 @@ void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
 Pass *createDxilConditionalMem2RegPass(bool NoOpt);
 void initializeDxilConditionalMem2RegPass(PassRegistry&);
 
-Pass *createDxilLoopUnrollPass(unsigned MaxIterationAttempt);
+Pass *createDxilLoopUnrollPass(unsigned MaxIterationAttempt, bool OnlyWarnOnFail);
 void initializeDxilLoopUnrollPass(PassRegistry&);
 
 Pass *createDxilEraseDeadRegionPass();

+ 4 - 2
lib/Analysis/DxilValueCache.cpp

@@ -164,8 +164,10 @@ Value *DxilValueCache::ProcessAndSimplify_Br(Instruction *I, DominatorTree *DT)
     Value *Cond = TryGetCachedValue(Br->getCondition());
 
     if (IsUnreachable_(BB)) {
-      MarkUnreachable(FalseSucc);
-      MarkUnreachable(TrueSucc);
+      if (FalseSucc->getSinglePredecessor())
+        MarkUnreachable(FalseSucc);
+      if (TrueSucc->getSinglePredecessor())
+        MarkUnreachable(TrueSucc);
     }
     else if (IsConstantTrue(Cond)) {
       if (IsAlwaysReachable_(BB)) {

+ 6 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -206,6 +206,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   static const LPCSTR DxilDebugInstrumentationArgs[] = { "UAVSize", "parameter0", "parameter1", "parameter2" };
   static const LPCSTR DxilGenerationPassArgs[] = { "NotOptimized" };
   static const LPCSTR DxilInsertPreservesArgs[] = { "AllowPreserves" };
+  static const LPCSTR DxilLoopUnrollArgs[] = { "MaxIterationAttempt", "OnlyWarnOnFail" };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "mod-mode", "constant-red", "constant-green", "constant-blue", "constant-alpha" };
   static const LPCSTR DxilPIXMeshShaderOutputInstrumentationArgs[] = { "UAVSize" };
   static const LPCSTR DxilRenameResourcesArgs[] = { "prefix", "from-binding", "keep-name" };
@@ -243,6 +244,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
   if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
   if (strcmp(passName, "dxil-insert-preserves") == 0) return ArrayRef<LPCSTR>(DxilInsertPreservesArgs, _countof(DxilInsertPreservesArgs));
+  if (strcmp(passName, "dxil-loop-unroll") == 0) return ArrayRef<LPCSTR>(DxilLoopUnrollArgs, _countof(DxilLoopUnrollArgs));
   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
   if (strcmp(passName, "hlsl-dxil-pix-meshshader-output-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilPIXMeshShaderOutputInstrumentationArgs, _countof(DxilPIXMeshShaderOutputInstrumentationArgs));
   if (strcmp(passName, "dxil-rename-resources") == 0) return ArrayRef<LPCSTR>(DxilRenameResourcesArgs, _countof(DxilRenameResourcesArgs));
@@ -287,6 +289,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   static const LPCSTR DxilDebugInstrumentationArgs[] = { "None", "None", "None", "None" };
   static const LPCSTR DxilGenerationPassArgs[] = { "None" };
   static const LPCSTR DxilInsertPreservesArgs[] = { "None" };
+  static const LPCSTR DxilLoopUnrollArgs[] = { "Maximum number of iterations to attempt when iteratively unrolling.", "Whether to just warn when unrolling fails." };
   static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "None", "None", "None", "None", "None" };
   static const LPCSTR DxilPIXMeshShaderOutputInstrumentationArgs[] = { "None" };
   static const LPCSTR DxilRenameResourcesArgs[] = { "Prefix to add to resource names", "Append binding to name when bound", "Keep name when appending binding" };
@@ -324,6 +327,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
   if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
   if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
   if (strcmp(passName, "dxil-insert-preserves") == 0) return ArrayRef<LPCSTR>(DxilInsertPreservesArgs, _countof(DxilInsertPreservesArgs));
+  if (strcmp(passName, "dxil-loop-unroll") == 0) return ArrayRef<LPCSTR>(DxilLoopUnrollArgs, _countof(DxilLoopUnrollArgs));
   if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
   if (strcmp(passName, "hlsl-dxil-pix-meshshader-output-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilPIXMeshShaderOutputInstrumentationArgs, _countof(DxilPIXMeshShaderOutputInstrumentationArgs));
   if (strcmp(passName, "dxil-rename-resources") == 0) return ArrayRef<LPCSTR>(DxilRenameResourcesArgs, _countof(DxilRenameResourcesArgs));
@@ -369,8 +373,10 @@ static bool IsPassOptionName(StringRef S) {
     ||  S.equals("InlineThreshold")
     ||  S.equals("InsertLifetime")
     ||  S.equals("MaxHeaderSize")
+    ||  S.equals("MaxIterationAttempt")
     ||  S.equals("NoOpt")
     ||  S.equals("NotOptimized")
+    ||  S.equals("OnlyWarnOnFail")
     ||  S.equals("Os")
     ||  S.equals("ReplaceAllVectors")
     ||  S.equals("RequiresDomTree")

+ 34 - 6
lib/HLSL/DxilCondenseResources.cpp

@@ -1875,8 +1875,10 @@ void DxilLowerCreateHandleForLib::UpdateResourceSymbols() {
 namespace {
 
 void ReplaceResourceUserWithHandle(
-    LoadInst *Res, Value *handle) {
-  for (auto resUser = Res->user_begin(); resUser != Res->user_end();) {
+    DxilResource &res,
+    LoadInst *load, Value *handle)
+{
+  for (auto resUser = load->user_begin(); resUser != load->user_end();) {
     Value *V = *(resUser++);
     CallInst *CI = dyn_cast<CallInst>(V);
     DxilInst_CreateHandleForLib createHandle(CI);
@@ -1884,7 +1886,33 @@ void ReplaceResourceUserWithHandle(
     CI->replaceAllUsesWith(handle);
     CI->eraseFromParent();
   }
-  Res->eraseFromParent();
+
+  if (res.GetClass() == DXIL::ResourceClass::UAV) {
+    // Before this pass, the global resources might not have been mapped to all of their uses.
+    // Now we know exactly which resource each use refers to (otherwise the compilation would have failed),
+    // so we do a round of marking UAVs as having a counter.
+    static auto IsDxilOp = [](Value *V, hlsl::OP::OpCode Op) -> bool {
+      Instruction *I = dyn_cast<Instruction>(V);
+      if (!I)
+        return false;
+      return hlsl::OP::IsDxilOpFuncCallInst(I, Op);
+    };
+
+    // Search all users for update counter
+    for (User *U : handle->users()) {
+      if (IsDxilOp(U, hlsl::OP::OpCode::BufferUpdateCounter)) {
+        res.SetHasCounter(true);
+      }
+      else if (IsDxilOp(U, hlsl::OP::OpCode::AnnotateHandle)) {
+        for (User *UU : U->users()) {
+          if (IsDxilOp(UU, hlsl::OP::OpCode::BufferUpdateCounter))
+            res.SetHasCounter(true);
+        }
+      }
+    }
+  }
+
+  load->eraseFromParent();
 }
 
 } // namespace
@@ -1959,7 +1987,7 @@ void DxilLowerCreateHandleForLib::TranslateDxilResourceUses(
       Function *userF = ldInst->getParent()->getParent();
       DXASSERT(handleMapOnFunction.count(userF), "must exist");
       Value *handle = handleMapOnFunction[userF];
-      ReplaceResourceUserWithHandle(ldInst, handle);
+      ReplaceResourceUserWithHandle(static_cast<DxilResource &>(res), ldInst, handle);
     } else {
       DXASSERT(dyn_cast<GEPOperator>(user) != nullptr,
                "else AddOpcodeParamForIntrinsic in CodeGen did not patch uses "
@@ -2017,14 +2045,14 @@ void DxilLowerCreateHandleForLib::TranslateDxilResourceUses(
         // Must be load inst.
         LoadInst *ldInst = cast<LoadInst>(*(GEPU++));
         if (handle) {
-          ReplaceResourceUserWithHandle(ldInst, handle);
+          ReplaceResourceUserWithHandle(static_cast<DxilResource &>(res), ldInst, handle);
         } else {
           IRBuilder<> Builder = IRBuilder<>(ldInst);
           createHandleArgs[DXIL::OperandIndex::kCreateHandleResIndexOpIdx] =
               Builder.CreateAdd(idx, resLowerBound);
           Value *localHandle =
               Builder.CreateCall(createHandle, createHandleArgs, handleName);
-          ReplaceResourceUserWithHandle(ldInst, localHandle);
+          ReplaceResourceUserWithHandle(static_cast<DxilResource &>(res), ldInst, localHandle);
         }
       }
 

+ 19 - 19
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -207,7 +207,7 @@ void PassManagerBuilder::populateFunctionPassManager(
 }
 
 // HLSL Change Starts
-static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExtensionsCodegenHelper *ExtHelper, legacy::PassManagerBase &MPM) {
+static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, bool OnlyWarnOnUnrollFail, hlsl::HLSLExtensionsCodegenHelper *ExtHelper, legacy::PassManagerBase &MPM) {
 
   // Don't do any lowering if we're targeting high-level.
   if (HLSLHighLevel) {
@@ -268,19 +268,6 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   if (!NoOpt)
     MPM.add(createCFGSimplificationPass());
 
-  // Passes to handle [unroll]
-  // Needs to happen after SROA since loop count may depend on
-  // struct members.
-  // Needs to happen before resources are lowered and before HL
-  // module is gone.
-  MPM.add(createDxilLoopUnrollPass(1024));
-
-  // Default unroll pass. This is purely for optimizing loops without
-  // attributes.
-  if (OptLevel > 2) {
-    MPM.add(createLoopUnrollPass());
-  }
-
   MPM.add(createDxilPromoteLocalResources());
   MPM.add(createDxilPromoteStaticResources());
   // Verify no undef resource again after promotion
@@ -297,15 +284,28 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
   // scalarize vector to scalar
   MPM.add(createScalarizerPass(!NoOpt /* AllowFolding */));
 
+  // Remove vector instructions
+  MPM.add(createDxilEliminateVectorPass());
+
+  // Passes to handle [unroll]
+  // Needs to happen after SROA since loop count may depend on
+  // struct members.
+  // Runs after dxilgen and the scalarizer, but still needs to
+  // happen before resources are lowered.
+  MPM.add(createDxilLoopUnrollPass(1024, OnlyWarnOnUnrollFail));
+
+  // Default unroll pass. This is purely for optimizing loops without
+  // attributes.
+  if (OptLevel > 2) {
+    MPM.add(createLoopUnrollPass());
+  }
+
   if (!NoOpt)
     MPM.add(createSimplifyInstPass());
 
   if (!NoOpt)
     MPM.add(createCFGSimplificationPass());
 
-  // Remove vector instructions
-  MPM.add(createDxilEliminateVectorPass());
-
   MPM.add(createDeadCodeEliminationPass());
 
   if (OptLevel > 0) {
@@ -351,7 +351,7 @@ void PassManagerBuilder::populateModulePassManager(
     addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
 
     // HLSL Change Begins.
-    addHLSLPasses(HLSLHighLevel, OptLevel, HLSLExtensionsCodeGen, MPM);
+    addHLSLPasses(HLSLHighLevel, OptLevel, this->HLSLOnlyWarnOnUnrollFail, HLSLExtensionsCodeGen, MPM);
     if (!HLSLHighLevel) {
       MPM.add(createDxilConvergentClearPass());
       MPM.add(createMultiDimArrayToOneDimArrayPass());
@@ -386,7 +386,7 @@ void PassManagerBuilder::populateModulePassManager(
     delete Inliner;
     Inliner = nullptr;
   }
-  addHLSLPasses(HLSLHighLevel, OptLevel, HLSLExtensionsCodeGen, MPM); // HLSL Change
+  addHLSLPasses(HLSLHighLevel, OptLevel, this->HLSLOnlyWarnOnUnrollFail, HLSLExtensionsCodeGen, MPM); // HLSL Change
   // HLSL Change Ends
 
   // Add LibraryInfo if we have some.

+ 21 - 13
lib/Transforms/Scalar/DxilLoopUnroll.cpp

@@ -114,11 +114,13 @@ public:
   static char ID;
 
   std::unordered_set<Function *> CleanedUpAlloca;
-  const unsigned MaxIterationAttempt;
+  unsigned MaxIterationAttempt = 0;
+  bool OnlyWarnOnFail = false;
 
-  DxilLoopUnroll(unsigned MaxIterationAttempt = 1024) :
+  DxilLoopUnroll(unsigned MaxIterationAttempt = 1024, bool OnlyWarnOnFail=false) :
     LoopPass(ID),
-    MaxIterationAttempt(MaxIterationAttempt)
+    MaxIterationAttempt(MaxIterationAttempt),
+    OnlyWarnOnFail(OnlyWarnOnFail)
   {
     initializeDxilLoopUnrollPass(*PassRegistry::getPassRegistry());
   }
@@ -133,6 +135,18 @@ public:
     AU.addRequired<DxilValueCache>();
     AU.addRequiredID(LoopSimplifyID);
   }
+
+  // Function overrides that resolve options when used for DxOpt
+  void applyOptions(PassOptions O) override {
+    GetPassOptionUnsigned(O, "MaxIterationAttempt", &MaxIterationAttempt, false);
+    GetPassOptionBool(O, "OnlyWarnOnFail", &OnlyWarnOnFail, false);
+  }
+  void dumpConfig(raw_ostream &OS) override {
+    LoopPass::dumpConfig(OS);
+    OS << ",MaxIterationAttempt=" << MaxIterationAttempt;
+    OS << ",OnlyWarnOnFail=" << OnlyWarnOnFail;
+  }
+
 };
 
 char DxilLoopUnroll::ID;
@@ -647,12 +661,6 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   if (!L->isSafeToClone())
     return false;
 
-  bool FxcCompatMode = false;
-  if (F->getParent()->HasHLModule()) {
-    HLModule &HM = F->getParent()->GetHLModule();
-    FxcCompatMode = HM.GetHLOptions().bFXCCompatMode;
-  }
-
   unsigned TripCount = 0;
 
   BasicBlock *ExitingBlock = L->getLoopLatch();
@@ -1006,7 +1014,7 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
     // Now that we potentially turned some GEP indices into constants,
     // try to clean up their allocas.
-    if (!BreakUpArrayAllocas(FxcCompatMode /* allow oob index */, ProblemAllocas.begin(), ProblemAllocas.end(), DT, AC, DVC)) {
+    if (!BreakUpArrayAllocas(OnlyWarnOnFail /* allow oob index */, ProblemAllocas.begin(), ProblemAllocas.end(), DT, AC, DVC)) {
       FailLoopUnroll(false, F, LoopLoc, "Could not unroll loop due to out of bound array access.");
     }
 
@@ -1018,7 +1026,7 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
     const char *Msg =
         "Could not unroll loop. Loop bound could not be deduced at compile time. "
         "Use [unroll(n)] to give an explicit count.";
-    if (FxcCompatMode) {
+    if (OnlyWarnOnFail) {
       FailLoopUnroll(true /*warn only*/, F, LoopLoc, Msg);
     }
     else {
@@ -1049,8 +1057,8 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
 
 }
 
-Pass *llvm::createDxilLoopUnrollPass(unsigned MaxIterationAttempt) {
-  return new DxilLoopUnroll(MaxIterationAttempt);
+Pass *llvm::createDxilLoopUnrollPass(unsigned MaxIterationAttempt, bool OnlyWarnOnFail) {
+  return new DxilLoopUnroll(MaxIterationAttempt, OnlyWarnOnFail);
 }
 
 INITIALIZE_PASS_BEGIN(DxilLoopUnroll, "dxil-loop-unroll", "Dxil Unroll loops", false, false)

+ 2 - 0
tools/clang/include/clang/Frontend/CodeGenOptions.h

@@ -180,6 +180,8 @@ public:
   bool HLSLHighLevel = false;
   /// Whether we allow preserve intermediate values
   bool HLSLAllowPreserveValues = false;
+  /// Whether we only warn (instead of failing compilation) if a loop fails to unroll
+  bool HLSLOnlyWarnOnUnrollFail = false;
   /// Whether use row major as default matrix major.
   bool HLSLDefaultRowMajor = false;
   /// Whether use legacy cbuffer load.

+ 1 - 0
tools/clang/lib/CodeGen/BackendUtil.cpp

@@ -328,6 +328,7 @@ void EmitAssemblyHelper::CreatePasses() {
   // HLSL Change - begin
   PMBuilder.HLSLHighLevel = CodeGenOpts.HLSLHighLevel;
   PMBuilder.HLSLAllowPreserveValues = CodeGenOpts.HLSLAllowPreserveValues;
+  PMBuilder.HLSLOnlyWarnOnUnrollFail = CodeGenOpts.HLSLOnlyWarnOnUnrollFail;
   PMBuilder.HLSLExtensionsCodeGen = CodeGenOpts.HLSLExtensionsCodegen.get();
   PMBuilder.HLSLResMayAlias = CodeGenOpts.HLSLResMayAlias;
   PMBuilder.ScanLimit = CodeGenOpts.ScanLimit;

+ 72 - 0
tools/clang/test/HLSLFileCheck/hlsl/control_flow/attributes/unroll/nested_update_counter.hlsl

@@ -0,0 +1,72 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+// CHECK: call i32 @dx.op.bufferUpdateCounter
+
+// CHECK-NOT: call i32 @dx.op.bufferUpdateCounter
+
+//
+// This test ensures resources are still correctly marked as having counter,
+// even as unrolls are moved after dxil generation.
+//
+
+RWStructuredBuffer<float4> buf0;
+RWStructuredBuffer<float4> buf1;
+RWStructuredBuffer<float4> buf2;
+RWStructuredBuffer<float4> buf3;
+uint g_cond;
+uint g_cond2;
+
+float routine(float value) {
+  RWStructuredBuffer<float4> buffers[] = { buf0, buf1, buf2, buf3, };
+  float ret = 0;
+  [unroll]
+  for (uint k = 0; k < 4; k++) {
+    ret += 15;
+    if (g_cond == k) {
+      buffers[k].IncrementCounter();
+      buffers[k][0] = value;
+      return ret;
+    }
+  }
+  return ret+1;
+}
+
+float main(float3 a : A, float3 b : B) : SV_Target {
+
+  float ret = 0;
+  [unroll]
+  for (uint l = 0; l < 1; l++) {
+    [unroll]
+    for (uint i = 0; i < 4; i++) {
+
+      [loop]
+      for (uint j = 0; j < 4; j++) {
+        ret += routine(j);
+        ret++;
+      }
+
+      ret--;
+    }
+  }
+
+  return ret;
+}
+

+ 44 - 0
tools/clang/test/HLSLFileCheck/hlsl/control_flow/attributes/unroll/nested_vector.hlsl

@@ -0,0 +1,44 @@
+// RUN: %dxc /Od /T lib_6_3 /exports UnrollTest %s | FileCheck %s
+// RUN: %dxc /Od /T lib_6_3 /exports UnrollTest %s /Zi | FileCheck %s
+
+// Check that we can do unroll properly using vectors
+
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+// CHECK: %{{[0-9]+}} = call float @dx.op.dot3
+
+// CHECK-NOT: %{{[0-9]+}} = call float @dx.op.dot3
+
+struct MyInt2 { int x, y; };
+
+float UnrollTest(float3 a : A, float3 b : B)
+{
+	int4 offset;
+  float ret = 0;
+	[unroll] for (offset.x = 0; offset.x <= 1; ++offset.x)
+	[unroll] for (offset.y = 0; offset.y <= 1; ++offset.y)
+	[unroll] for (offset.z = 0; offset.z <= 1; ++offset.z)
+	[unroll] for (offset.w = 0; offset.w <= 1; ++offset.w)
+	{
+    ret += dot(a, b);
+  }
+  return ret;
+}
+
+

+ 1 - 0
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -1131,6 +1131,7 @@ public:
 
     compiler.getCodeGenOpts().HLSLHighLevel = Opts.CodeGenHighLevel;
     compiler.getCodeGenOpts().HLSLAllowPreserveValues = Opts.AllowPreserveValues;
+    compiler.getCodeGenOpts().HLSLOnlyWarnOnUnrollFail = Opts.EnableFXCCompatMode;
     compiler.getCodeGenOpts().HLSLResMayAlias = Opts.ResMayAlias;
     compiler.getCodeGenOpts().ScanLimit = Opts.ScanLimit;
     compiler.getCodeGenOpts().HLSLOptimizationToggles = Opts.DxcOptimizationToggles;

+ 4 - 1
utils/hct/hctdb.py

@@ -2138,7 +2138,10 @@ class db_dxil(object):
         # C:\nobackup\work\HLSLonLLVM\lib\Transforms\IPO\PassManagerBuilder.cpp:353
         add_pass('indvars', 'IndVarSimplify', "Induction Variable Simplification", [])
         add_pass('loop-idiom', 'LoopIdiomRecognize', "Recognize loop idioms", [])
-        add_pass('dxil-loop-unroll', 'DxilLoopUnroll', 'DxilLoopUnroll', [])
+        add_pass('dxil-loop-unroll', 'DxilLoopUnroll', 'DxilLoopUnroll', [
+            {'n':'MaxIterationAttempt', 't':'unsigned', 'c':1, 'd':'Maximum number of iterations to attempt when iteratively unrolling.'},
+            {'n':'OnlyWarnOnFail', 't':'bool', 'c':1, 'd':'Whether to just warn when unrolling fails.'},
+        ])
         add_pass('dxil-erase-dead-region', 'DxilEraseDeadRegion', 'DxilEraseDeadRegion', [])
         add_pass('dxil-remove-dead-blocks', 'DxilRemoveDeadBlocks', 'DxilRemoveDeadBlocks', [])
         add_pass('loop-deletion', 'LoopDeletion', "Delete dead loops", [])