Răsfoiți Sursa

Legalize sample offset when optimization is disabled. (#97)

Xiang Li 8 ani în urmă
părinte
comite
73b6f95a40

+ 12 - 0
include/dxc/HLSL/DxilConstants.h

@@ -671,6 +671,18 @@ namespace DXIL {
     // TextureGatherCmp.
     const unsigned kTextureGatherCmpCmpValOpIdx = 11;
 
+    // TextureSample.
+    const unsigned kTextureSampleTexHandleOpIdx = 1;
+    const unsigned kTextureSampleSamplerHandleOpIdx = 2;
+    const unsigned kTextureSampleCoord0OpIdx = 3;
+    const unsigned kTextureSampleCoord1OpIdx = 4;
+    const unsigned kTextureSampleCoord2OpIdx = 5;
+    const unsigned kTextureSampleCoord3OpIdx = 6;
+    const unsigned kTextureSampleOffset0OpIdx = 7;
+    const unsigned kTextureSampleOffset1OpIdx = 8;
+    const unsigned kTextureSampleOffset2OpIdx = 9;
+    const unsigned kTextureSampleClampOpIdx = 10;
+
     // AtomicBinOp.
     const unsigned kAtomicBinOpCoord0OpIdx = 3;
     const unsigned kAtomicBinOpCoord1OpIdx = 4;

+ 2 - 0
include/dxc/HLSL/DxilGenerationPass.h

@@ -43,6 +43,7 @@ ModulePass *createHLEmitMetadataPass();
 ModulePass *createHLEnsureMetadataPass();
 ModulePass *createDxilEmitMetadataPass();
 ModulePass *createDxilPrecisePropagatePass();
+FunctionPass *createDxilLegalizeSampleOffsetPass();
 FunctionPass *createSimplifyInstPass();
 
 void initializeDxilCondenseResourcesPass(llvm::PassRegistry&);
@@ -51,6 +52,7 @@ void initializeHLEnsureMetadataPass(llvm::PassRegistry&);
 void initializeHLEmitMetadataPass(llvm::PassRegistry&);
 void initializeDxilEmitMetadataPass(llvm::PassRegistry&);
 void initializeDxilPrecisePropagatePassPass(llvm::PassRegistry&);
+void initializeDxilLegalizeSampleOffsetPassPass(llvm::PassRegistry&);
 void initializeSimplifyInstPass(llvm::PassRegistry&);
 
 bool AreDxilResourcesDense(llvm::Module *M, hlsl::DxilResourceBase **ppNonDense);

+ 2 - 0
include/dxc/HLSL/DxilModule.h

@@ -87,6 +87,7 @@ public:
   const std::vector<std::unique_ptr<DxilResource> > &GetUAVs() const;
 
   void RemoveUnusedResources();
+  void RemoveFunction(llvm::Function *F);
 
   // Signatures.
   DxilSignature &GetInputSignature();
@@ -121,6 +122,7 @@ public:
   void ResetPatchConstantSignature(DxilSignature *pValue);
   void ResetRootSignature(RootSignatureHandle *pValue);
   void ResetTypeSystem(DxilTypeSystem *pValue);
+  void ResetOP(hlsl::OP *hlslOP);
 
   void StripDebugRelatedCode();
   llvm::DebugInfoFinder &GetOrCreateDebugInfoFinder();

+ 5 - 1
include/dxc/HLSL/DxilOperations.h

@@ -23,6 +23,7 @@ class Instruction;
 #include "llvm/IR/Attributes.h"
 
 #include "DxilConstants.h"
+#include <unordered_map>
 
 namespace hlsl {
 
@@ -37,6 +38,8 @@ public:
   OP(llvm::LLVMContext &Ctx, llvm::Module *pModule);
 
   llvm::Function *GetOpFunc(OpCode OpCode, llvm::Type *pOverloadType);
+  llvm::ArrayRef<llvm::Function *> GetOpFuncList(OpCode OpCode) const;
+  void RemoveFunction(llvm::Function *F);
   llvm::Type *GetOverloadType(OpCode OpCode, llvm::Function *F);
   llvm::LLVMContext &GetCtx() { return m_Ctx; }
   llvm::Type *GetHandleType() const;
@@ -97,7 +100,8 @@ private:
     llvm::Function *pOverloads[kNumTypeOverloads];
   };
   OpCodeCacheItem m_OpCodeClassCache[(unsigned)OpCodeClass::NumOpClasses];
-
+  std::unordered_map<llvm::Function *, OpCodeClass> m_FunctionToOpClass;
+  void RefreshCache(llvm::Module *pModule);
 private:
   // Static properties.
   struct OpCodeProperty {

+ 1 - 0
include/dxc/HLSL/HLModule.h

@@ -230,6 +230,7 @@ public:
   DxilSignature *ReleaseOutputSignature();
   DxilSignature *ReleasePatchConstantSignature();
   DxilTypeSystem *ReleaseTypeSystem();
+  OP *ReleaseOP();
   RootSignatureHandle *ReleaseRootSignature();
 
   llvm::DebugInfoFinder &GetOrCreateDebugInfoFinder();

+ 1 - 0
lib/HLSL/CMakeLists.txt

@@ -9,6 +9,7 @@ add_llvm_library(LLVMHLSL
   DxilContainerReflection.cpp
   DxilGenerationPass.cpp
   DxilInterpolationMode.cpp
+  DxilLegalizeSampleOffsetPass.cpp
   DxilMetadataHelper.cpp
   DxilModule.cpp
   DXILOperations.cpp

+ 1 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -75,6 +75,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilCondenseResourcesPass(Registry);
     initializeDxilEmitMetadataPass(Registry);
     initializeDxilGenerationPassPass(Registry);
+    initializeDxilLegalizeSampleOffsetPassPass(Registry);
     initializeDxilPrecisePropagatePassPass(Registry);
     initializeDynamicIndexingVectorToArrayPass(Registry);
     initializeEarlyCSELegacyPassPass(Registry);

+ 2 - 1
lib/HLSL/DxilCondenseResources.cpp

@@ -66,7 +66,8 @@ public:
     DxilModule &DM = M.GetOrCreateDxilModule();
 
     // Switch tbuffers to SRVs, as they have been treated as cbuffers up to this point.
-    PatchTBuffers(DM);
+    if (DM.GetCBuffers().size())
+      PatchTBuffers(DM);
 
     // Remove unused resource.
     DM.RemoveUnusedResources();

+ 2 - 0
lib/HLSL/DxilGenerationPass.cpp

@@ -363,6 +363,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
 
   // DXIL type system.
   M.ResetTypeSystem(H.ReleaseTypeSystem());
+  // Dxil OP.
+  M.ResetOP(H.ReleaseOP());
   // Keep llvm used.
   M.EmitLLVMUsed();
 }

+ 211 - 0
lib/HLSL/DxilLegalizeSampleOffsetPass.cpp

@@ -0,0 +1,211 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilLegalizeSampleOffsetPass.cpp                                          //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// DxilLegalizeSampleOffsetPass implementation.                              //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/HLSL/DxilGenerationPass.h"
+#include "dxc/HLSL/DxilModule.h"
+#include "dxc/HLSL/DxilOperations.h"
+
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+
+#include <unordered_set>
+
+using std::vector;
+using std::unique_ptr;
+using namespace llvm;
+using namespace hlsl;
+
+///////////////////////////////////////////////////////////////////////////////
+// Legalize Sample offset.
+
+namespace {
+// When optimizations are disabled, try to legalize sample offset.
+class DxilLegalizeSampleOffsetPass : public FunctionPass {
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit DxilLegalizeSampleOffsetPass() : FunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "DXIL legalize sample offset";
+  }
+
+  bool runOnFunction(Function &F) override {
+    DxilModule &DM = F.getParent()->GetOrCreateDxilModule();
+    hlsl::OP *hlslOP = DM.GetOP();
+
+    std::vector<Instruction *> illegalOffsets;
+
+    CollectIllegalOffsets(illegalOffsets, F, hlslOP);
+
+    if (illegalOffsets.empty())
+      return false;
+
+    // Loop unroll if has offset inside loop.
+    TryUnrollLoop(illegalOffsets, F);
+
+    // Collect offset again after mem2reg.
+    std::vector<Instruction *> ssaIllegalOffsets;
+    CollectIllegalOffsets(ssaIllegalOffsets, F, hlslOP);
+
+    // Run simple optimization to legalize offsets.
+    LegalizeOffsets(ssaIllegalOffsets);
+
+    // Remove PHINodes to keep code shape.
+    legacy::FunctionPassManager PM(F.getParent());
+    PM.add(createDemoteRegisterToMemoryHlslPass());
+    PM.run(F);
+
+    FinalCheck(illegalOffsets, F, hlslOP);
+
+    return true;
+  }
+
+private:
+  void TryUnrollLoop(std::vector<Instruction *> &illegalOffsets, Function &F);
+  void CollectIllegalOffsets(std::vector<Instruction *> &illegalOffsets,
+                             Function &F, hlsl::OP *hlslOP);
+  void CollectIllegalOffsets(std::vector<Instruction *> &illegalOffsets,
+                             Function &F, DXIL::OpCode opcode,
+                             hlsl::OP *hlslOP);
+  void LegalizeOffsets(const std::vector<Instruction *> &illegalOffsets);
+  void FinalCheck(std::vector<Instruction *> &illegalOffsets, Function &F,
+                  hlsl::OP *hlslOP);
+};
+
+char DxilLegalizeSampleOffsetPass::ID = 0;
+
+bool HasIllegalOffsetInLoop(std::vector<Instruction *> &illegalOffsets,
+                            Function &F) {
+  DominatorTreeAnalysis DTA;
+  DominatorTree DT = DTA.run(F);
+  LoopInfo LI;
+  LI.Analyze(DT);
+
+  bool findOffset = false;
+
+  for (Instruction *I : illegalOffsets) {
+    BasicBlock *BB = I->getParent();
+    if (LI.getLoopFor(BB)) {
+      findOffset = true;
+      break;
+    }
+  }
+  return findOffset;
+}
+
+void CollectIllegalOffset(CallInst *CI,
+                          std::vector<Instruction *> &illegalOffsets) {
+  Value *offset0 =
+      CI->getArgOperand(DXIL::OperandIndex::kTextureSampleOffset0OpIdx);
+  // No offset.
+  if (isa<UndefValue>(offset0))
+    return;
+
+  for (unsigned i = DXIL::OperandIndex::kTextureSampleOffset0OpIdx;
+       i <= DXIL::OperandIndex::kTextureSampleOffset2OpIdx; i++) {
+    Value *offset = CI->getArgOperand(i);
+    if (Instruction *I = dyn_cast<Instruction>(offset))
+      illegalOffsets.emplace_back(I);
+  }
+}
+}
+
+void DxilLegalizeSampleOffsetPass::FinalCheck(
+    std::vector<Instruction *> &illegalOffsets, Function &F, hlsl::OP *hlslOP) {
+  // Collect offset to make sure no illegal offsets.
+  std::vector<Instruction *> finalIllegalOffsets;
+  CollectIllegalOffsets(finalIllegalOffsets, F, hlslOP);
+
+  if (!finalIllegalOffsets.empty()) {
+    const StringRef kIllegalOffsetError =
+        "Offsets for Sample* must be immediated value. "
+        "Consider unroll the loop manually and use O3, it may help in some "
+        "cases\n";
+    std::string errorMsg;
+    raw_string_ostream errorStr(errorMsg);
+    for (Instruction *offset : finalIllegalOffsets) {
+      if (const DebugLoc &L = offset->getDebugLoc())
+        L.print(errorStr);
+      errorStr << " " << kIllegalOffsetError;
+    }
+    errorStr.flush();
+    F.getContext().emitError(errorMsg);
+  }
+}
+
+void DxilLegalizeSampleOffsetPass::TryUnrollLoop(
+    std::vector<Instruction *> &illegalOffsets, Function &F) {
+  legacy::FunctionPassManager PM(F.getParent());
+  // Always need mem2reg for simplify illegal offsets.
+  PM.add(createPromoteMemoryToRegisterPass());
+
+  if (HasIllegalOffsetInLoop(illegalOffsets, F)) {
+    PM.add(createCFGSimplificationPass());
+    PM.add(createLCSSAPass());
+    PM.add(createLoopSimplifyPass());
+    PM.add(createLoopRotatePass());
+    PM.add(createLoopUnrollPass(-2, -1, 0, 0));
+  }
+  PM.run(F);
+}
+
+void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
+    std::vector<Instruction *> &illegalOffsets, Function &CurF,
+    hlsl::OP *hlslOP) {
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::Sample, hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleBias, hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleCmp, hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleCmpLevelZero,
+                        hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleGrad, hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleLevel,
+                        hlslOP);
+}
+
+void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
+    std::vector<Instruction *> &illegalOffsets, Function &CurF,
+    DXIL::OpCode opcode, hlsl::OP *hlslOP) {
+  ArrayRef<Function *> intrFuncList = hlslOP->GetOpFuncList(opcode);
+  for (Function *intrFunc : intrFuncList) {
+    if (!intrFunc)
+      continue;
+    for (User *U : intrFunc->users()) {
+      CallInst *CI = cast<CallInst>(U);
+      // Skip inst not in current function.
+      if (CI->getParent()->getParent() != &CurF)
+        continue;
+
+      CollectIllegalOffset(CI, illegalOffsets);
+    }
+  }
+}
+
+// Runs instruction simplification on each collected offset instruction so
+// that offsets which are computable at compile time fold to constants.
+void DxilLegalizeSampleOffsetPass::LegalizeOffsets(
+    const std::vector<Instruction *> &illegalOffsets) {
+  // The same instruction can be collected more than once (it may feed several
+  // offset operands or several sample calls). recursivelySimplifyInstruction
+  // can erase an instruction it folds, so visit each pointer only once.
+  std::unordered_set<Instruction *> visited;
+  for (Instruction *I : illegalOffsets) {
+    if (visited.insert(I).second)
+      llvm::recursivelySimplifyInstruction(I);
+  }
+}
+
+FunctionPass *llvm::createDxilLegalizeSampleOffsetPass() {
+  return new DxilLegalizeSampleOffsetPass();
+}
+
+INITIALIZE_PASS(DxilLegalizeSampleOffsetPass, "dxil-legalize-sample-offset",
+                "DXIL legalize sample offset", false, false)

+ 11 - 0
lib/HLSL/DxilModule.cpp

@@ -742,6 +742,13 @@ static void ConvertUsedResource(std::unordered_set<unsigned> &immResID,
   }
 }
 
+void DxilModule::RemoveFunction(llvm::Function *F) {
+  DXASSERT_NOMSG(F != nullptr);
+  if (m_pTypeSystem.get()->GetFunctionAnnotation(F))
+    m_pTypeSystem.get()->EraseFunctionAnnotation(F);
+  m_pOP->RemoveFunction(F);
+}
+
 void DxilModule::RemoveUnusedResources() {
   hlsl::OP *hlslOP = GetOP();
   Function *createHandleFunc = hlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(GetCtx()));
@@ -865,6 +872,10 @@ void DxilModule::ResetTypeSystem(DxilTypeSystem *pValue) {
   m_pTypeSystem.reset(pValue);
 }
 
+void DxilModule::ResetOP(hlsl::OP *hlslOP) {
+  m_pOP.reset(hlslOP);
+}
+
 void DxilModule::EmitLLVMUsed() {
   if (m_LLVMUsed.empty())
     return;

+ 49 - 10
lib/HLSL/DxilOperations.cpp

@@ -410,6 +410,30 @@ OP::OP(LLVMContext &Ctx, Module *pModule)
 
   Type *Int4Types[4] = { Type::getInt32Ty(m_Ctx), Type::getInt32Ty(m_Ctx), Type::getInt32Ty(m_Ctx), Type::getInt32Ty(m_Ctx) }; // HiHi, HiLo, LoHi, LoLo
   m_pInt4Type = GetOrCreateStructType(m_Ctx, Int4Types, "dx.types.fouri32", pModule);
+  // Try to find existing intrinsic function.
+  RefreshCache(pModule);
+}
+
+void OP::RefreshCache(llvm::Module *pModule) {
+  for (Function &F : pModule->functions()) {
+    if (OP::IsDxilOpFunc(&F) && !F.user_empty()) {
+      CallInst *CI = cast<CallInst>(*F.user_begin());
+      OpCode OpCode = OP::GetDxilOpFuncCallInst(CI);
+      Type *pOverloadType = OP::GetOverloadType(OpCode, &F);
+
+      DXASSERT(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes,
+               "otherwise caller passed OOB OpCode");
+      _Analysis_assume_(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes);
+      DXASSERT(IsOverloadLegal(OpCode, pOverloadType),
+               "otherwise the caller requested illegal operation overload (eg "
+               "HLSL function with unsupported types for mapped intrinsic "
+               "function)");
+      unsigned TypeSlot = GetTypeSlot(pOverloadType);
+      OpCodeClass opClass = m_OpCodeProps[(unsigned)OpCode].OpCodeClass;
+      m_OpCodeClassCache[(unsigned)opClass].pOverloads[TypeSlot] = &F;
+      m_FunctionToOpClass[&F] = opClass;
+    }
+  }
 }
 
 Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
@@ -417,7 +441,8 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   _Analysis_assume_(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes);
   DXASSERT(IsOverloadLegal(OpCode, pOverloadType), "otherwise the caller requested illegal operation overload (eg HLSL function with unsupported types for mapped intrinsic function)");
   unsigned TypeSlot = GetTypeSlot(pOverloadType);
-  Function *&F = m_OpCodeClassCache[(unsigned)m_OpCodeProps[(unsigned)OpCode].OpCodeClass].pOverloads[TypeSlot];
+  OpCodeClass opClass = m_OpCodeProps[(unsigned)OpCode].OpCodeClass;
+  Function *&F = m_OpCodeClassCache[(unsigned)opClass].pOverloads[TypeSlot];
   if (F != nullptr)
     return F;
 
@@ -667,15 +692,10 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   FunctionType *pFT;
   DXASSERT(ArgTypes.size() > 1, "otherwise forgot to initialize arguments");
   pFT = FunctionType::get(ArgTypes[0], ArrayRef<Type*>(&ArgTypes[1], ArgTypes.size()-1), false);
-  if (pOverloadType != pV) {
-    F = Function::Create(pFT, GlobalValue::LinkageTypes::ExternalLinkage, 
-                         funcName,
-                         m_pModule);
-  } else {
-    F = Function::Create(pFT, GlobalValue::LinkageTypes::ExternalLinkage, 
-                         funcName,
-                         m_pModule);
-  }
+
+  F = cast<Function>(m_pModule->getOrInsertFunction(funcName, pFT));
+
+  m_FunctionToOpClass[F] = opClass;
   F->setCallingConv(CallingConv::C);
   F->addFnAttr(Attribute::NoUnwind);
   if (m_OpCodeProps[(unsigned)OpCode].FuncAttr != Attribute::None)
@@ -684,6 +704,25 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
   return F;
 }
 
+llvm::ArrayRef<llvm::Function *> OP::GetOpFuncList(OpCode OpCode) const {
+  DXASSERT(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes, "otherwise caller passed OOB OpCode");
+  _Analysis_assume_(0 <= (unsigned)OpCode && OpCode < OpCode::NumOpCodes);
+  return m_OpCodeClassCache[(unsigned)m_OpCodeProps[(unsigned)OpCode].OpCodeClass].pOverloads;
+}
+
+// Forgets F: clears its slot in the op-class overload cache and removes it
+// from the function -> op-class map. Does nothing unless F is a DXIL op
+// function that this OP instance is tracking.
+void OP::RemoveFunction(Function *F) {
+  if (!OP::IsDxilOpFunc(F))
+    return;
+
+  // Use find() rather than operator[]: operator[] would insert a default
+  // entry for an untracked function and leave a stale key behind.
+  auto it = m_FunctionToOpClass.find(F);
+  if (it == m_FunctionToOpClass.end())
+    return;
+
+  OpCodeCacheItem &cacheItem = m_OpCodeClassCache[(unsigned)it->second];
+  for (unsigned i = 0; i < kNumTypeOverloads; i++) {
+    if (cacheItem.pOverloads[i] == F) {
+      cacheItem.pOverloads[i] = nullptr;
+      break;
+    }
+  }
+  // Always drop the map entry, even if the cache slot no longer held F.
+  m_FunctionToOpClass.erase(it);
+}
+
 llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   DXASSERT(F, "not work on nullptr");
   Type *Ty = F->getReturnType();

+ 5 - 0
lib/HLSL/HLModule.cpp

@@ -212,6 +212,7 @@ void HLModule::RemoveFunction(llvm::Function *F) {
   m_HLFunctionPropsMap.erase(F);
   if (m_pTypeSystem.get()->GetFunctionAnnotation(F))
     m_pTypeSystem.get()->EraseFunctionAnnotation(F);
+  m_pOP->RemoveFunction(F);
 }
 
 template <typename TResource>
@@ -315,6 +316,10 @@ DxilTypeSystem *HLModule::ReleaseTypeSystem() {
   return m_pTypeSystem.release();
 }
 
+hlsl::OP *HLModule::ReleaseOP() {
+  return m_pOP.release();
+}
+
 RootSignatureHandle *HLModule::ReleaseRootSignature() {
   return m_RootSignature.release();
 }

+ 3 - 0
lib/IR/Function.cpp

@@ -28,6 +28,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "dxc/HLSL/HLModule.h" // HLSL Change
+#include "dxc/HLSL/DxilModule.h" // HLSL Change
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/RWMutex.h"
 #include "llvm/Support/StringPool.h"
@@ -237,11 +238,13 @@ Type *Function::getReturnType() const {
 
 void Function::removeFromParent() {
   if (getParent()->HasHLModule()) getParent()->GetHLModule().RemoveFunction(this); // HLSL Change
+  if (getParent()->HasDxilModule()) getParent()->GetDxilModule().RemoveFunction(this); // HLSL Change
   getParent()->getFunctionList().remove(this);
 }
 
 void Function::eraseFromParent() {
   if (getParent()->HasHLModule()) getParent()->GetHLModule().RemoveFunction(this); // HLSL Change
+  if (getParent()->HasDxilModule()) getParent()->GetDxilModule().RemoveFunction(this); // HLSL Change
   getParent()->getFunctionList().erase(this);
 }
 

+ 3 - 0
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -262,6 +262,7 @@ void PassManagerBuilder::populateModulePassManager(
     if (!HLSLHighLevel) {
       MPM.add(createMultiDimArrayToOneDimArrayPass());// HLSL Change
       MPM.add(createDxilCondenseResourcesPass()); // HLSL Change
+      MPM.add(createDxilLegalizeSampleOffsetPass()); // HLSL Change
       MPM.add(createDxilEmitMetadataPass());      // HLSL Change
     }
     // HLSL Change Ends.
@@ -527,6 +528,8 @@ void PassManagerBuilder::populateModulePassManager(
   if (!HLSLHighLevel) {
     MPM.add(createMultiDimArrayToOneDimArrayPass());// HLSL Change
     MPM.add(createDxilCondenseResourcesPass());
+    if (DisableUnrollLoops)
+      MPM.add(createDxilLegalizeSampleOffsetPass()); // HLSL Change
     MPM.add(createDxilEmitMetadataPass());
   }
   // HLSL Change Ends.

+ 16 - 0
tools/clang/test/CodeGenHLSL/optForNoOpt.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -E main -T ps_6_0  -Zi -Od %s | FileCheck %s
+
+// CHECK: main
+
+SamplerState samp1 : register(s5);
+Texture2D<float4> text1 : register(t3);
+
+
+float4 main(float2 a : A) : SV_Target {
+  float4 r = 0;
+  for (uint x=0; x<2;x++)
+  for (uint y=0; y<2;y++) {
+    r += text1.Sample(samp1, a, int2(x+y,x-y));
+  }
+  return r;
+}

+ 16 - 0
tools/clang/test/CodeGenHLSL/optForNoOpt2.hlsl

@@ -0,0 +1,16 @@
+// RUN: %dxc -E main -T ps_6_0  -Zi -Od %s | FileCheck %s
+
+// CHECK: main
+
+SamplerState samp1 : register(s5);
+Texture2D<float4> text1 : register(t3);
+
+
+float4 main(float2 a : A) : SV_Target {
+  float4 r = 0;
+  int x = 3;
+  int y = 2;
+  r = text1.Sample(samp1, a, int2(x+y,x-y));
+
+  return r;
+}

+ 17 - 0
tools/clang/test/CodeGenHLSL/optForNoOpt3.hlsl

@@ -0,0 +1,17 @@
+// RUN: %dxc -E main -T ps_6_0  -Zi -Od %s | FileCheck %s
+
+// CHECK: Offsets for Sample* must be immediated value
+
+SamplerState samp1 : register(s5);
+Texture2D<float4> text1 : register(t3);
+
+
+int x;
+int y;
+
+float4 main(float2 a : A) : SV_Target {
+  float4 r = 0;
+  r = text1.Sample(samp1, a, int2(x+y,x-y));
+
+  return r;
+}

+ 17 - 0
tools/clang/test/CodeGenHLSL/optForNoOpt4.hlsl

@@ -0,0 +1,17 @@
+// RUN: %dxc -E main -T ps_6_0  -Zi -Od %s | FileCheck %s
+
+// CHECK: Offsets for Sample* must be immediated value
+
+SamplerState samp1 : register(s5);
+Texture2D<float4> text1 : register(t3);
+
+int i;
+
+float4 main(float2 a : A) : SV_Target {
+  float4 r = 0;
+  for (uint x=0; x<i;x++)
+  for (uint y=0; y<2;y++) {
+    r += text1.Sample(samp1, a, int2(x+y,x-y));
+  }
+  return r;
+}

+ 10 - 0
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -424,6 +424,8 @@ public:
   TEST_METHOD(CodeGenNeg2)
   TEST_METHOD(CodeGenNegabs1)
   TEST_METHOD(CodeGenNonUniform)
+  TEST_METHOD(CodeGenOptForNoOpt)
+  TEST_METHOD(CodeGenOptForNoOpt2)
   TEST_METHOD(CodeGenOptionGis)
   TEST_METHOD(CodeGenOptionWX)
   TEST_METHOD(CodeGenOutput1)
@@ -2349,6 +2351,14 @@ TEST_F(CompilerTest, CodeGenNonUniform) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\NonUniform.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenOptForNoOpt) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\optForNoOpt.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenOptForNoOpt2) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\optForNoOpt2.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenOptionGis) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\option_gis.hlsl");
 }

+ 10 - 0
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -135,6 +135,8 @@ public:
   TEST_METHOD(SimpleGs8)
   TEST_METHOD(SimpleGs9)
   TEST_METHOD(SimpleGs10)
+  TEST_METHOD(IllegalSampleOffset3)
+  TEST_METHOD(IllegalSampleOffset4)
   TEST_METHOD(NoFunctionParam)
   TEST_METHOD(I8Type)
   TEST_METHOD(EmptyStructInBuffer)
@@ -1445,6 +1447,14 @@ TEST_F(ValidationTest, SimpleGs10) {
   TestCheck(L"..\\CodeGenHLSL\\SimpleGS10.hlsl");
 }
 
+TEST_F(ValidationTest, IllegalSampleOffset3) {
+  TestCheck(L"..\\CodeGenHLSL\\optForNoOpt3.hlsl");
+}
+
+TEST_F(ValidationTest, IllegalSampleOffset4) {
+  TestCheck(L"..\\CodeGenHLSL\\optForNoOpt4.hlsl");
+}
+
 TEST_F(ValidationTest, NoFunctionParam) {
   RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\abs2.hlsl", "ps_6_0",
     {"define void @main\\(\\)",               "void \\(\\)\\* @main, !([0-9]+)\\}(.*)!\\1 = !\\{!([0-9]+)\\}",  "void \\(\\)\\* @main"},

+ 1 - 0
utils/hct/hctdb.py

@@ -1213,6 +1213,7 @@ class db_dxil(object):
         add_pass('simplify-inst', 'SimplifyInst', 'Simplify Instructions', [])
         add_pass('mem2reg', 'PromotePass', 'Promote Memory to Register', [])
         add_pass('hlsl-dxil-precise', 'DxilPrecisePropagatePass', 'DXIL precise attribute propagate', [])
+        add_pass('dxil-legalize-sample-offset', 'DxilLegalizeSampleOffsetPass', 'DXIL legalize sample offset', [])
         add_pass('scalarizer', 'Scalarizer', 'Scalarize vector operations', [])
         add_pass('multi-dim-one-dim', 'MultiDimArrayToOneDimArray', 'Flatten multi-dim array into one-dim array', [])
         add_pass('hlsl-dxil-condense', 'DxilCondenseResources', 'DXIL Condense Resources', [])