Explorar el Código

Errors on non-immediate load/gather offsets (#3283)


Gather operations that take four separate offsets are meant to allow for
programmable, non-immediate values. This removes them from the immediate
and range validations by giving the immediate versions their own opcodes.
For the immediate gather intrinsics and for texture loads, errors are
generated when offsets are non-literal or out of range. These largely use
the slightly altered validation paths that the sample intrinsics use.

Reword the error emitted when texture access offsets are not immediates
to be more all-encompassing and grammatical.

Incidentally, remove the duplicate shaders in the old directory that the
validation test was using while identical copies in the validation
directory went unused, and redirect the validation test to the appropriate
copies. This is consistent with the test shader re-org's stated intent.

Sample operations have a maximum of 3 offset args, but gather has a
maximum of two since it always operates on 2D images. So gather operations
only pass in two offset args to the offset validation, which resulted in an
invalid access because the validation always iterated over three offsets.

Rather than adding a specialized condition to evade this, just iterate
over the number of elements in the array. For sample it will be 3 and
for gather 2 and it will still check for expected undefined args
appropriately.

For the offset legalization pass, the opcode is used to determine the
start and end of the offset args.

Only produce the loop unroll suggestion when within a loop

Base error line on call instruction instead of source of the offset

Sort by location in source when possible and remove duplicates

Adapt tests to verify and match these changes

Fixes #2590
Fixes #2713
Greg Roth hace 4 años
padre
commit
3bd5f9ccfa

+ 2 - 0
docs/DXIL.rst

@@ -2318,6 +2318,8 @@ ID  Name                                                  Description
 219 Unpack4x8                                             unpacks 4 8-bit signed or unsigned values into int32 or int16 vector
 220 Pack4x8                                               packs vector of 4 signed or unsigned values into a packed datatype, drops or clamps unused bits
 221 IsHelperLane                                          returns true on helper lanes in pixel shaders
+222 TextureGatherImm                                      same as TextureGather, except offsets are limited to immediate values between -8 and 7
+223 TextureGatherCmpImm                                   same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7
 === ===================================================== =======================================================================================================================================================================================================================
 
 

+ 12 - 4
include/dxc/DXIL/DxilConstants.h

@@ -593,6 +593,8 @@ namespace DXIL {
     // Resources - gather
     TextureGather = 73, // gathers the four texels that would be used in a bi-linear filtering operation
     TextureGatherCmp = 74, // same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
+    TextureGatherCmpImm = 223, // same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7
+    TextureGatherImm = 222, // same as TextureGather, except offsets are limited to immediate values between -8 and 7
   
     // Resources - sample
     RenderTargetGetSampleCount = 77, // gets the number of samples for a render target
@@ -718,7 +720,7 @@ namespace DXIL {
     NumOpCodes_Dxil_1_5 = 216,
     NumOpCodes_Dxil_1_6 = 222,
   
-    NumOpCodes = 222 // exclusive last value of enumeration
+    NumOpCodes = 224 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -900,6 +902,8 @@ namespace DXIL {
     // Resources - gather
     TextureGather,
     TextureGatherCmp,
+    TextureGatherCmpImm,
+    TextureGatherImm,
   
     // Resources - sample
     RenderTargetGetSampleCount,
@@ -983,7 +987,7 @@ namespace DXIL {
     NumOpClasses_Dxil_1_5 = 143,
     NumOpClasses_Dxil_1_6 = 149,
   
-    NumOpClasses = 149 // exclusive last value of enumeration
+    NumOpClasses = 151 // exclusive last value of enumeration
   };
   // OPCODECLASS-ENUM:END
 
@@ -1073,8 +1077,7 @@ namespace DXIL {
     const unsigned kTextureGatherCoord3OpIdx = 6;
     const unsigned kTextureGatherOffset0OpIdx = 7;
     const unsigned kTextureGatherOffset1OpIdx = 8;
-    const unsigned kTextureGatherOffset2OpIdx = 9;
-    const unsigned kTextureGatherChannelOpIdx = 10;
+    const unsigned kTextureGatherChannelOpIdx = 9;
     // TextureGatherCmp.
     const unsigned kTextureGatherCmpCmpValOpIdx = 11;
 
@@ -1090,6 +1093,11 @@ namespace DXIL {
     const unsigned kTextureSampleOffset2OpIdx = 9;
     const unsigned kTextureSampleClampOpIdx = 10;
 
+    // TextureLoad.
+    const unsigned kTextureLoadOffset0OpIdx = 6;
+    const unsigned kTextureLoadOffset1OpIdx = 8;
+    const unsigned kTextureLoadOffset2OpIdx = 9;
+
     // AtomicBinOp.
     const unsigned kAtomicBinOpHandleOpIdx = 1;
     const unsigned kAtomicBinOpCoord0OpIdx = 3;

+ 101 - 0
include/dxc/DXIL/DxilInstructions.h

@@ -7164,5 +7164,106 @@ struct DxilInst_IsHelperLane {
   // Metadata
   bool requiresUniformInputs() const { return false; }
 };
+
+/// This instruction same as TextureGather, except offsets are limited to immediate values between -8 and 7
+struct DxilInst_TextureGatherImm {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_TextureGatherImm(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::TextureGatherImm);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (10 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes
+  enum OperandIdx {
+    arg_srv = 1,
+    arg_sampler = 2,
+    arg_coord0 = 3,
+    arg_coord1 = 4,
+    arg_coord2 = 5,
+    arg_coord3 = 6,
+    arg_offset0 = 7,
+    arg_offset1 = 8,
+    arg_channel = 9,
+  };
+  // Accessors
+  llvm::Value *get_srv() const { return Instr->getOperand(1); }
+  void set_srv(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_sampler() const { return Instr->getOperand(2); }
+  void set_sampler(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_coord0() const { return Instr->getOperand(3); }
+  void set_coord0(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_coord1() const { return Instr->getOperand(4); }
+  void set_coord1(llvm::Value *val) { Instr->setOperand(4, val); }
+  llvm::Value *get_coord2() const { return Instr->getOperand(5); }
+  void set_coord2(llvm::Value *val) { Instr->setOperand(5, val); }
+  llvm::Value *get_coord3() const { return Instr->getOperand(6); }
+  void set_coord3(llvm::Value *val) { Instr->setOperand(6, val); }
+  llvm::Value *get_offset0() const { return Instr->getOperand(7); }
+  void set_offset0(llvm::Value *val) { Instr->setOperand(7, val); }
+  llvm::Value *get_offset1() const { return Instr->getOperand(8); }
+  void set_offset1(llvm::Value *val) { Instr->setOperand(8, val); }
+  llvm::Value *get_channel() const { return Instr->getOperand(9); }
+  void set_channel(llvm::Value *val) { Instr->setOperand(9, val); }
+};
+
+/// This instruction same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7
+struct DxilInst_TextureGatherCmpImm {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_TextureGatherCmpImm(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::TextureGatherCmpImm);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (11 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Metadata
+  bool requiresUniformInputs() const { return false; }
+  // Operand indexes
+  enum OperandIdx {
+    arg_srv = 1,
+    arg_sampler = 2,
+    arg_coord0 = 3,
+    arg_coord1 = 4,
+    arg_coord2 = 5,
+    arg_coord3 = 6,
+    arg_offset0 = 7,
+    arg_offset1 = 8,
+    arg_channel = 9,
+    arg_compareVale = 10,
+  };
+  // Accessors
+  llvm::Value *get_srv() const { return Instr->getOperand(1); }
+  void set_srv(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_sampler() const { return Instr->getOperand(2); }
+  void set_sampler(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_coord0() const { return Instr->getOperand(3); }
+  void set_coord0(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_coord1() const { return Instr->getOperand(4); }
+  void set_coord1(llvm::Value *val) { Instr->setOperand(4, val); }
+  llvm::Value *get_coord2() const { return Instr->getOperand(5); }
+  void set_coord2(llvm::Value *val) { Instr->setOperand(5, val); }
+  llvm::Value *get_coord3() const { return Instr->getOperand(6); }
+  void set_coord3(llvm::Value *val) { Instr->setOperand(6, val); }
+  llvm::Value *get_offset0() const { return Instr->getOperand(7); }
+  void set_offset0(llvm::Value *val) { Instr->setOperand(7, val); }
+  llvm::Value *get_offset1() const { return Instr->getOperand(8); }
+  void set_offset1(llvm::Value *val) { Instr->setOperand(8, val); }
+  llvm::Value *get_channel() const { return Instr->getOperand(9); }
+  void set_channel(llvm::Value *val) { Instr->setOperand(9, val); }
+  llvm::Value *get_compareVale() const { return Instr->getOperand(10); }
+  void set_compareVale(llvm::Value *val) { Instr->setOperand(10, val); }
+};
 // INSTR-HELPER:END
 } // namespace hlsl

+ 6 - 4
lib/DXIL/DxilCounters.cpp

@@ -173,8 +173,9 @@ bool CountDxilOp_tex_bias(unsigned op) {
   return op == 61;
 }
 bool CountDxilOp_tex_cmp(unsigned op) {
-  // Instructions: SampleCmp=64, SampleCmpLevelZero=65, TextureGatherCmp=74
-  return (64 <= op && op <= 65) || op == 74;
+  // Instructions: SampleCmp=64, SampleCmpLevelZero=65, TextureGatherCmp=74,
+  // TextureGatherCmpImm=223
+  return (64 <= op && op <= 65) || op == 74 || op == 223;
 }
 bool CountDxilOp_tex_grad(unsigned op) {
   // Instructions: SampleGrad=63
@@ -185,8 +186,9 @@ bool CountDxilOp_tex_load(unsigned op) {
   return op == 66 || op == 68 || op == 139;
 }
 bool CountDxilOp_tex_norm(unsigned op) {
-  // Instructions: Sample=60, SampleLevel=62, TextureGather=73
-  return op == 60 || op == 62 || op == 73;
+  // Instructions: Sample=60, SampleLevel=62, TextureGather=73,
+  // TextureGatherImm=222
+  return op == 60 || op == 62 || op == 73 || op == 222;
 }
 bool CountDxilOp_tex_store(unsigned op) {
   // Instructions: TextureStore=67, BufferStore=69, RawBufferStore=140,

+ 15 - 0
lib/DXIL/DxilOperations.cpp

@@ -404,6 +404,10 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
   // Helper Lanes                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj ,  function attribute
   {  OC::IsHelperLane,            "IsHelperLane",             OCC::IsHelperLane,             "isHelperLane",              { false, false, false, false,  true, false, false, false, false, false, false}, Attribute::ReadOnly, },
+
+  // Resources - gather                                                                                                      void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj ,  function attribute
+  {  OC::TextureGatherImm,        "TextureGatherImm",         OCC::TextureGatherImm,         "textureGatherImm",          { false,  true,  true, false, false, false,  true,  true, false, false, false}, Attribute::ReadOnly, },
+  {  OC::TextureGatherCmpImm,     "TextureGatherCmpImm",      OCC::TextureGatherCmpImm,      "textureGatherCmpImm",       { false,  true,  true, false, false, false,  true,  true, false, false, false}, Attribute::ReadOnly, },
 };
 // OPCODE-OLOADS:END
 
@@ -847,6 +851,11 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
     major = 6;  minor = 6;
     return;
   }
+  // Instructions: TextureGatherImm=222, TextureGatherCmpImm=223
+  if ((222 <= op && op <= 223)) {
+    major = 6;  minor = 15;
+    return;
+  }
   // OPCODE-SMMASK:END
 }
 
@@ -1433,6 +1442,10 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
 
     // Helper Lanes
   case OpCode::IsHelperLane:           A(pI1);      A(pI32); break;
+
+    // Resources - gather
+  case OpCode::TextureGatherImm:       RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); break;
+  case OpCode::TextureGatherCmpImm:    RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); A(pF32); break;
   // OPCODE-OLOAD-FUNCS:END
   default: DXASSERT(false, "otherwise unhandled case"); break;
   }
@@ -1705,6 +1718,8 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::TextureGatherCmp:
   case OpCode::RawBufferLoad:
   case OpCode::Unpack4x8:
+  case OpCode::TextureGatherImm:
+  case OpCode::TextureGatherCmpImm:
   {
     StructType *ST = cast<StructType>(Ty);
     return ST->getElementType(0);

+ 128 - 46
lib/HLSL/DxilLegalizeSampleOffsetPass.cpp

@@ -17,6 +17,7 @@
 
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
@@ -37,9 +38,19 @@ using namespace hlsl;
 // Legalize Sample offset.
 
 namespace {
+
+// record of the offset value and the call that uses it
+// Used mainly for error detection and reporting
+struct Offset {
+  Value *offset;
+  CallInst *call;
+};
+
 // When optimizations are disabled, try to legalize sample offset.
 class DxilLegalizeSampleOffsetPass : public FunctionPass {
 
+  LoopInfo LI;
+
 public:
   static char ID; // Pass identification, replacement for typeid
   explicit DxilLegalizeSampleOffsetPass() : FunctionPass(ID) {}
@@ -57,7 +68,7 @@ public:
     DxilModule &DM = F.getParent()->GetOrCreateDxilModule();
     hlsl::OP *hlslOP = DM.GetOP();
 
-    std::vector<Instruction *> illegalOffsets;
+    std::vector<Offset> illegalOffsets;
 
     CollectIllegalOffsets(illegalOffsets, F, hlslOP);
 
@@ -68,94 +79,162 @@ public:
     TryUnrollLoop(illegalOffsets, F);
 
     // Collect offset again after mem2reg.
-    std::vector<Instruction *> ssaIllegalOffsets;
+    std::vector<Offset> ssaIllegalOffsets;
     CollectIllegalOffsets(ssaIllegalOffsets, F, hlslOP);
 
     // Run simple optimization to legalize offsets.
     LegalizeOffsets(ssaIllegalOffsets);
 
-    FinalCheck(illegalOffsets, F, hlslOP);
+    FinalCheck(F, hlslOP);
 
     return true;
   }
 
 private:
-  void TryUnrollLoop(std::vector<Instruction *> &illegalOffsets, Function &F);
-  void CollectIllegalOffsets(std::vector<Instruction *> &illegalOffsets,
+  void TryUnrollLoop(std::vector<Offset> &illegalOffsets, Function &F);
+  void CollectIllegalOffsets(std::vector<Offset> &illegalOffsets,
                              Function &F, hlsl::OP *hlslOP);
-  void CollectIllegalOffsets(std::vector<Instruction *> &illegalOffsets,
+  void CollectIllegalOffsets(std::vector<Offset> &illegalOffsets,
                              Function &F, DXIL::OpCode opcode,
                              hlsl::OP *hlslOP);
-  void LegalizeOffsets(const std::vector<Instruction *> &illegalOffsets);
-  void FinalCheck(std::vector<Instruction *> &illegalOffsets, Function &F,
-                  hlsl::OP *hlslOP);
+  void LegalizeOffsets(const std::vector<Offset> &illegalOffsets);
+  void FinalCheck(Function &F, hlsl::OP *hlslOP);
 };
 
 char DxilLegalizeSampleOffsetPass::ID = 0;
 
-bool HasIllegalOffsetInLoop(std::vector<Instruction *> &illegalOffsets,
+bool HasIllegalOffsetInLoop(std::vector<Offset> &illegalOffsets, LoopInfo &LI,
                             Function &F) {
   DominatorTreeAnalysis DTA;
   DominatorTree DT = DTA.run(F);
-  LoopInfo LI;
   LI.Analyze(DT);
 
   bool findOffset = false;
 
-  for (Instruction *I : illegalOffsets) {
-    BasicBlock *BB = I->getParent();
-    if (LI.getLoopFor(BB)) {
-      findOffset = true;
-      break;
+  for (auto it : illegalOffsets) {
+    if (const Instruction *I = dyn_cast<Instruction>(it.offset)) {
+      const BasicBlock *BB = I->getParent();
+      // TODO: determine whether values are actually loop dependent, not just in a loop
+      if (LI.getLoopFor(BB)) {
+        findOffset = true;
+        break;
+      }
     }
   }
   return findOffset;
 }
 
-void CollectIllegalOffset(CallInst *CI,
-                          std::vector<Instruction *> &illegalOffsets) {
+void GetOffsetRange(DXIL::OpCode opcode, unsigned &offsetStart, unsigned &offsetEnd)
+{
+  switch(opcode) {
+  case DXIL::OpCode::TextureLoad:
+    offsetStart = DXIL::OperandIndex::kTextureLoadOffset0OpIdx;
+    offsetEnd = DXIL::OperandIndex::kTextureLoadOffset2OpIdx;
+    break;
+  case DXIL::OpCode::TextureGather:
+  case DXIL::OpCode::TextureGatherCmp:
+  case DXIL::OpCode::TextureGatherImm:
+  case DXIL::OpCode::TextureGatherCmpImm:
+    offsetStart = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
+    offsetEnd = DXIL::OperandIndex::kTextureGatherOffset1OpIdx;
+    break;
+  default:
+    // everything else are sample variants
+    offsetStart = DXIL::OperandIndex::kTextureSampleOffset0OpIdx;
+    offsetEnd = DXIL::OperandIndex::kTextureSampleOffset2OpIdx;
+    break;
+  }
+}
+
+void CollectIllegalOffset(CallInst *CI, DXIL::OpCode opcode,
+                          std::vector<Offset> &illegalOffsets) {
+
+  unsigned offsetStart = 0, offsetEnd = 0;
+
+  GetOffsetRange(opcode, offsetStart, offsetEnd);
+
   Value *offset0 =
-      CI->getArgOperand(DXIL::OperandIndex::kTextureSampleOffset0OpIdx);
-  // No offset.
+      CI->getArgOperand(offsetStart);
+  // No offsets
   if (isa<UndefValue>(offset0))
     return;
 
-  for (unsigned i = DXIL::OperandIndex::kTextureSampleOffset0OpIdx;
-       i <= DXIL::OperandIndex::kTextureSampleOffset2OpIdx; i++) {
+  for (unsigned i = offsetStart; i <= offsetEnd; i++) {
     Value *offset = CI->getArgOperand(i);
-    if (Instruction *I = dyn_cast<Instruction>(offset))
-      illegalOffsets.emplace_back(I);
+    if (Instruction *I = dyn_cast<Instruction>(offset)) {
+      Offset offset = {I, CI};
+      illegalOffsets.emplace_back(offset);
+    }
+    else if(ConstantInt *cOffset = dyn_cast<ConstantInt>(offset)) {
+      int64_t val = cOffset->getValue().getSExtValue();
+      if (val > 7 || val < -8) {
+        Offset offset = {cOffset, CI};
+        illegalOffsets.emplace_back(offset);
+      }
+    }
   }
 }
 }
 
-void DxilLegalizeSampleOffsetPass::FinalCheck(
-    std::vector<Instruction *> &illegalOffsets, Function &F, hlsl::OP *hlslOP) {
+// Return true if the call instruction in pair a and b are the same
+bool InstEq(const Offset &a, const Offset &b) {
+  return a.call == b.call;
+}
+
+// Return true if the call instruction in pair a is before that in pair b
+bool InstLT(const Offset &a, const Offset &b) {
+  DebugLoc aLoc = a.call->getDebugLoc();
+  DebugLoc bLoc = b.call->getDebugLoc();
+
+  if (aLoc && bLoc) {
+    DIScope *aScope = cast<DIScope>(aLoc->getRawScope());
+    DIScope *bScope = cast<DIScope>(bLoc->getRawScope());
+    std::string aFile = aScope->getFilename();
+    std::string bFile = bScope->getFilename();
+    return aFile < bFile || (aFile == bFile && aLoc.getLine() < bLoc.getLine());
+  }
+  // No line numbers, just compare pointers so that matching instructions will be adjacent
+  return a.call < b.call;
+}
+
+void DxilLegalizeSampleOffsetPass::FinalCheck(Function &F, hlsl::OP *hlslOP) {
   // Collect offset to make sure no illegal offsets.
-  std::vector<Instruction *> finalIllegalOffsets;
+  std::vector<Offset> finalIllegalOffsets;
   CollectIllegalOffsets(finalIllegalOffsets, F, hlslOP);
 
   if (!finalIllegalOffsets.empty()) {
-    const StringRef kIllegalOffsetError =
-        "Offsets for Sample* must be immediated value. "
-        "Consider unrolling the loop manually and use -O3, "
-        "it may help in some cases.\n";
-    std::string errorMsg;
-    raw_string_ostream errorStr(errorMsg);
-    for (Instruction *offset : finalIllegalOffsets)
-      dxilutil::EmitErrorOnInstruction(offset, kIllegalOffsetError);
+    std::string errorMsg = "Offsets to texture access operations must be immediate values. ";
+
+    auto offsetBegin = finalIllegalOffsets.begin();
+    auto offsetEnd = finalIllegalOffsets.end();
+
+    std::sort(offsetBegin, offsetEnd, InstLT);
+    offsetEnd = std::unique(offsetBegin, offsetEnd, InstEq);
+
+    for (auto it = offsetBegin; it != offsetEnd; it++) {
+      CallInst *CI = it->call;
+      if (Instruction *offset = dyn_cast<Instruction>(it->offset)) {
+        if (LI.getLoopFor(offset->getParent()))
+          dxilutil::EmitErrorOnInstruction(CI, errorMsg + "Unrolling the loop containing the offset value"
+                                           " manually and using -O3 may help in some cases.\n");
+        else
+          dxilutil::EmitErrorOnInstruction(CI, errorMsg);
+      } else {
+        dxilutil::EmitErrorOnInstruction(CI, "Offsets to texture access operations must be between -8 and 7. ");
+      }
+    }
   }
 }
 
 void DxilLegalizeSampleOffsetPass::TryUnrollLoop(
-    std::vector<Instruction *> &illegalOffsets, Function &F) {
+    std::vector<Offset> &illegalOffsets, Function &F) {
   legacy::FunctionPassManager PM(F.getParent());
   // Scalarize aggregates as mem2reg only applies on scalars.
   PM.add(createSROAPass());
   // Always need mem2reg for simplify illegal offsets.
   PM.add(createPromoteMemoryToRegisterPass());
 
-  bool UnrollLoop = HasIllegalOffsetInLoop(illegalOffsets, F);
+  bool UnrollLoop = HasIllegalOffsetInLoop(illegalOffsets, LI, F);
   if (UnrollLoop) {
     PM.add(createCFGSimplificationPass());
     PM.add(createLCSSAPass());
@@ -172,7 +251,7 @@ void DxilLegalizeSampleOffsetPass::TryUnrollLoop(
 }
 
 void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
-    std::vector<Instruction *> &illegalOffsets, Function &CurF,
+    std::vector<Offset> &illegalOffsets, Function &CurF,
     hlsl::OP *hlslOP) {
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::Sample, hlslOP);
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleBias, hlslOP);
@@ -182,10 +261,13 @@ void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleGrad, hlslOP);
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleLevel,
                         hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::TextureGatherImm, hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::TextureGatherCmpImm, hlslOP);
+  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::TextureLoad, hlslOP);
 }
 
 void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
-    std::vector<Instruction *> &illegalOffsets, Function &CurF,
+    std::vector<Offset> &illegalOffsets, Function &CurF,
     DXIL::OpCode opcode, hlsl::OP *hlslOP) {
   auto &intrFuncList = hlslOP->GetOpFuncList(opcode);
   for (auto it : intrFuncList) {
@@ -198,19 +280,19 @@ void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
       if (CI->getParent()->getParent() != &CurF)
         continue;
 
-      CollectIllegalOffset(CI, illegalOffsets);
+      CollectIllegalOffset(CI, opcode, illegalOffsets);
     }
   }
 }
 
 void DxilLegalizeSampleOffsetPass::LegalizeOffsets(
-    const std::vector<Instruction *> &illegalOffsets) {
-  if (illegalOffsets.size()) {
+    const std::vector<Offset> &illegalOffsets) {
+  if (!illegalOffsets.empty()) {
     DxilValueCache *DVC = &getAnalysis<DxilValueCache>();
-    for (Instruction *I : illegalOffsets) {
-      if (Value *V = DVC->GetValue(I)) {
-        I->replaceAllUsesWith(V);
-      }
+    for (auto it : illegalOffsets) {
+      if (Instruction *I = dyn_cast<Instruction>(it.offset))
+        if (Value *V = DVC->GetValue(I))
+          I->replaceAllUsesWith(V);
     }
   }
 }

+ 30 - 0
lib/HLSL/DxilPreparePasses.cpp

@@ -381,6 +381,29 @@ public:
     }
   }
 
+  // Replace all fromOpcode call instructions with toOpcode equivalents
+  void ReplaceIntrinsics(Module &M, hlsl::OP *hlslOp, DXIL::OpCode fromOpcode, DXIL::OpCode toOpcode) {
+    for (auto it : hlslOp->GetOpFuncList(fromOpcode)) {
+      Function *F = it.second;
+      if (!F)
+        continue;
+      Type *Ty = OP::GetOverloadType(fromOpcode, F);
+      for (auto uit = F->user_begin(); uit != F->user_end(); uit++) {
+        CallInst *CI = cast<CallInst>(*uit);
+        IRBuilder<> Builder(CI);
+        std::vector<Value*> args;
+        args.emplace_back(hlslOp->GetU32Const((unsigned)toOpcode));
+        for (unsigned i = 1; i < CI->getNumArgOperands(); i++)
+          args.emplace_back(CI->getOperand(i));
+
+        Function *newF = hlslOp->GetOpFunc(toOpcode, Ty);
+        CallInst *NewCI = Builder.CreateCall(newF, args);
+        CI->replaceAllUsesWith(NewCI);
+        CI->eraseFromParent();
+      }
+    }
+  }
+
   ///////////////////////////////////////////////////
   // IsHelperLane() lowering for SM < 6.6
 
@@ -732,6 +755,13 @@ public:
       if (DXIL::CompareVersions(DxilMajor, DxilMinor, 1, 6) < 0) {
         patchDxil_1_6(M, hlslOP, ValMajor, ValMinor);
       }
+
+      // Patch all existing dxil versions for some future one
+      // that differentiates immediate and programmable gathers
+      ReplaceIntrinsics(M, hlslOP, OP::OpCode::TextureGatherImm, OP::OpCode::TextureGather);
+      ReplaceIntrinsics(M, hlslOP, OP::OpCode::TextureGatherCmpImm, OP::OpCode::TextureGatherCmp);
+
+
       // Remove store undef output.
       RemoveStoreUndefOutput(M, hlslOP);
 

+ 16 - 4
lib/HLSL/DxilValidation.cpp

@@ -975,6 +975,9 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   // CreateHandleFromHeap=218, Unpack4x8=219, Pack4x8=220, IsHelperLane=221
   if ((216 <= op && op <= 221))
     return (major > 6 || (major == 6 && minor >= 6));
+  // Instructions: TextureGatherImm=222, TextureGatherCmpImm=223
+  if ((222 <= op && op <= 223))
+    return (major > 6 || (major == 6 && minor >= 15));
   return true;
   // VALOPCODESM-TEXT:END
 }
@@ -1234,7 +1237,6 @@ static void ValidateCalcLODResourceDimensionCoord(CallInst *CI, DXIL::ResourceKi
 static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind,
                                    ArrayRef<Value *> offsets,
                                    ValidationContext &ValCtx) {
-  const unsigned kMaxNumOffsets = 3;
   unsigned numOffsets = DxilResource::GetNumOffsets(resKind);
   bool hasOffset = !isa<UndefValue>(offsets[0]);
 
@@ -1253,7 +1255,7 @@ static void ValidateResourceOffset(CallInst *CI, DXIL::ResourceKind resKind,
     validateOffset(offsets[0]);
   }
 
-  for (unsigned i = 1; i < kMaxNumOffsets; i++) {
+  for (unsigned i = 1; i < offsets.size(); i++) {
     if (i < numOffsets) {
       if (hasOffset) {
         if (isa<UndefValue>(offsets[i]))
@@ -1393,8 +1395,11 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle,
   default:
     // Invalid resource type for gather.
     ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForGather);
-    break;
+    return;
   }
+  if (OP::IsDxilOpFuncCallInst(CI, DXIL::OpCode::TextureGatherImm) ||
+      OP::IsDxilOpFuncCallInst(CI, DXIL::OpCode::TextureGatherCmpImm))
+    ValidateResourceOffset(CI, resKind, offsets, ValCtx);
 }
 
 static unsigned StoreValueToMask(ArrayRef<Value *> vals) {
@@ -1943,6 +1948,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode,
 
     ValidateDerivativeOp(CI, ValCtx);
   } break;
+  case DXIL::OpCode::TextureGatherImm:
   case DXIL::OpCode::TextureGather: {
     DxilInst_TextureGather gather(CI);
     ValidateGather(CI, gather.get_srv(), gather.get_sampler(),
@@ -1951,6 +1957,7 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode,
                    {gather.get_offset0(), gather.get_offset1()},
                    /*IsSampleC*/ false, ValCtx);
   } break;
+  case DXIL::OpCode::TextureGatherCmpImm:
   case DXIL::OpCode::TextureGatherCmp: {
     DxilInst_TextureGatherCmp gather(CI);
     ValidateGather(CI, gather.get_srv(), gather.get_sampler(),
@@ -2202,8 +2209,11 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode,
     default:
       ValCtx.EmitInstrError(CI,
                             ValidationRule::InstrResourceKindForTextureLoad);
-      break;
+      return;
     }
+
+    ValidateResourceOffset(CI, resKind, {texLd.get_offset0(), texLd.get_offset1(),
+                                         texLd.get_offset2()}, ValCtx);
   } break;
   case DXIL::OpCode::CBufferLoad: {
     DxilInst_CBufferLoad CBLoad(CI);
@@ -2381,6 +2391,8 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
   case DXIL::OpCode::CalculateLOD:
   case DXIL::OpCode::TextureGather:
   case DXIL::OpCode::TextureGatherCmp:
+  case DXIL::OpCode::TextureGatherImm:
+  case DXIL::OpCode::TextureGatherCmpImm:
   case DXIL::OpCode::Sample:
   case DXIL::OpCode::SampleCmp:
   case DXIL::OpCode::SampleCmpLevelZero:

+ 14 - 9
lib/HLSL/HLOperationLower.cpp

@@ -3167,9 +3167,12 @@ GatherHelper::GatherHelper(
       if (ch != GatherChannel::GatherAll)
         TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
                               offsetSize);
-      statusIdx =
-          hasSampleOffsets ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex
-                           : HLOperandIndex::kGatherStatusArgIndex;
+      if (hasSampleOffsets) {
+        statusIdx = HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex;
+      } else {
+        opcode = OP::OpCode::TextureGatherImm;
+        statusIdx = HLOperandIndex::kGatherStatusArgIndex;
+      }
     }
     SetStatus(CI, statusIdx);
   } break;
@@ -3185,10 +3188,12 @@ GatherHelper::GatherHelper(
       if (ch != GatherChannel::GatherAll)
         TranslateSampleOffset(CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex,
                               offsetSize);
-      statusIdx =
-          hasSampleOffsets
-              ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex
-              : HLOperandIndex::kGatherCmpStatusArgIndex;
+      if (hasSampleOffsets) {
+        statusIdx = HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex;
+      } else {
+        opcode = OP::OpCode::TextureGatherCmpImm;
+        statusIdx = HLOperandIndex::kGatherCmpStatusArgIndex;
+      }
     }
     SetStatus(CI, statusIdx);
   } break;
@@ -3283,9 +3288,9 @@ Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   }
   Type *Ty = CI->getType();
 
-  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
+  Function *F = hlslOP->GetOpFunc(gatherHelper.opcode, Ty->getScalarType());
 
-  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
+  Constant *opArg = hlslOP->GetU32Const((unsigned)gatherHelper.opcode);
   Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel);
 
   switch (opcode) {

+ 0 - 17
tools/clang/test/CodeGenHLSL/optForNoOpt3.hlsl

@@ -1,17 +0,0 @@
-// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
-
-// CHECK: Offsets for Sample* must be immediated value
-
-SamplerState samp1 : register(s5);
-Texture2D<float4> text1 : register(t3);
-
-
-int x;
-int y;
-
-float4 main(float2 a : A) : SV_Target {
-  float4 r = 0;
-  r = text1.Sample(samp1, a, int2(x+y,x-y));
-
-  return r;
-}

+ 0 - 17
tools/clang/test/CodeGenHLSL/optForNoOpt4.hlsl

@@ -1,17 +0,0 @@
-// RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
-
-// CHECK: Offsets for Sample* must be immediated value
-
-SamplerState samp1 : register(s5);
-Texture2D<float4> text1 : register(t3);
-
-int i;
-
-float4 main(float2 a : A) : SV_Target {
-  float4 r = 0;
-  for (uint x=0; x<i;x++)
-  for (uint y=0; y<2;y++) {
-    r += text1.Sample(samp1, a, int2(x+y,x-y));
-  }
-  return r;
-}

+ 1 - 1
tools/clang/test/DXILValidation/optForNoOpt3.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
 
-// CHECK: Offsets for Sample* must be immediated value
+// CHECK: Offsets to texture access operations must be immediate values
 
 SamplerState samp1 : register(s5);
 Texture2D<float4> text1 : register(t3);

+ 1 - 1
tools/clang/test/DXILValidation/optForNoOpt4.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -E main -T ps_6_0 -Od %s | FileCheck %s
 
-// CHECK: Offsets for Sample* must be immediated value
+// CHECK: Offsets to texture access operations must be immediate values
 
 SamplerState samp1 : register(s5);
 Texture2D<float4> text1 : register(t3);

+ 1 - 1
tools/clang/test/HLSLFileCheck/dxil/debug/value_cache/phi.hlsl

@@ -1,6 +1,6 @@
 // RUN: %dxc -E main -T ps_6_0 %s -Od | FileCheck %s
 
-// CHECK: Offsets for Sample* must be immediated value
+// CHECK: Offsets to texture access operations must be immediate values
 
 // Regression test that DxilValueCache (DVC) isn't so over-zealous.
 

+ 123 - 0
tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/offsets.hlsl

@@ -0,0 +1,123 @@
+// RUN: %dxc -E Range -T ps_6_0 %s | FileCheck %s -check-prefix=CHK_RANGE
+
+// RUN: %dxc -E VarOffset -T ps_6_0 -DOFFSETS=argOffsets %s | FileCheck %s -check-prefix=CHK_VAROFF
+// RUN: %dxc -E VarOffset -T ps_6_0 -DOFFSETS=cbufOffsets %s | FileCheck %s -check-prefix=CHK_VAROFF
+// RUN: %dxc -E VarOffset -T ps_6_0 -DOFFSETS=constOffsets %s | FileCheck %s -check-prefix=CHK_VAROFF
+// RUN: %dxc -E VarOffset -T ps_6_0 -DOFFSETS=validOffsets %s | FileCheck %s -check-prefix=CHK_VALID
+
+// RUN: %dxc -E ValidOffset -T ps_6_0 -DOFFSETS=argOffsets %s | FileCheck %s -check-prefix=CHK_VALID
+// RUN: %dxc -E ValidOffset -T ps_6_0 -DOFFSETS=cbufOffsets %s | FileCheck %s -check-prefix=CHK_VALID
+// RUN: %dxc -E ValidOffset -T ps_6_0 -DOFFSETS=constOffsets %s | FileCheck %s -check-prefix=CHK_VALID
+// RUN: %dxc -E ValidOffset -T ps_6_0 -DOFFSETS=validOffsets %s | FileCheck %s -check-prefix=CHK_VALID
+
+// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+
+// CHK_VAROFF: Offsets to texture access operations must be immediate values
+// CHK_VAROFF: Offsets to texture access operations must be immediate values
+// CHK_VAROFF: Offsets to texture access operations must be immediate values
+// CHK_VAROFF: Offsets to texture access operations must be immediate values
+// CHK_VAROFF: Offsets to texture access operations must be immediate values
+// CHK_VAROFF: Offsets to texture access operations must be immediate values
+// CHK_VAROFF: Offsets to texture access operations must be immediate values
+// CHK_VAROFF: Offsets to texture access operations must be immediate values
+// CHK_VAROFF: Offsets to texture access operations must be immediate values
+// CHK_VAROFF: Offsets to texture access operations must be immediate values
+
+
+// Just make sure it compiles without errors
+// CHK_VALID: define void
+// CHK_VALID: ret void
+
+Texture1D t1;
+Texture2D t2;
+Texture3D t3;
+SamplerState s;
+SamplerComparisonState sc;
+
+float4 Range(float3 str : STR) : SV_TARGET // Entry point for the CHK_RANGE run: ten calls, ten expected range errors.
+{ // Every call below passes a literal offset with at least one component outside -8..7.
+    float4 res = 0.0;
+    res += t1.Sample(s, str.x, -10); // Sample group: 1-, 2- and 3-component offsets.
+    res += t2.Sample(s, str.xy, int2(-18,19));
+    res += t3.Sample(s, str, int3(-10,1,3));
+
+    res += t1.Load(0, 90); // Load group: 1-, 2- and 3-component offsets.
+    res += t2.Load(1, int2(80, 90));
+    res += t3.Load(2, int3(-1, -2, 11));
+
+    res += t2.Gather     (s, str.xy, int2(9,8)); // Gather group: single-offset overloads only.
+    res += t2.GatherRed  (s, str.xy, int2(-9,-8));
+    res += t2.GatherCmp     (sc, str.xy, 0.0, int2(999999, -999999));
+    res += t2.GatherCmpRed  (sc, str.xy, 0.0, int2(0, 10));
+
+    return res;
+}
+
+#ifndef OFFSETS
+#define OFFSETS argOffsets
+#endif
+
+uint3 cbufOffsets[4];
+
+// Entry point for the VarOffset runs. The OFFSETS macro selects which array
+// supplies the offsets. Every call uses a single-offset overload, so each of
+// the ten calls is expected to report a non-immediate offset error unless
+// OFFSETS is validOffsets, in which case the shader must compile cleanly.
+float4 VarOffset(float3 str : STR, uint3 argOffsets[4] : O, uint a : A) : SV_TARGET
+{
+    uint b = 3 + a;
+    uint v = 3;
+    // Offsets derived from shader inputs and cbuffer contents.
+    const uint3 constOffsets[4] = {uint3(a,a,a), argOffsets[0], cbufOffsets[0], uint3(b,b,b)};
+    // Offsets built only from literals and the literal-initialized local v.
+    uint3 validOffsets[4] = {uint3(v,v,v), uint3(1,1,1), uint3(2,2,2), uint3(3,3,3)};
+    float4 res = 0.0;
+    // Sample group covers Texture1D/2D/3D, matching Range() and the Load group
+    // below; the 1D case uses t1 so the scalar-offset Sample path is exercised.
+    res += t1.Sample(s, str.x, OFFSETS[0].x);
+    res += t2.Sample(s, str.xy, OFFSETS[0].xy);
+    res += t3.Sample(s, str, OFFSETS[0]);
+
+    // Load group: 1-, 2- and 3-component offsets.
+    res += t1.Load(0, OFFSETS[0].x);
+    res += t2.Load(1, OFFSETS[0].xy);
+    res += t3.Load(2, OFFSETS[0]);
+
+    // Gather group: single-offset overloads only.
+    res += t2.Gather     (s, str.xy, OFFSETS[0].xy);
+    res += t2.GatherRed  (s, str.xy, OFFSETS[1].xy);
+    res += t2.GatherCmp     (sc, str.xy, 0.0, OFFSETS[0].xy);
+    res += t2.GatherCmpRed  (sc, str.xy, 0.0, OFFSETS[1].xy);
+
+    return res;
+}
+
+float4 ValidOffset(float3 str : STR, uint3 argOffsets[4] : O, uint a : A) : SV_TARGET // Entry point for the ValidOffset runs; all of them expect CHK_VALID.
+{ // Only four-offset gather overloads are used here, so no offset error is expected for any OFFSETS choice.
+    uint b = 3 + a;
+    uint v = 3;
+    const uint3 constOffsets[4] = {uint3(a,a,a), argOffsets[0], cbufOffsets[0], uint3(b,b,b)}; // Selectable via -DOFFSETS=constOffsets.
+    uint3 validOffsets[4] = {uint3(v,v,v), uint3(1,1,1), uint3(2,2,2), uint3(3,3,3)}; // Selectable via -DOFFSETS=validOffsets.
+    float4 res = 0.0;
+
+    res += t2.GatherRed  (s, str.xy, int2(0,0), int2(1,1), int2(2,2), int2(-11, 1)); // Literal offsets with one value outside -8..7, in a different position per call.
+    res += t2.GatherGreen(s, str.xy, int2(0,0), int2(1,1), int2(0,-9), int2(3,3));
+    res += t2.GatherBlue (s, str.xy, int2(0,0), int2(3,33), int2(2,2), int2(3,3));
+    res += t2.GatherAlpha(s, str.xy, int2(11,1), int2(1,1), int2(2,2), int2(3,3));
+
+    res += t2.GatherCmpRed  (sc, str.xy, 0.0, int2(0,0), int2(1,1), int2(2,2), int2(3,-9)); // Same pattern for the comparison gathers.
+    res += t2.GatherCmpGreen(sc, str.xy, 0.0, int2(0,0), int2(1,1), int2(10, 5), int2(3,3));
+    res += t2.GatherCmpBlue (sc, str.xy, 0.0, int2(0,0), int2(-11,6), int2(2,2), int2(3,3));
+    res += t2.GatherCmpAlpha(sc, str.xy, 0.0, int2(9,9), int2(1,1), int2(2,2), int2(3,3));
+
+    res += t2.GatherRed  (s, str.xy, int2(0,0), int2(1,1), int2(2,2), OFFSETS[3].xy); // OFFSETS-selected offset placed in each of the four positions in turn.
+    res += t2.GatherGreen(s, str.xy, int2(0,0), int2(1,1), OFFSETS[2].xy, int2(3,3));
+    res += t2.GatherBlue (s, str.xy, int2(0,0), OFFSETS[1].xy, int2(2,2), int2(3,3));
+    res += t2.GatherAlpha(s, str.xy, OFFSETS[0].xy, int2(1,1), int2(2,2), int2(3,3));
+
+    res += t2.GatherCmpRed  (sc, str.xy, 0.0, int2(0,0), int2(1,1), int2(2,2), OFFSETS[3].xy); // Same pattern for the comparison gathers.
+    res += t2.GatherCmpGreen(sc, str.xy, 0.0, int2(0,0), int2(1,1), OFFSETS[2].xy, int2(3,3));
+    res += t2.GatherCmpBlue (sc, str.xy, 0.0, int2(0,0), OFFSETS[1].xy, int2(2,2), int2(3,3));
+    res += t2.GatherCmpAlpha(sc, str.xy, 0.0, OFFSETS[0].xy, int2(1,1), int2(2,2), int2(3,3));
+
+    return res;
+}

+ 2 - 5
tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/optForNoOpt3.hlsl

@@ -1,11 +1,8 @@
 // RUN: %dxc -Zi -E main -Od -T ps_6_0 %s | FileCheck %s -check-prefix=CHK_DB
 // RUN: %dxc -E main -Od -T ps_6_0 %s | FileCheck %s -check-prefix=CHK_NODB
 
-// CHK_DB: 20:36: error: Offsets for Sample* must be immediated value
-// CHK_DB: 20:40: error: Offsets for Sample* must be immediated value
-// CHK_NODB: Offsets for Sample* must be immediated value.
-// CHK_NODB-SAME Use /Zi for source location.
-// CHK_NODB: Offsets for Sample* must be immediated value.
+// CHK_DB: 17:7: error: Offsets to texture access operations must be immediate values
+// CHK_NODB: Offsets to texture access operations must be immediate values.
 // CHK_NODB-SAME Use /Zi for source location.
 
 SamplerState samp1 : register(s5);

+ 4 - 10
tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/optForNoOpt4.hlsl

@@ -1,18 +1,12 @@
 // RUN: %dxc -Zi -E main -Od -T ps_6_0 %s | FileCheck %s -check-prefix=CHK_DB
 // RUN: %dxc -E main -Od -T ps_6_0 %s | FileCheck %s -check-prefix=CHK_NODB
 
-// CHK_DB: 27:39: error: Offsets for Sample* must be immediated value
-// CHK_DB: 27:43: error: Offsets for Sample* must be immediated value
-// CHK_DB: 27:10: error: Offsets for Sample* must be immediated value
-// CHK_DB: 27:10: error: Offsets for Sample* must be immediated value
+// CHK_DB: 21:10: error: Offsets to texture access operations must be immediate values. Unrolling the loop containing the offset value manually and using -O3 may help in some cases.
+// CHK_DB: 21:10: error: Offsets to texture access operations must be immediate values. Unrolling the loop containing the offset value manually and using -O3 may help in some cases.
 
-// CHK_NODB: error: Offsets for Sample* must be immediated value.
+// CHK_NODB: error: Offsets to texture access operations must be immediate values. Unrolling the loop containing the offset value manually and using -O3 may help in some cases.
 // CHK_NODB-SAME Use /Zi for source location.
-// CHK_NODB: error: Offsets for Sample* must be immediated value.
-// CHK_NODB-SAME Use /Zi for source location.
-// CHK_NODB: error: Offsets for Sample* must be immediated value.
-// CHK_NODB-SAME Use /Zi for source location.
-// CHK_NODB: error: Offsets for Sample* must be immediated value.
+// CHK_NODB: error: Offsets to texture access operations must be immediate values. Unrolling the loop containing the offset value manually and using -O3 may help in some cases.
 // CHK_NODB-SAME Use /Zi for source location.
 
 SamplerState samp1 : register(s5);

+ 8 - 8
tools/clang/test/HLSLFileCheck/hlsl/objects/Texture/gather.hlsl

@@ -14,26 +14,26 @@ float4 main(float2 a : A) : SV_Target
   r += text1.Gather(samp1, a, uint2(-3, 2), status); r += status;
 
   r += text1.GatherAlpha(samp1, a);
-  r += text1.GatherAlpha(samp1, a, uint2(-3,8));  
-  r += text1.GatherAlpha(samp1, a, uint2(-3,8),status); r += status;    
+  r += text1.GatherAlpha(samp1, a, uint2(-3,7));
+  r += text1.GatherAlpha(samp1, a, uint2(-3,7),status); r += status;
   r += text1.GatherAlpha(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));  
   r += text1.GatherAlpha(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=status;  
   
   r += text1.GatherBlue(samp1, a);
-  r += text1.GatherBlue(samp1, a, uint2(-3,8));  
-  r += text1.GatherBlue(samp1, a, uint2(-3,8),status); r += status;    
+  r += text1.GatherBlue(samp1, a, uint2(-3,7));
+  r += text1.GatherBlue(samp1, a, uint2(-3,7),status); r += status;
   r += text1.GatherBlue(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));  
   r += text1.GatherBlue(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=status;  
     
   r += text1.GatherGreen(samp1, a);
-  r += text1.GatherGreen(samp1, a, uint2(-3,8));  
-  r += text1.GatherGreen(samp1, a, uint2(-3,8),status); r += status;    
+  r += text1.GatherGreen(samp1, a, uint2(-3,7));
+  r += text1.GatherGreen(samp1, a, uint2(-3,7),status); r += status;
   r += text1.GatherGreen(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));  
   r += text1.GatherGreen(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=status;  
     
   r += text1.GatherRed(samp1, a);
-  r += text1.GatherRed(samp1, a, uint2(-3,8));  
-  r += text1.GatherRed(samp1, a, uint2(-3,8),status); r += status;    
+  r += text1.GatherRed(samp1, a, uint2(-3,7));
+  r += text1.GatherRed(samp1, a, uint2(-3,7),status); r += status;
   r += text1.GatherRed(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));  
   r += text1.GatherRed(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=status;  
   

+ 8 - 8
tools/clang/test/HLSLFileCheck/hlsl/objects/Texture/gatherCmp.hlsl

@@ -14,26 +14,26 @@ float4 main(float2 a : A) : SV_Target
   r += text1.GatherCmp(samp1, a, cmpVal, uint2(-3, 2), status); r += status;
 
   r += text1.GatherCmpAlpha(samp1, a, cmpVal);
-  r += text1.GatherCmpAlpha(samp1, a, cmpVal, uint2(-3,8));  
-  r += text1.GatherCmpAlpha(samp1, a, cmpVal, uint2(-3,8),status); r += status;    
+  r += text1.GatherCmpAlpha(samp1, a, cmpVal, uint2(-3,7));
+  r += text1.GatherCmpAlpha(samp1, a, cmpVal, uint2(-3,7),status); r += status;
   r += text1.GatherCmpAlpha(samp1, a, cmpVal, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));  
   r += text1.GatherCmpAlpha(samp1, a, cmpVal, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=status;  
   
   r += text1.GatherCmpBlue(samp1, a, cmpVal);
-  r += text1.GatherCmpBlue(samp1, a, cmpVal, uint2(-3,8));  
-  r += text1.GatherCmpBlue(samp1, a, cmpVal, uint2(-3,8),status); r += status;    
+  r += text1.GatherCmpBlue(samp1, a, cmpVal, uint2(-3,7));
+  r += text1.GatherCmpBlue(samp1, a, cmpVal, uint2(-3,7),status); r += status;
   r += text1.GatherCmpBlue(samp1, a, cmpVal, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));  
   r += text1.GatherCmpBlue(samp1, a, cmpVal, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=status;  
     
   r += text1.GatherCmpGreen(samp1, a, cmpVal);
-  r += text1.GatherCmpGreen(samp1, a, cmpVal, uint2(-3,8));  
-  r += text1.GatherCmpGreen(samp1, a, cmpVal, uint2(-3,8),status); r += status;    
+  r += text1.GatherCmpGreen(samp1, a, cmpVal, uint2(-3,7));
+  r += text1.GatherCmpGreen(samp1, a, cmpVal, uint2(-3,7),status); r += status;
   r += text1.GatherCmpGreen(samp1, a, cmpVal, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));  
   r += text1.GatherCmpGreen(samp1, a, cmpVal, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=status;  
     
   r += text1.GatherCmpRed(samp1, a, cmpVal);
-  r += text1.GatherCmpRed(samp1, a, cmpVal, uint2(-3,8));  
-  r += text1.GatherCmpRed(samp1, a, cmpVal, uint2(-3,8),status); r += status;    
+  r += text1.GatherCmpRed(samp1, a, cmpVal, uint2(-3,7));
+  r += text1.GatherCmpRed(samp1, a, cmpVal, uint2(-3,7),status); r += status;
   r += text1.GatherCmpRed(samp1, a, cmpVal, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));  
   r += text1.GatherCmpRed(samp1, a, cmpVal, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=status;  
   

+ 8 - 8
tools/clang/test/HLSLFileCheck/hlsl/objects/Texture/gather_cs.hlsl

@@ -26,26 +26,26 @@ void main(uint id : SV_GroupIndex)
   a *= 1.125; // Prevent GatherCmpRed from being optimized to equivalent GatherCmp above
 
   r += text1.GatherAlpha(samp1, a);
-  r += text1.GatherAlpha(samp1, a, uint2(-3,8));
-  r += text1.GatherAlpha(samp1, a, uint2(-3,8),status); r += CheckAccessFullyMapped(status);
+  r += text1.GatherAlpha(samp1, a, uint2(-3,7));
+  r += text1.GatherAlpha(samp1, a, uint2(-3,7),status); r += CheckAccessFullyMapped(status);
   r += text1.GatherAlpha(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));
   r += text1.GatherAlpha(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=CheckAccessFullyMapped(status);
 
   r += text1.GatherBlue(samp1, a);
-  r += text1.GatherBlue(samp1, a, uint2(-3,8));
-  r += text1.GatherBlue(samp1, a, uint2(-3,8),status); r += CheckAccessFullyMapped(status);
+  r += text1.GatherBlue(samp1, a, uint2(-3,7));
+  r += text1.GatherBlue(samp1, a, uint2(-3,7),status); r += CheckAccessFullyMapped(status);
   r += text1.GatherBlue(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));
   r += text1.GatherBlue(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=CheckAccessFullyMapped(status);
 
   r += text1.GatherGreen(samp1, a);
-  r += text1.GatherGreen(samp1, a, uint2(-3,8));
-  r += text1.GatherGreen(samp1, a, uint2(-3,8),status); r += CheckAccessFullyMapped(status);
+  r += text1.GatherGreen(samp1, a, uint2(-3,7));
+  r += text1.GatherGreen(samp1, a, uint2(-3,7),status); r += CheckAccessFullyMapped(status);
   r += text1.GatherGreen(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));
   r += text1.GatherGreen(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=CheckAccessFullyMapped(status);
 
   r += text1.GatherRed(samp1, a);
-  r += text1.GatherRed(samp1, a, uint2(-3,8));
-  r += text1.GatherRed(samp1, a, uint2(-3,8),status); r += CheckAccessFullyMapped(status);
+  r += text1.GatherRed(samp1, a, uint2(-3,7));
+  r += text1.GatherRed(samp1, a, uint2(-3,7),status); r += CheckAccessFullyMapped(status);
   r += text1.GatherRed(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));
   r += text1.GatherRed(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=CheckAccessFullyMapped(status);
 

+ 3 - 1
tools/clang/tools/dxcompiler/dxcdisassembler.cpp

@@ -1293,7 +1293,9 @@ static const char *OpCodeSignatures[] = {
   "(index,samplerHeap,nonUniformIndex)",  // CreateHandleFromHeap
   "(unpackMode,pk)",  // Unpack4x8
   "(packMode,x,y,z,w)",  // Pack4x8
-  "()"  // IsHelperLane
+  "()",  // IsHelperLane
+  "(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel)",  // TextureGatherImm
+  "(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel,compareVale)"  // TextureGatherCmpImm
 };
 // OPCODE-SIGS:END
 

+ 2 - 2
tools/clang/unittests/HLSL/ValidationTest.cpp

@@ -1585,11 +1585,11 @@ TEST_F(ValidationTest, SimpleGs10) {
 }
 
 TEST_F(ValidationTest, IllegalSampleOffset3) {
-  TestCheck(L"..\\CodeGenHLSL\\optForNoOpt3.hlsl");
+  TestCheck(L"..\\DXILValidation\\optForNoOpt3.hlsl");
 }
 
 TEST_F(ValidationTest, IllegalSampleOffset4) {
-  TestCheck(L"..\\CodeGenHLSL\\optForNoOpt4.hlsl");
+  TestCheck(L"..\\DXILValidation\\optForNoOpt4.hlsl");
 }
 
 TEST_F(ValidationTest, NoFunctionParam) {

+ 31 - 0
utils/hct/hctdb.py

@@ -298,6 +298,9 @@ class db_dxil(object):
             self.name_idx[i].shader_stages = ("pixel",)
         for i in "TextureGather,TextureGatherCmp".split(","):
             self.name_idx[i].category = "Resources - gather"
+        for i in "TextureGatherImm,TextureGatherCmpImm".split(","):
+            self.name_idx[i].category = "Resources - gather"
+            self.name_idx[i].shader_model = 6,15 # Dummy large shader model to prevent accidental inclusion
         for i in "AtomicBinOp,AtomicCompareExchange,Barrier".split(","):
             self.name_idx[i].category = "Synchronization"
         for i in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY".split(","):
@@ -1869,6 +1872,34 @@ class db_dxil(object):
         self.set_op_count_for_version(1, 6, next_op_idx)
         assert next_op_idx == 222, "222 is expected next operation index but encountered %d and thus opcodes are broken" % next_op_idx
 
+        self.add_dxil_op("TextureGatherImm", next_op_idx, "TextureGatherImm", "same as TextureGather, except offsets are limited to immediate values between -8 and 7", "hfwi", "ro", [
+            db_dxil_param(0, "$r", "", "dimension information for texture"),
+            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
+            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
+            db_dxil_param(4, "f", "coord0", "coordinate"),
+            db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
+            db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
+            db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
+            db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
+            db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
+            db_dxil_param(10, "i32", "channel", "channel to sample")],
+            counters=('tex_norm',))
+        next_op_idx += 1
+        self.add_dxil_op("TextureGatherCmpImm", next_op_idx, "TextureGatherCmpImm", "same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7", "hfwi", "ro", [  # immediate-offset variant of TextureGatherCmp
+            db_dxil_param(0, "$r", "", "gathered texels"),
+            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
+            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
+            db_dxil_param(4, "f", "coord0", "coordinate"),
+            db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
+            db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
+            db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
+            db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
+            db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
+            db_dxil_param(10, "i32", "channel", "channel to sample"),
+            db_dxil_param(11, "f", "compareVale", "value to compare with")],  # operand 11 is the only addition relative to TextureGatherImm
+            counters=('tex_cmp',))  # counted under tex_cmp, whereas TextureGatherImm uses tex_norm
+        next_op_idx += 1
+
         # Set interesting properties.
         self.build_indices()
         for i in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,Sample,SampleBias,SampleCmp".split(","):