Selaa lähdekoodia

Merge pull request #3769 from pow2clk/rel2104

April Release refresh
Greg Roth 4 vuotta sitten
vanhempi
commit
3c0a264fbf
44 muutettua tiedostoa jossa 573 lisäystä ja 406 poistoa
  1. 5 1
      CMakeLists.txt
  2. 0 2
      docs/DXIL.rst
  3. 2 6
      include/dxc/DXIL/DxilConstants.h
  4. 0 101
      include/dxc/DXIL/DxilInstructions.h
  5. 1 0
      include/dxc/DXIL/DxilPDB.h
  6. 9 0
      include/dxc/Support/ErrorCodes.h
  7. 29 0
      include/dxc/dxcerrors.h
  8. 2 0
      include/llvm/IR/DebugInfoMetadata.h
  9. 5 0
      include/llvm/IR/IntrinsicInst.h
  10. 7 6
      include/llvm/Support/Casting.h
  11. 9 3
      include/llvm/Support/ErrorHandling.h
  12. 4 6
      lib/DXIL/DxilCounters.cpp
  13. 0 15
      lib/DXIL/DxilOperations.cpp
  14. 8 2
      lib/DXIL/DxilPDB.cpp
  15. 88 66
      lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp
  16. 3 15
      lib/HLSL/DxilLegalizeSampleOffsetPass.cpp
  17. 7 5
      lib/HLSL/DxilLinker.cpp
  18. 5 4
      lib/HLSL/DxilLoopDeletion.cpp
  19. 0 29
      lib/HLSL/DxilPreparePasses.cpp
  20. 0 10
      lib/HLSL/DxilValidation.cpp
  21. 7 12
      lib/HLSL/HLOperationLower.cpp
  22. 16 6
      lib/Support/ErrorHandling.cpp
  23. 1 1
      lib/Transforms/IPO/PassManagerBuilder.cpp
  24. 1 1
      lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp
  25. 3 3
      lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
  26. 5 2
      tools/clang/include/clang/Basic/Diagnostic.h
  27. 2 1
      tools/clang/lib/SPIRV/EmitVisitor.cpp
  28. 2 2
      tools/clang/lib/Sema/SemaHLSL.cpp
  29. 5 2
      tools/clang/lib/Sema/TreeTransform.h
  30. 1 1
      tools/clang/test/CodeGenSPIRV/ternary-op.cond-op.hlsl
  31. 2 53
      tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/offsets.hlsl
  32. 178 0
      tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/offsets_gather.hlsl
  33. 15 0
      tools/clang/test/HLSLFileCheck/hlsl/objects/Texture/gatherOffset2.hlsl
  34. 12 0
      tools/clang/test/HLSLFileCheck/hlsl/operators/select/sel_vec1_mixed.hlsl
  35. 2 1
      tools/clang/test/HLSLFileCheck/hlsl/operators/swizzle/swizzleInCorrectDelayedTyposInExpr.hlsl
  36. 6 0
      tools/clang/test/HLSLFileCheck/hlsl/payload_qualifier/general.hlsl
  37. 40 0
      tools/clang/test/HLSLFileCheck/passes/dxil/dxil_loop_deletion/loop_deletion_inst_simplification.hlsl
  38. 35 0
      tools/clang/test/HLSLFileCheck/shader_targets/mesh/as-groupshared-nested-payload.hlsl
  39. 3 2
      tools/clang/test/HLSLFileCheck/shader_targets/mesh/as-groupshared-payload.hlsl
  40. 40 9
      tools/clang/tools/dxclib/dxc.cpp
  41. 1 3
      tools/clang/tools/dxcompiler/dxcdisassembler.cpp
  42. 3 1
      tools/clang/tools/dxcompiler/dxcompilerobj.cpp
  43. 9 4
      tools/clang/unittests/HLSL/PixTest.cpp
  44. 0 31
      utils/hct/hctdb.py

+ 5 - 1
CMakeLists.txt

@@ -100,7 +100,11 @@ option(HLSL_SUPPORT_QUERY_GIT_COMMIT_INFO "Supports querying Git commit info." O
 if ( HLSL_SUPPORT_QUERY_GIT_COMMIT_INFO )
   add_definitions(-DSUPPORT_QUERY_GIT_COMMIT_INFO)
 endif()
-# HLSL Chnage Ends
+# adjust link option to enable debugging from kernel mode; not compatible with incremental linking
+if(NOT CMAKE_VERSION VERSION_LESS "3.13" AND WIN32)
+  add_link_options(/DEBUGTYPE:CV,FIXUP,PDATA /INCREMENTAL:NO)
+endif()
+# HLSL Change Ends
 
 # HLSL Change Starts - set flag for Appveyor CI
 if ( "$ENV{CI}" AND "$ENV{APPVEYOR}" )

+ 0 - 2
docs/DXIL.rst

@@ -2318,8 +2318,6 @@ ID  Name                                                  Description
 219 Unpack4x8                                             unpacks 4 8-bit signed or unsigned values into int32 or int16 vector
 220 Pack4x8                                               packs vector of 4 signed or unsigned values into a packed datatype, drops or clamps unused bits
 221 IsHelperLane                                          returns true on helper lanes in pixel shaders
-222 TextureGatherImm                                      same as TextureGather, except offsets are limited to immediate values between -8 and 7
-223 TextureGatherCmpImm                                   same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7
 === ===================================================== =======================================================================================================================================================================================================================
 
 

+ 2 - 6
include/dxc/DXIL/DxilConstants.h

@@ -593,8 +593,6 @@ namespace DXIL {
     // Resources - gather
     TextureGather = 73, // gathers the four texels that would be used in a bi-linear filtering operation
     TextureGatherCmp = 74, // same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
-    TextureGatherCmpImm = 223, // same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7
-    TextureGatherImm = 222, // same as TextureGather, except offsets are limited to immediate values between -8 and 7
   
     // Resources - sample
     RenderTargetGetSampleCount = 77, // gets the number of samples for a render target
@@ -720,7 +718,7 @@ namespace DXIL {
     NumOpCodes_Dxil_1_5 = 216,
     NumOpCodes_Dxil_1_6 = 222,
   
-    NumOpCodes = 224 // exclusive last value of enumeration
+    NumOpCodes = 222 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -902,8 +900,6 @@ namespace DXIL {
     // Resources - gather
     TextureGather,
     TextureGatherCmp,
-    TextureGatherCmpImm,
-    TextureGatherImm,
   
     // Resources - sample
     RenderTargetGetSampleCount,
@@ -987,7 +983,7 @@ namespace DXIL {
     NumOpClasses_Dxil_1_5 = 143,
     NumOpClasses_Dxil_1_6 = 149,
   
-    NumOpClasses = 151 // exclusive last value of enumeration
+    NumOpClasses = 149 // exclusive last value of enumeration
   };
   // OPCODECLASS-ENUM:END
 

+ 0 - 101
include/dxc/DXIL/DxilInstructions.h

@@ -7164,106 +7164,5 @@ struct DxilInst_IsHelperLane {
   // Metadata
   bool requiresUniformInputs() const { return false; }
 };
-
-/// This instruction same as TextureGather, except offsets are limited to immediate values between -8 and 7
-struct DxilInst_TextureGatherImm {
-  llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_TextureGatherImm(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::TextureGatherImm);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (10 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-  // Metadata
-  bool requiresUniformInputs() const { return false; }
-  // Operand indexes
-  enum OperandIdx {
-    arg_srv = 1,
-    arg_sampler = 2,
-    arg_coord0 = 3,
-    arg_coord1 = 4,
-    arg_coord2 = 5,
-    arg_coord3 = 6,
-    arg_offset0 = 7,
-    arg_offset1 = 8,
-    arg_channel = 9,
-  };
-  // Accessors
-  llvm::Value *get_srv() const { return Instr->getOperand(1); }
-  void set_srv(llvm::Value *val) { Instr->setOperand(1, val); }
-  llvm::Value *get_sampler() const { return Instr->getOperand(2); }
-  void set_sampler(llvm::Value *val) { Instr->setOperand(2, val); }
-  llvm::Value *get_coord0() const { return Instr->getOperand(3); }
-  void set_coord0(llvm::Value *val) { Instr->setOperand(3, val); }
-  llvm::Value *get_coord1() const { return Instr->getOperand(4); }
-  void set_coord1(llvm::Value *val) { Instr->setOperand(4, val); }
-  llvm::Value *get_coord2() const { return Instr->getOperand(5); }
-  void set_coord2(llvm::Value *val) { Instr->setOperand(5, val); }
-  llvm::Value *get_coord3() const { return Instr->getOperand(6); }
-  void set_coord3(llvm::Value *val) { Instr->setOperand(6, val); }
-  llvm::Value *get_offset0() const { return Instr->getOperand(7); }
-  void set_offset0(llvm::Value *val) { Instr->setOperand(7, val); }
-  llvm::Value *get_offset1() const { return Instr->getOperand(8); }
-  void set_offset1(llvm::Value *val) { Instr->setOperand(8, val); }
-  llvm::Value *get_channel() const { return Instr->getOperand(9); }
-  void set_channel(llvm::Value *val) { Instr->setOperand(9, val); }
-};
-
-/// This instruction same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7
-struct DxilInst_TextureGatherCmpImm {
-  llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_TextureGatherCmpImm(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::TextureGatherCmpImm);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (11 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-  // Metadata
-  bool requiresUniformInputs() const { return false; }
-  // Operand indexes
-  enum OperandIdx {
-    arg_srv = 1,
-    arg_sampler = 2,
-    arg_coord0 = 3,
-    arg_coord1 = 4,
-    arg_coord2 = 5,
-    arg_coord3 = 6,
-    arg_offset0 = 7,
-    arg_offset1 = 8,
-    arg_channel = 9,
-    arg_compareVale = 10,
-  };
-  // Accessors
-  llvm::Value *get_srv() const { return Instr->getOperand(1); }
-  void set_srv(llvm::Value *val) { Instr->setOperand(1, val); }
-  llvm::Value *get_sampler() const { return Instr->getOperand(2); }
-  void set_sampler(llvm::Value *val) { Instr->setOperand(2, val); }
-  llvm::Value *get_coord0() const { return Instr->getOperand(3); }
-  void set_coord0(llvm::Value *val) { Instr->setOperand(3, val); }
-  llvm::Value *get_coord1() const { return Instr->getOperand(4); }
-  void set_coord1(llvm::Value *val) { Instr->setOperand(4, val); }
-  llvm::Value *get_coord2() const { return Instr->getOperand(5); }
-  void set_coord2(llvm::Value *val) { Instr->setOperand(5, val); }
-  llvm::Value *get_coord3() const { return Instr->getOperand(6); }
-  void set_coord3(llvm::Value *val) { Instr->setOperand(6, val); }
-  llvm::Value *get_offset0() const { return Instr->getOperand(7); }
-  void set_offset0(llvm::Value *val) { Instr->setOperand(7, val); }
-  llvm::Value *get_offset1() const { return Instr->getOperand(8); }
-  void set_offset1(llvm::Value *val) { Instr->setOperand(8, val); }
-  llvm::Value *get_channel() const { return Instr->getOperand(9); }
-  void set_channel(llvm::Value *val) { Instr->setOperand(9, val); }
-  llvm::Value *get_compareVale() const { return Instr->getOperand(10); }
-  void set_compareVale(llvm::Value *val) { Instr->setOperand(10, val); }
-};
 // INSTR-HELPER:END
 } // namespace hlsl

+ 1 - 0
include/dxc/DXIL/DxilPDB.h

@@ -22,5 +22,6 @@ namespace pdb {
   HRESULT LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **ppHash, IDxcBlob **ppContainer);
   HRESULT LoadDataFromStream(IMalloc *pMalloc, IStream *pIStream, IDxcBlob **pOutContainer);
   HRESULT WriteDxilPDB(IMalloc *pMalloc, IDxcBlob *pContainer, llvm::ArrayRef<BYTE> HashData, IDxcBlob **ppOutBlob);
+  HRESULT WriteDxilPDB(IMalloc *pMalloc, llvm::ArrayRef<BYTE> ContainerData, llvm::ArrayRef<BYTE> HashData, IDxcBlob **ppOutBlob);
 }
 }

+ 9 - 0
include/dxc/Support/ErrorCodes.h

@@ -107,3 +107,12 @@
 
 // 0X80AA001A - Error in extension mechanism.
 #define DXC_E_EXTENSION_ERROR                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001A))
+
+// 0X80AA001B - LLVM Fatal Error
+#define DXC_E_LLVM_FATAL_ERROR                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001B))
+
+// 0X80AA001C - LLVM Unreachable code
+#define DXC_E_LLVM_UNREACHABLE                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001C))
+
+// 0X80AA001D - LLVM Cast Failure
+#define DXC_E_LLVM_CAST_ERROR                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001D))

+ 29 - 0
include/dxc/dxcerrors.h

@@ -0,0 +1,29 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// dxcerrors.h                                                               //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides definition of error codes.                                        //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef __DXC_ERRORS__
+#define __DXC_ERRORS__
+
+#ifndef FACILITY_GRAPHICS
+#define FACILITY_GRAPHICS 36
+#endif
+
+#define DXC_EXCEPTION_CODE(name, status)                                 \
+    static constexpr DWORD EXCEPTION_##name =                 \
+    (0xc0000000u | (FACILITY_GRAPHICS << 16) | (0xff00u | (status & 0xffu)));
+
+DXC_EXCEPTION_CODE(LOAD_LIBRARY_FAILED, 0x00u)
+DXC_EXCEPTION_CODE(NO_HMODULE,          0x01u)
+DXC_EXCEPTION_CODE(GET_PROC_FAILED,     0x02u)
+
+#undef DXC_EXCEPTION_CODE
+
+#endif

+ 2 - 0
include/llvm/IR/DebugInfoMetadata.h

@@ -545,6 +545,7 @@ public:
 
 
   Metadata *getRawScope() const { return getOperand(1); }
+  void setScope(Metadata *scope) { setOperand(1, scope); } // HLSL Change
   MDString *getRawName() const { return getOperandAs<MDString>(2); }
 
   void setFlags(unsigned NewFlags) {
@@ -1826,6 +1827,7 @@ public:
     return "";
   }
 
+  void setScope(DIScope *scope) { setOperand(0, scope); } // HLSL Change
   Metadata *getRawScope() const { return getOperand(0); }
   MDString *getRawName() const { return getOperandAs<MDString>(1); }
   Metadata *getRawFile() const { return getOperand(2); }

+ 5 - 0
include/llvm/IR/IntrinsicInst.h

@@ -29,6 +29,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/DebugInfoMetadata.h" // HLSL Change
 
 namespace llvm {
   /// IntrinsicInst - A useful wrapper class for inspecting calls to intrinsic
@@ -89,6 +90,8 @@ namespace llvm {
       return cast<DIExpression>(getRawExpression());
     }
 
+    void setVariable(DIVariable *v) { setArgOperand(1, MetadataAsValue::get(getContext(), v)); } // HLSL Change
+
     Metadata *getRawVariable() const {
       return cast<MetadataAsValue>(getArgOperand(1))->getMetadata();
     }
@@ -122,6 +125,8 @@ namespace llvm {
       return cast<DIExpression>(getRawExpression());
     }
 
+    void setVariable(DIVariable *v) { setArgOperand(2, MetadataAsValue::get(getContext(), v)); } // HLSL Change
+
     Metadata *getRawVariable() const {
       return cast<MetadataAsValue>(getArgOperand(2))->getMetadata();
     }

+ 7 - 6
include/llvm/Support/Casting.h

@@ -16,6 +16,7 @@
 #define LLVM_SUPPORT_CASTING_H
 
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/type_traits.h"
 #include <cassert>
 
@@ -221,21 +222,21 @@ template <class X, class Y>
 inline typename std::enable_if<!is_simple_type<Y>::value,
                                typename cast_retty<X, const Y>::ret_type>::type
 cast(const Y &Val) {
-  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast_convert_val<
       X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val);
 }
 
 template <class X, class Y>
 inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
-  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast_convert_val<X, Y,
                           typename simplify_type<Y>::SimpleType>::doit(Val);
 }
 
 template <class X, class Y>
 inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
-  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast_convert_val<X, Y*,
                           typename simplify_type<Y*>::SimpleType>::doit(Val);
 }
@@ -249,7 +250,7 @@ LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
 cast_or_null(const Y &Val) {
   if (!Val)
     return nullptr;
-  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast<X>(Val);
 }
 
@@ -259,7 +260,7 @@ LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
 cast_or_null(Y &Val) {
   if (!Val)
     return nullptr;
-  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast<X>(Val);
 }
 
@@ -267,7 +268,7 @@ template <class X, class Y>
 LLVM_ATTRIBUTE_UNUSED_RESULT inline typename cast_retty<X, Y *>::ret_type
 cast_or_null(Y *Val) {
   if (!Val) return nullptr;
-  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast<X>(Val);
 }
 

+ 9 - 3
include/llvm/Support/ErrorHandling.h

@@ -84,6 +84,9 @@ namespace llvm {
   LLVM_ATTRIBUTE_NORETURN void
   llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr,
                             unsigned line=0);
+
+  // HLSL Change - throw special exception for cast mismatch
+  void llvm_cast_assert_internal(const char *func);
 }
 
 /// Marks that the current location is not supposed to be reachable.
@@ -94,13 +97,16 @@ namespace llvm {
 ///
 /// Use this instead of assert(0).  It conveys intent more clearly and
 /// allows compilers to omit some unnecessary code.
-#ifndef NDEBUG
+#if 1 // HLSL Change - always throw exception with message for unreachable
 #define llvm_unreachable(msg) \
   ::llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__)
-//#elif defined(LLVM_BUILTIN_UNREACHABLE) // HLSL Change - always throw exception for unreachable
-//#define llvm_unreachable(msg) LLVM_BUILTIN_UNREACHABLE
+#elif defined(LLVM_BUILTIN_UNREACHABLE)
+#define llvm_unreachable(msg) LLVM_BUILTIN_UNREACHABLE
 #else
 #define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
 #endif
 
+// HLSL Change - throw special exception for cast type mismatch
+#define llvm_cast_assert(X, Val) ((void)( (!!(isa<X>(Val))) || (::llvm::llvm_cast_assert_internal(__FUNCTION__), 0) ))
+
 #endif

+ 4 - 6
lib/DXIL/DxilCounters.cpp

@@ -173,9 +173,8 @@ bool CountDxilOp_tex_bias(unsigned op) {
   return op == 61;
 }
 bool CountDxilOp_tex_cmp(unsigned op) {
-  // Instructions: SampleCmp=64, SampleCmpLevelZero=65, TextureGatherCmp=74,
-  // TextureGatherCmpImm=223
-  return (64 <= op && op <= 65) || op == 74 || op == 223;
+  // Instructions: SampleCmp=64, SampleCmpLevelZero=65, TextureGatherCmp=74
+  return (64 <= op && op <= 65) || op == 74;
 }
 bool CountDxilOp_tex_grad(unsigned op) {
   // Instructions: SampleGrad=63
@@ -186,9 +185,8 @@ bool CountDxilOp_tex_load(unsigned op) {
   return op == 66 || op == 68 || op == 139;
 }
 bool CountDxilOp_tex_norm(unsigned op) {
-  // Instructions: Sample=60, SampleLevel=62, TextureGather=73,
-  // TextureGatherImm=222
-  return op == 60 || op == 62 || op == 73 || op == 222;
+  // Instructions: Sample=60, SampleLevel=62, TextureGather=73
+  return op == 60 || op == 62 || op == 73;
 }
 bool CountDxilOp_tex_store(unsigned op) {
   // Instructions: TextureStore=67, BufferStore=69, RawBufferStore=140,

+ 0 - 15
lib/DXIL/DxilOperations.cpp

@@ -404,10 +404,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
   // Helper Lanes                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj ,  function attribute
   {  OC::IsHelperLane,            "IsHelperLane",             OCC::IsHelperLane,             "isHelperLane",              { false, false, false, false,  true, false, false, false, false, false, false}, Attribute::ReadOnly, },
-
-  // Resources - gather                                                                                                      void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64,   udt,   obj ,  function attribute
-  {  OC::TextureGatherImm,        "TextureGatherImm",         OCC::TextureGatherImm,         "textureGatherImm",          { false,  true,  true, false, false, false,  true,  true, false, false, false}, Attribute::ReadOnly, },
-  {  OC::TextureGatherCmpImm,     "TextureGatherCmpImm",      OCC::TextureGatherCmpImm,      "textureGatherCmpImm",       { false,  true,  true, false, false, false,  true,  true, false, false, false}, Attribute::ReadOnly, },
 };
 // OPCODE-OLOADS:END
 
@@ -851,11 +847,6 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
     major = 6;  minor = 6;
     return;
   }
-  // Instructions: TextureGatherImm=222, TextureGatherCmpImm=223
-  if ((222 <= op && op <= 223)) {
-    major = 6;  minor = 15;
-    return;
-  }
   // OPCODE-SMMASK:END
 }
 
@@ -1442,10 +1433,6 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
 
     // Helper Lanes
   case OpCode::IsHelperLane:           A(pI1);      A(pI32); break;
-
-    // Resources - gather
-  case OpCode::TextureGatherImm:       RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); break;
-  case OpCode::TextureGatherCmpImm:    RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); A(pF32); break;
   // OPCODE-OLOAD-FUNCS:END
   default: DXASSERT(false, "otherwise unhandled case"); break;
   }
@@ -1718,8 +1705,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::TextureGatherCmp:
   case OpCode::RawBufferLoad:
   case OpCode::Unpack4x8:
-  case OpCode::TextureGatherImm:
-  case OpCode::TextureGatherCmpImm:
   {
     StructType *ST = cast<StructType>(Ty);
     return ST->getElementType(0);

+ 8 - 2
lib/DXIL/DxilPDB.cpp

@@ -297,7 +297,13 @@ SmallVector<char, 0> WritePdbStream(ArrayRef<BYTE> Hash) {
 }
 
 HRESULT hlsl::pdb::WriteDxilPDB(IMalloc *pMalloc, IDxcBlob *pContainer, ArrayRef<BYTE> HashData, IDxcBlob **ppOutBlob) {
-  if (!hlsl::IsValidDxilContainer((hlsl::DxilContainerHeader *)pContainer->GetBufferPointer(), pContainer->GetBufferSize()))
+  return hlsl::pdb::WriteDxilPDB(pMalloc,
+    llvm::ArrayRef<BYTE>((const BYTE *)pContainer->GetBufferPointer(), pContainer->GetBufferSize()),
+    HashData, ppOutBlob);
+}
+
+HRESULT hlsl::pdb::WriteDxilPDB(IMalloc *pMalloc, llvm::ArrayRef<BYTE> ContainerData, llvm::ArrayRef<BYTE> HashData, IDxcBlob **ppOutBlob) {
+  if (!hlsl::IsValidDxilContainer((const hlsl::DxilContainerHeader *)ContainerData.data(), ContainerData.size()))
     return E_FAIL;
 
   SmallVector<char, 0> PdbStream = WritePdbStream(HashData);
@@ -311,7 +317,7 @@ HRESULT hlsl::pdb::WriteDxilPDB(IMalloc *pMalloc, IDxcBlob *pContainer, ArrayRef
   Writer.AddEmptyStream(); // DBI
   Writer.AddEmptyStream(); // IPI
   
-  Writer.AddStream({ (char *)pContainer->GetBufferPointer(), pContainer->GetBufferSize() }); // Actual data block
+  Writer.AddStream(llvm::ArrayRef<char>((const char *)ContainerData.data(), ContainerData.size() )); // Actual data block
   
   CComPtr<hlsl::AbstractMemoryStream> pStream;
   IFR(hlsl::CreateMemoryStream(pMalloc, &pStream));

+ 88 - 66
lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp

@@ -46,6 +46,32 @@ using SizeInBits = unsigned;
 // operand does not match exactly the Variable operand's type.
 class OffsetManager
 {
+    unsigned DescendTypeToGetAlignMask(llvm::DIType* Ty)
+    {
+      unsigned AlignMask = Ty->getAlignInBits();
+      
+      if (AlignMask == 0) {
+        if (auto *DerivedTy = llvm::dyn_cast<llvm::DIDerivedType>(Ty)) {
+          const llvm::DITypeIdentifierMap EmptyMap;
+          switch (DerivedTy->getTag()) {
+          case llvm::dwarf::DW_TAG_restrict_type:
+          case llvm::dwarf::DW_TAG_reference_type:
+          case llvm::dwarf::DW_TAG_const_type:
+          case llvm::dwarf::DW_TAG_typedef: {
+            llvm::DIType *baseType = DerivedTy->getBaseType().resolve(EmptyMap);
+            if (baseType != nullptr) {
+              if (baseType->getAlignInBits() == 0) {
+                (void)baseType->getAlignInBits();
+              }
+              return DescendTypeToGetAlignMask(baseType);
+            }
+          }
+          }
+        }
+      }
+
+      return AlignMask;
+    }
 public:
   OffsetManager() = default;
 
@@ -54,44 +80,31 @@ public:
       llvm::DIType *Ty
   )
   {
-    // This is some magic arithmetic. Here's an example:
-    //
-    // Assume the natural alignment for Ty is 16 bits. Then
-    //
-    //     AlignMask = 0x0000000f(15)
-    //
-    // If the current aligned offset is 
-    //
-    //     CurrentAlignedOffset = 0x00000048(72)
-    //
-    // Then
-    //
-    //     T = CurrentAlignOffset + AlignMask = 0x00000057(87)
-    //
-    // Which mean
-    //
-    //     T & ~CurrentOffset = 0x00000050(80)
-    //
-    // is the aligned offset where Ty should be placed.
-    unsigned AlignMask = Ty->getAlignInBits();
-
-    if (AlignMask == 0)
-    {
-      if (auto *DerivedTy = llvm::dyn_cast<llvm::DIDerivedType>(Ty)) {
-        const llvm::DITypeIdentifierMap EmptyMap;
-        switch (DerivedTy->getTag()) {
-        case llvm::dwarf::DW_TAG_restrict_type:
-        case llvm::dwarf::DW_TAG_reference_type:
-        case llvm::dwarf::DW_TAG_const_type:
-        case llvm::dwarf::DW_TAG_typedef:
-            AlignMask = DerivedTy->getBaseType().resolve(EmptyMap)->getAlignInBits();
-            assert(AlignMask != 0);
-        }
-      }
+    unsigned AlignMask = DescendTypeToGetAlignMask(Ty);
+    if (AlignMask) {
+      // This is some magic arithmetic. Here's an example:
+      //
+      // Assume the natural alignment for Ty is 16 bits. Then
+      //
+      //     AlignMask = 0x0000000f(15)
+      //
+      // If the current aligned offset is 
+      //
+      //     CurrentAlignedOffset = 0x00000048(72)
+      //
+      // Then
+      //
+      //     T = CurrentAlignedOffset + AlignMask = 0x00000057(87)
+      //
+      // Which means
+      //
+      //     T & ~AlignMask = 0x00000050(80)
+      //
+      // is the aligned offset where Ty should be placed.
+      AlignMask = AlignMask - 1;
+      m_CurrentAlignedOffset =
+          (m_CurrentAlignedOffset + AlignMask) & ~AlignMask;
     }
-    AlignMask = AlignMask - 1;
-    m_CurrentAlignedOffset =
-        (m_CurrentAlignedOffset + AlignMask) & ~AlignMask;
   }
 
   // Add is used to "add" an aggregate element (struct field, array element)
@@ -146,13 +159,13 @@ public:
         AlignedOffset);
   }
 
-  OffsetInBits GetPackedOffsetFromAlignedOffset(
+  bool GetPackedOffsetFromAlignedOffset(
       OffsetInBits AlignedOffset,
       OffsetInBits *PackedOffset
   ) const
   {
     return GetOffsetWithMap(
-        m_PackedOffsetToAlignedOffset,
+        m_AlignedOffsetToPackedOffset,
         AlignedOffset,
         PackedOffset);
   }
@@ -417,34 +430,39 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(
 
   const OffsetInBits InitialOffset = PackedOffsetFromVar;
   llvm::IRBuilder<> B(DbgValue->getCalledFunction()->getContext());
-  B.SetInsertPoint(DbgValue);
-  B.SetCurrentDebugLocation(llvm::DebugLoc());
-  auto *Zero = B.getInt32(0);
-
-  // Now traverse a list of pairs {Scalar Value, InitialOffset + Offset}.
-  // InitialOffset is the offset from DbgValue's expression (i.e., the
-  // offset from the Variable's start), and Offset is the Scalar Value's
-  // packed offset from DbgValue's value. 
-  for (const ValueAndOffset &VO : SplitValue(V, InitialOffset, B))
-  {
-    OffsetInBits AlignedOffset;
-    if (!Offsets.GetAlignedOffsetFromPackedOffset(VO.m_PackedOffset,
-                                                  &AlignedOffset))
-    {
-      continue;
-    }
+  auto* instruction = llvm::dyn_cast<llvm::Instruction>(V);
+  if (instruction != nullptr) {
+    instruction = instruction->getNextNode();
+    if (instruction != nullptr) {
+      B.SetInsertPoint(instruction);
+
+      B.SetCurrentDebugLocation(llvm::DebugLoc());
+      auto *Zero = B.getInt32(0);
+
+      // Now traverse a list of pairs {Scalar Value, InitialOffset + Offset}.
+      // InitialOffset is the offset from DbgValue's expression (i.e., the
+      // offset from the Variable's start), and Offset is the Scalar Value's
+      // packed offset from DbgValue's value.
+      for (const ValueAndOffset &VO : SplitValue(V, InitialOffset, B)) {
+
+        OffsetInBits AlignedOffset;
+        if (!Offsets.GetAlignedOffsetFromPackedOffset(VO.m_PackedOffset,
+                                                      &AlignedOffset)) {
+          continue;
+        }
 
-    auto* AllocaInst = Register->GetRegisterForAlignedOffset(AlignedOffset);
-    if (AllocaInst == nullptr)
-    {
-      assert(!"Failed to find alloca for var[offset]");
-      continue;
-    }
+        auto *AllocaInst = Register->GetRegisterForAlignedOffset(AlignedOffset);
+        if (AllocaInst == nullptr) {
+          assert(!"Failed to find alloca for var[offset]");
+          continue;
+        }
 
-    if (AllocaInst->getAllocatedType()->getArrayElementType() == VO.m_V->getType())
-    {
-      auto* GEP = B.CreateGEP(AllocaInst, { Zero, Zero });
-      B.CreateStore(VO.m_V, GEP);
+        if (AllocaInst->getAllocatedType()->getArrayElementType() ==
+            VO.m_V->getType()) {
+          auto *GEP = B.CreateGEP(AllocaInst, {Zero, Zero});
+          B.CreateStore(VO.m_V, GEP);
+        }
+      }
     }
   }
 }
@@ -511,9 +529,9 @@ void VariableRegisters::PopulateAllocaMap(
     llvm::DIType *Ty
 )
 {
+  const llvm::DITypeIdentifierMap EmptyMap;
   if (auto *DerivedTy = llvm::dyn_cast<llvm::DIDerivedType>(Ty))
   {
-    const llvm::DITypeIdentifierMap EmptyMap;
     switch (DerivedTy->getTag())
     {
     default:
@@ -550,6 +568,10 @@ void VariableRegisters::PopulateAllocaMap(
     case llvm::dwarf::DW_TAG_class_type:
       PopulateAllocaMap_StructType(CompositeTy);
       return;
+    case llvm::dwarf::DW_TAG_enumeration_type:
+      // enum base type is int:
+      PopulateAllocaMap(CompositeTy->getBaseType().resolve(EmptyMap));
+      return;
     }
   }
   else if (auto *BasicTy = llvm::dyn_cast<llvm::DIBasicType>(Ty))

+ 3 - 15
lib/HLSL/DxilLegalizeSampleOffsetPass.cpp

@@ -126,23 +126,13 @@ bool HasIllegalOffsetInLoop(std::vector<Offset> &illegalOffsets, LoopInfo &LI,
 
 void GetOffsetRange(DXIL::OpCode opcode, unsigned &offsetStart, unsigned &offsetEnd)
 {
-  switch(opcode) {
-  case DXIL::OpCode::TextureLoad:
+  if (DXIL::OpCode::TextureLoad == opcode) {
     offsetStart = DXIL::OperandIndex::kTextureLoadOffset0OpIdx;
     offsetEnd = DXIL::OperandIndex::kTextureLoadOffset2OpIdx;
-    break;
-  case DXIL::OpCode::TextureGather:
-  case DXIL::OpCode::TextureGatherCmp:
-  case DXIL::OpCode::TextureGatherImm:
-  case DXIL::OpCode::TextureGatherCmpImm:
-    offsetStart = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
-    offsetEnd = DXIL::OperandIndex::kTextureGatherOffset1OpIdx;
-    break;
-  default:
-    // everything else are sample variants
+  } else {
+    // assume samples
     offsetStart = DXIL::OperandIndex::kTextureSampleOffset0OpIdx;
     offsetEnd = DXIL::OperandIndex::kTextureSampleOffset2OpIdx;
-    break;
   }
 }
 
@@ -261,8 +251,6 @@ void DxilLegalizeSampleOffsetPass::CollectIllegalOffsets(
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleGrad, hlslOP);
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::SampleLevel,
                         hlslOP);
-  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::TextureGatherImm, hlslOP);
-  CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::TextureGatherCmpImm, hlslOP);
   CollectIllegalOffsets(illegalOffsets, CurF, DXIL::OpCode::TextureLoad, hlslOP);
 }
 

+ 7 - 5
lib/HLSL/DxilLinker.cpp

@@ -1046,12 +1046,14 @@ void DxilLinkJob::StripDeadDebugInfo(Module &M) {
 
       // If the function referenced by DISP is not null, the function is live.
       if (Function *Func = DISP->getFunction()) {
-        if (Func->getParent() == &M)
-          LiveSubprograms.push_back(DISP);
-        else
-          SubprogramChange = true;
+        LiveSubprograms.push_back(DISP);
+        if (Func->getParent() != &M)
+          DISP->replaceFunction(nullptr);
       } else {
-        SubprogramChange = true;
+        // Keep the subprogram even if it has no function. When a function is
+        // inlined, its function reference is gone, but the subprogram is still
+        // valid as a scope.
+        LiveSubprograms.push_back(DISP);
       }
     }
 

+ 5 - 4
lib/HLSL/DxilLoopDeletion.cpp

@@ -45,9 +45,10 @@ bool DxilLoopDeletion::runOnFunction(Function &F) {
   DeleteLoopPM.add(createLoopDeletionPass());
   bool bUpdated = false;
 
-  legacy::FunctionPassManager SimpilfyPM(F.getParent());
-  SimpilfyPM.add(createCFGSimplificationPass());
-  SimpilfyPM.add(createDeadCodeEliminationPass());
+  legacy::FunctionPassManager SimplifyPM(F.getParent());
+  SimplifyPM.add(createCFGSimplificationPass());
+  SimplifyPM.add(createDeadCodeEliminationPass());
+  SimplifyPM.add(createInstructionCombiningPass());
 
   const unsigned kMaxIteration = 3;
   unsigned i=0;
@@ -55,7 +56,7 @@ bool DxilLoopDeletion::runOnFunction(Function &F) {
     if (!DeleteLoopPM.run(F))
       break;
 
-    SimpilfyPM.run(F);
+    SimplifyPM.run(F);
     i++;
     bUpdated = true;
   }

+ 0 - 29
lib/HLSL/DxilPreparePasses.cpp

@@ -381,29 +381,6 @@ public:
     }
   }
 
-  // Replace all fromOpcode call instructions with toOpcode equivalents
-  void ReplaceIntrinsics(Module &M, hlsl::OP *hlslOp, DXIL::OpCode fromOpcode, DXIL::OpCode toOpcode) {
-    for (auto it : hlslOp->GetOpFuncList(fromOpcode)) {
-      Function *F = it.second;
-      if (!F)
-        continue;
-      Type *Ty = OP::GetOverloadType(fromOpcode, F);
-      for (auto uit = F->user_begin(); uit != F->user_end(); uit++) {
-        CallInst *CI = cast<CallInst>(*uit);
-        IRBuilder<> Builder(CI);
-        std::vector<Value*> args;
-        args.emplace_back(hlslOp->GetU32Const((unsigned)toOpcode));
-        for (unsigned i = 1; i < CI->getNumArgOperands(); i++)
-          args.emplace_back(CI->getOperand(i));
-
-        Function *newF = hlslOp->GetOpFunc(toOpcode, Ty);
-        CallInst *NewCI = Builder.CreateCall(newF, args);
-        CI->replaceAllUsesWith(NewCI);
-        CI->eraseFromParent();
-      }
-    }
-  }
-
   ///////////////////////////////////////////////////
   // IsHelperLane() lowering for SM < 6.6
 
@@ -762,12 +739,6 @@ public:
         patchDxil_1_6(M, hlslOP, ValMajor, ValMinor);
       }
 
-      // Patch all existing dxil versions for some future one
-      // that differentiates immediate and programmable gathers
-      ReplaceIntrinsics(M, hlslOP, OP::OpCode::TextureGatherImm, OP::OpCode::TextureGather);
-      ReplaceIntrinsics(M, hlslOP, OP::OpCode::TextureGatherCmpImm, OP::OpCode::TextureGatherCmp);
-
-
       // Remove store undef output.
       RemoveStoreUndefOutput(M, hlslOP);
 

+ 0 - 10
lib/HLSL/DxilValidation.cpp

@@ -1017,9 +1017,6 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   // CreateHandleFromHeap=218, Unpack4x8=219, Pack4x8=220, IsHelperLane=221
   if ((216 <= op && op <= 221))
     return (major > 6 || (major == 6 && minor >= 6));
-  // Instructions: TextureGatherImm=222, TextureGatherCmpImm=223
-  if ((222 <= op && op <= 223))
-    return (major > 6 || (major == 6 && minor >= 15));
   return true;
   // VALOPCODESM-TEXT:END
 }
@@ -1439,9 +1436,6 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle,
     ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceKindForGather);
     return;
   }
-  if (OP::IsDxilOpFuncCallInst(CI, DXIL::OpCode::TextureGatherImm) ||
-      OP::IsDxilOpFuncCallInst(CI, DXIL::OpCode::TextureGatherCmpImm))
-    ValidateResourceOffset(CI, resKind, offsets, ValCtx);
 }
 
 static unsigned StoreValueToMask(ArrayRef<Value *> vals) {
@@ -1990,7 +1984,6 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode,
 
     ValidateDerivativeOp(CI, ValCtx);
   } break;
-  case DXIL::OpCode::TextureGatherImm:
   case DXIL::OpCode::TextureGather: {
     DxilInst_TextureGather gather(CI);
     ValidateGather(CI, gather.get_srv(), gather.get_sampler(),
@@ -1999,7 +1992,6 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode,
                    {gather.get_offset0(), gather.get_offset1()},
                    /*IsSampleC*/ false, ValCtx);
   } break;
-  case DXIL::OpCode::TextureGatherCmpImm:
   case DXIL::OpCode::TextureGatherCmp: {
     DxilInst_TextureGatherCmp gather(CI);
     ValidateGather(CI, gather.get_srv(), gather.get_sampler(),
@@ -2433,8 +2425,6 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
   case DXIL::OpCode::CalculateLOD:
   case DXIL::OpCode::TextureGather:
   case DXIL::OpCode::TextureGatherCmp:
-  case DXIL::OpCode::TextureGatherImm:
-  case DXIL::OpCode::TextureGatherCmpImm:
   case DXIL::OpCode::Sample:
   case DXIL::OpCode::SampleCmp:
   case DXIL::OpCode::SampleCmpLevelZero:

+ 7 - 12
lib/HLSL/HLOperationLower.cpp

@@ -3166,12 +3166,9 @@ GatherHelper::GatherHelper(
       if (ch != GatherChannel::GatherAll)
         TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
                               offsetSize);
-      if (hasSampleOffsets) {
-        statusIdx = HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex;
-      } else {
-        opcode = OP::OpCode::TextureGatherImm;
-        statusIdx = HLOperandIndex::kGatherStatusArgIndex;
-      }
+      statusIdx =
+          hasSampleOffsets ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex
+                           : HLOperandIndex::kGatherStatusArgIndex;
     }
     SetStatus(CI, statusIdx);
   } break;
@@ -3187,12 +3184,10 @@ GatherHelper::GatherHelper(
       if (ch != GatherChannel::GatherAll)
         TranslateSampleOffset(CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex,
                               offsetSize);
-      if (hasSampleOffsets) {
-        statusIdx = HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex;
-      } else {
-        opcode = OP::OpCode::TextureGatherCmpImm;
-        statusIdx = HLOperandIndex::kGatherCmpStatusArgIndex;
-      }
+      statusIdx =
+          hasSampleOffsets
+              ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex
+              : HLOperandIndex::kGatherCmpStatusArgIndex;
     }
     SetStatus(CI, statusIdx);
   } break;

+ 16 - 6
lib/Support/ErrorHandling.cpp

@@ -32,6 +32,7 @@
 #ifdef _WIN32
 #include "windows.h"  // HLSL Change
 #endif
+#include "dxc/Support/exception.h"  // HLSL Change
 
 #if defined(HAVE_UNISTD_H)
 # include <unistd.h>
@@ -112,7 +113,8 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
   if (handler) {
     handler(handlerData, Reason.str(), GenCrashDiag);
   }
-  RaiseException(STATUS_LLVM_FATAL, 0, 0, 0);
+
+  throw hlsl::Exception(DXC_E_LLVM_FATAL_ERROR, std::string("LLVM ERROR: ") + Reason.str() + "\n");
 #endif
 }
 
@@ -121,19 +123,27 @@ void llvm::llvm_unreachable_internal(const char *msg, const char *file,
   // This code intentionally doesn't call the ErrorHandler callback, because
   // llvm_unreachable is intended to be used to indicate "impossible"
   // situations, and not legitimate runtime errors.
+  // HLSL Change - collect full message in string
+  SmallVector<char, 64> Buffer;
+  raw_svector_ostream OS(Buffer);
   if (msg)
-    dbgs() << msg << "\n";
-  dbgs() << "UNREACHABLE executed";
+    OS << msg << "\n";
+  OS << "UNREACHABLE executed";
   if (file)
-    dbgs() << " at " << file << ":" << line;
-  dbgs() << "!\n";
+    OS << " at " << file << ":" << line;
+  OS << "!\n";
 #ifndef LLVM_ON_WIN32 // HLSL Change - unwind if necessary, but don't terminate the process
+  dbgs() << OS.str();
   abort();
 #else
-  RaiseException(STATUS_LLVM_UNREACHABLE, 0, 0, 0);
+  throw hlsl::Exception(DXC_E_LLVM_UNREACHABLE, OS.str());
 #endif
 }
 
+void llvm::llvm_cast_assert_internal(const char *func) {
+  throw hlsl::Exception(DXC_E_LLVM_CAST_ERROR, std::string(func) + "<X>() argument of incompatible type!\n");
+}
+
 static void bindingsErrorHandler(void *user_data, const std::string& reason,
                                  bool gen_crash_diag) {
   LLVMFatalErrorHandler handler =

+ 1 - 1
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -617,7 +617,7 @@ void PassManagerBuilder::populateModulePassManager(
   addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createCFGSimplificationPass());
   MPM.add(createDxilLoopDeletionPass()); // HLSL Change - try to delete loop again.
-  MPM.add(createInstructionCombiningPass());
+  //MPM.add(createInstructionCombiningPass()); // HLSL Change - now run as part of the DxilLoopDeletion pass added above
 
   if (!DisableUnrollLoops) {
     MPM.add(createLoopUnrollPass(/* HLSL Change begin */-1, -1, -1, -1, this->StructurizeLoopExitsForUnroll /* HLSL Change end */));    // Unroll small loops

+ 1 - 1
lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp

@@ -421,7 +421,7 @@ static bool RemoveUnstructuredLoopExitsIteration(BasicBlock *exiting_block, Loop
   assert(new_exit_cond);
 
   // Split the block where we're now exiting from, and branch to latch exit
-  StringRef old_name = new_exiting_block->getName();
+  std::string old_name = new_exiting_block->getName().str();
   BasicBlock *new_not_exiting_block = new_exiting_block->splitBasicBlock(new_exiting_block->getFirstNonPHI());
   new_exiting_block->setName("dx.struct_exit.new_exiting");
   new_not_exiting_block->setName(old_name);

+ 3 - 3
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -334,12 +334,12 @@ static unsigned IsPtrUsedByLoweredFn(
             "otherwise, multiple uses in single call");
       }
 
-    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(user)) {
+    } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(user)) {
       // Not what we are looking for if GEP result is not [array of] struct.
       // If use is under struct member, we can still SROA the outer struct.
       if (!dxilutil::StripArrayTypes(GEP->getType()->getPointerElementType())
             ->isStructTy() ||
-          FindFirstStructMemberIdxInGEP(cast<GEPOperator>(GEP)))
+          FindFirstStructMemberIdxInGEP(GEP))
         continue;
       if (IsPtrUsedByLoweredFn(user, CollectedUses))
         bFound = true;
@@ -350,7 +350,7 @@ static unsigned IsPtrUsedByLoweredFn(
 
     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(user)) {
       unsigned opcode = CE->getOpcode();
-      if (opcode == Instruction::AddrSpaceCast || opcode == Instruction::GetElementPtr)
+      if (opcode == Instruction::AddrSpaceCast)
         if (IsPtrUsedByLoweredFn(user, CollectedUses))
           bFound = true;
     }

+ 5 - 2
tools/clang/include/clang/Basic/Diagnostic.h

@@ -1154,8 +1154,11 @@ inline DiagnosticBuilder DiagnosticsEngine::ReportOnce(unsigned DiagID) {
 inline DiagnosticBuilder DiagnosticsEngine::ReportOnce(SourceLocation Loc,
                                                        unsigned DiagID) {
   if (std::find(DiagOnceDiagnostics.begin(), DiagOnceDiagnostics.end(),
-                DiagID) != DiagOnceDiagnostics.end())
-    return DiagnosticBuilder(this);
+                DiagID) != DiagOnceDiagnostics.end()) {
+    auto DisabledDiag =  DiagnosticBuilder(this);
+    DisabledDiag.IsActive = false;
+    return DisabledDiag;
+  }
 
   DiagOnceDiagnostics.push_back(DiagID);
   return Report(Loc, DiagID);

+ 2 - 1
tools/clang/lib/SPIRV/EmitVisitor.cpp

@@ -535,8 +535,9 @@ bool EmitVisitor::visit(SpirvSource *inst) {
   // Chop up the source into multiple segments if it is too long.
   llvm::Optional<llvm::StringRef> firstSnippet = llvm::None;
   llvm::SmallVector<llvm::StringRef, 2> choppedSrcCode;
+  std::string text;
   if (spvOptions.debugInfoSource && inst->hasFile()) {
-    auto text = ReadSourceCode(inst->getFile()->getString());
+    text = ReadSourceCode(inst->getFile()->getString());
     if (!text.empty()) {
       chopString(text, &choppedSrcCode);
       if (!choppedSrcCode.empty()) {

+ 2 - 2
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -9436,8 +9436,8 @@ clang::QualType HLSLExternalSource::CheckVectorConditional(
     Cond.set(CreateLValueToRValueCast(Cond.get()));
 
   // Convert condition component type to bool, using result component dimensions
-  if (condElementKind != AR_BASIC_BOOL) {
-    QualType boolType = NewSimpleAggregateType(AR_TOBJ_INVALID, AR_BASIC_BOOL, 0, rowCount, colCount)->getCanonicalTypeInternal();
+  QualType boolType = NewSimpleAggregateType(AR_TOBJ_INVALID, AR_BASIC_BOOL, 0, rowCount, colCount)->getCanonicalTypeInternal();
+  if (condElementKind != AR_BASIC_BOOL || condType != boolType) {
     StandardConversionSequence standard;
     if (ValidateCast(SourceLocation(), Cond.get(), boolType, ExplicitConversionFalse, SuppressWarningsFalse, SuppressErrorsFalse, &standard)) {
       if (standard.First != ICK_Identity || !standard.isIdentityConversion())

+ 5 - 2
tools/clang/lib/Sema/TreeTransform.h

@@ -2022,8 +2022,11 @@ public:
     ExprResult result;
     DeclarationName Name(&Accessor);
 
-    return hlsl::LookupVectorMemberExprForHLSL(&getSema(), *Base, Name, IsArrowFalse, OpLoc, AccessorLoc);
-
+    ExprResult ER = hlsl::MaybeConvertMemberAccess(&getSema(), Base);
+    if (ER.isInvalid()) {
+      return ExprError();
+    }
+    return hlsl::LookupVectorMemberExprForHLSL(&getSema(), *ER.get(), Name, IsArrowFalse, OpLoc, AccessorLoc);
   }
 
   // HLSL Changes End

+ 1 - 1
tools/clang/test/CodeGenSPIRV/ternary-op.cond-op.hlsl

@@ -62,9 +62,9 @@ void main() {
   w = cond3 ? u : v;
 
   // CHECK:       [[cond:%\d+]] = OpLoad %bool %cond
+  // CHECK-NEXT: [[splat:%\d+]] = OpCompositeConstruct %v3bool [[cond]] [[cond]] [[cond]]
   // CHECK-NEXT:     [[u:%\d+]] = OpLoad %v3int %u
   // CHECK-NEXT:     [[v:%\d+]] = OpLoad %v3int %v
-  // CHECK-NEXT: [[splat:%\d+]] = OpCompositeConstruct %v3bool [[cond]] [[cond]] [[cond]]
   // CHECK-NEXT:       {{%\d+}} = OpSelect %v3int [[splat]] [[u]] [[v]]
   w = cond ? u : v;
 

+ 2 - 53
tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/offsets.hlsl

@@ -5,18 +5,10 @@
 // RUN: %dxc -E VarOffset -T ps_6_0 -DOFFSETS=constOffsets %s | FileCheck %s -check-prefix=CHK_VAROFF
 // RUN: %dxc -E VarOffset -T ps_6_0 -DOFFSETS=validOffsets %s | FileCheck %s -check-prefix=CHK_VALID
 
-// RUN: %dxc -E ValidOffset -T ps_6_0 -DOFFSETS=argOffsets %s | FileCheck %s -check-prefix=CHK_VALID
-// RUN: %dxc -E ValidOffset -T ps_6_0 -DOFFSETS=cbufOffsets %s | FileCheck %s -check-prefix=CHK_VALID
-// RUN: %dxc -E ValidOffset -T ps_6_0 -DOFFSETS=constOffsets %s | FileCheck %s -check-prefix=CHK_VALID
-// RUN: %dxc -E ValidOffset -T ps_6_0 -DOFFSETS=validOffsets %s | FileCheck %s -check-prefix=CHK_VALID
-
-// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
-// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
-// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
-// CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
 // CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
 // CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
 // CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
+
 // CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
 // CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
 // CHK_RANGE: error: Offsets to texture access operations must be between -8 and 7.
@@ -24,10 +16,7 @@
 // CHK_VAROFF: Offsets to texture access operations must be immediate values
 // CHK_VAROFF: Offsets to texture access operations must be immediate values
 // CHK_VAROFF: Offsets to texture access operations must be immediate values
-// CHK_VAROFF: Offsets to texture access operations must be immediate values
-// CHK_VAROFF: Offsets to texture access operations must be immediate values
-// CHK_VAROFF: Offsets to texture access operations must be immediate values
-// CHK_VAROFF: Offsets to texture access operations must be immediate values
+
 // CHK_VAROFF: Offsets to texture access operations must be immediate values
 // CHK_VAROFF: Offsets to texture access operations must be immediate values
 // CHK_VAROFF: Offsets to texture access operations must be immediate values
@@ -54,11 +43,6 @@ float4 Range(float3 str : STR) : SV_TARGET
     res += t2.Load(1, int2(80, 90));
     res += t3.Load(2, int3(-1, -2, 11));
 
-    res += t2.Gather     (s, str.xy, int2(9,8));
-    res += t2.GatherRed  (s, str.xy, int2(-9,-8));
-    res += t2.GatherCmp     (sc, str.xy, 0.0, int2(999999, -999999));
-    res += t2.GatherCmpRed  (sc, str.xy, 0.0, int2(0, 10));
-
     return res;
 }
 
@@ -83,41 +67,6 @@ float4 VarOffset(float3 str : STR, uint3 argOffsets[4] : O, uint a : A) : SV_TAR
     res += t2.Load(1, OFFSETS[0].xy);
     res += t3.Load(2, OFFSETS[0]);
 
-    res += t2.Gather     (s, str.xy, OFFSETS[0].xy);
-    res += t2.GatherRed  (s, str.xy, OFFSETS[1].xy);
-    res += t2.GatherCmp     (sc, str.xy, 0.0, OFFSETS[0].xy);
-    res += t2.GatherCmpRed  (sc, str.xy, 0.0, OFFSETS[1].xy);
-
     return res;
 }
 
-float4 ValidOffset(float3 str : STR, uint3 argOffsets[4] : O, uint a : A) : SV_TARGET
-{
-    uint b = 3 + a;
-    uint v = 3;
-    const uint3 constOffsets[4] = {uint3(a,a,a), argOffsets[0], cbufOffsets[0], uint3(b,b,b)};
-    uint3 validOffsets[4] = {uint3(v,v,v), uint3(1,1,1), uint3(2,2,2), uint3(3,3,3)};
-    float4 res = 0.0;
-
-    res += t2.GatherRed  (s, str.xy, int2(0,0), int2(1,1), int2(2,2), int2(-11, 1));
-    res += t2.GatherGreen(s, str.xy, int2(0,0), int2(1,1), int2(0,-9), int2(3,3));
-    res += t2.GatherBlue (s, str.xy, int2(0,0), int2(3,33), int2(2,2), int2(3,3));
-    res += t2.GatherAlpha(s, str.xy, int2(11,1), int2(1,1), int2(2,2), int2(3,3));
-
-    res += t2.GatherCmpRed  (sc, str.xy, 0.0, int2(0,0), int2(1,1), int2(2,2), int2(3,-9));
-    res += t2.GatherCmpGreen(sc, str.xy, 0.0, int2(0,0), int2(1,1), int2(10, 5), int2(3,3));
-    res += t2.GatherCmpBlue (sc, str.xy, 0.0, int2(0,0), int2(-11,6), int2(2,2), int2(3,3));
-    res += t2.GatherCmpAlpha(sc, str.xy, 0.0, int2(9,9), int2(1,1), int2(2,2), int2(3,3));
-
-    res += t2.GatherRed  (s, str.xy, int2(0,0), int2(1,1), int2(2,2), OFFSETS[3].xy);
-    res += t2.GatherGreen(s, str.xy, int2(0,0), int2(1,1), OFFSETS[2].xy, int2(3,3));
-    res += t2.GatherBlue (s, str.xy, int2(0,0), OFFSETS[1].xy, int2(2,2), int2(3,3));
-    res += t2.GatherAlpha(s, str.xy, OFFSETS[0].xy, int2(1,1), int2(2,2), int2(3,3));
-
-    res += t2.GatherCmpRed  (sc, str.xy, 0.0, int2(0,0), int2(1,1), int2(2,2), OFFSETS[3].xy);
-    res += t2.GatherCmpGreen(sc, str.xy, 0.0, int2(0,0), int2(1,1), OFFSETS[2].xy, int2(3,3));
-    res += t2.GatherCmpBlue (sc, str.xy, 0.0, int2(0,0), OFFSETS[1].xy, int2(2,2), int2(3,3));
-    res += t2.GatherCmpAlpha(sc, str.xy, 0.0, OFFSETS[0].xy, int2(1,1), int2(2,2), int2(3,3));
-
-    return res;
-}

+ 178 - 0
tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/offsets_gather.hlsl

@@ -0,0 +1,178 @@
+// RUN: %dxc -E GatherRange -T ps_6_0 %s | FileCheck %s
+
+// RUN: %dxc -E Gather1 -T ps_6_0 -DOFFSETS=argOffsets %s | FileCheck %s
+// RUN: %dxc -E Gather1 -T ps_6_0 -DOFFSETS=cbufOffsets %s | FileCheck %s
+// RUN: %dxc -E Gather1 -T ps_6_0 -DOFFSETS=constOffsets %s | FileCheck %s
+// RUN: %dxc -E Gather1 -T ps_6_0 -DOFFSETS=validOffsets %s | FileCheck %s
+
+// RUN: %dxc -E Gather4 -T ps_6_0 -DOFFSETS=argOffsets %s | FileCheck %s -check-prefix=CHK_VALID4
+// RUN: %dxc -E Gather4 -T ps_6_0 -DOFFSETS=cbufOffsets %s | FileCheck %s -check-prefix=CHK_VALID4
+// RUN: %dxc -E Gather4 -T ps_6_0 -DOFFSETS=constOffsets %s | FileCheck %s -check-prefix=CHK_VALID4
+// RUN: %dxc -E Gather4 -T ps_6_0 -DOFFSETS=validOffsets %s | FileCheck %s -check-prefix=CHK_VALID4
+
+// CHECK:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 0)
+// CHECK:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 0)
+// CHECK:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 0,
+// CHECK:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 0,
+
+
+Texture1D t1;
+Texture2D t2;
+Texture3D t3;
+SamplerState s;
+SamplerComparisonState sc;
+
+float4 GatherRange(float3 str : STR) : SV_TARGET
+{
+    float4 res = 0.0;
+    res += t2.Gather     (s, str.xy, int2(9,8));
+    res += t2.GatherRed  (s, str.xy, int2(-9,-8));
+    res += t2.GatherCmp     (sc, str.xy, 0.0, int2(999999, -999999));
+    res += t2.GatherCmpRed  (sc, str.xy, 0.0, int2(0, 10));
+
+    return res;
+}
+
+#ifndef OFFSETS
+#define OFFSETS argOffsets
+#endif
+
+uint3 cbufOffsets[4];
+
+float4 Gather1(float3 str : STR, uint3 argOffsets[4] : O, uint a : A) : SV_TARGET
+{
+    uint b = 3 + a;
+    uint v = 3;
+    const uint3 constOffsets[4] = {uint3(a,a,a), argOffsets[0], cbufOffsets[0], uint3(b,b,b)};
+    uint3 validOffsets[4] = {uint3(v,v,v), uint3(1,1,1), uint3(2,2,2), uint3(3,3,3)};
+    float4 res = 0.0;
+    res += t2.Gather     (s, str.xy, OFFSETS[0].xy);
+    res += t2.GatherRed  (s, str.xy, OFFSETS[1].xy);
+    res += t2.GatherCmp     (sc, str.xy, 0.0, OFFSETS[0].xy);
+    res += t2.GatherCmpRed  (sc, str.xy, 0.0, OFFSETS[1].xy);
+
+    return res;
+}
+
+float4 Gather4(float3 str : STR, uint3 argOffsets[4] : O, uint a : A) : SV_TARGET
+{
+    uint b = 3 + a;
+    uint v = 4;
+    const uint3 constOffsets[4] = {uint3(a,a,a), argOffsets[0], cbufOffsets[0], uint3(b,b,b)};
+    uint3 validOffsets[4] = {uint3(v,v,v), uint3(1,1,1), uint3(2,2,2), uint3(3,3,3)};
+    float4 res = 0.0;
+
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 0, i32 0, i32 0)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 1, i32 1, i32 0)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 2, i32 2, i32 0)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 -11, i32 1, i32 0)
+
+    res += t2.GatherRed  (s, str.xy, int2(0,0), int2(1,1), int2(2,2), int2(-11, 1));
+
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 0, i32 0, i32 1)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 1, i32 1, i32 1)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 0, i32 -9, i32 1)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 3, i32 1)
+
+    res += t2.GatherGreen(s, str.xy, int2(0,0), int2(1,1), int2(0,-9), int2(3,3));
+
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 0, i32 0, i32 2)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 33, i32 2)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 2, i32 2, i32 2)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 3, i32 2)
+
+    res += t2.GatherBlue (s, str.xy, int2(0,0), int2(3,33), int2(2,2), int2(3,3));
+
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 11, i32 1, i32 3)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 1, i32 1, i32 3)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 2, i32 2, i32 3)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 3, i32 3)
+
+    res += t2.GatherAlpha(s, str.xy, int2(11,1), int2(1,1), int2(2,2), int2(3,3));
+
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 0, i32 0, i32 0,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 1, i32 1, i32 0,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 2, i32 2, i32 0,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 -9, i32 0,
+
+    res += t2.GatherCmpRed  (sc, str.xy, 0.0, int2(0,0), int2(1,1), int2(2,2), int2(3,-9));
+
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 0, i32 0, i32 1,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 1, i32 1, i32 1,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 10, i32 5, i32 1,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 3, i32 1,
+
+    res += t2.GatherCmpGreen(sc, str.xy, 0.0, int2(0,0), int2(1,1), int2(10, 5), int2(3,3));
+
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 0, i32 0, i32 2,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 -11, i32 6, i32 2,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 2, i32 2, i32 2,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 3, i32 2,
+
+    res += t2.GatherCmpBlue (sc, str.xy, 0.0, int2(0,0), int2(-11,6), int2(2,2), int2(3,3));
+
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 9, i32 9, i32 3,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 1, i32 1, i32 3,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 2, i32 2, i32 3,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 3, i32 3,
+
+    res += t2.GatherCmpAlpha(sc, str.xy, 0.0, int2(9,9), int2(1,1), int2(2,2), int2(3,3));
+
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 0, i32 1, i32 0)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 2, i32 3, i32 0)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 4, i32 5, i32 0)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 0)
+
+    res += t2.GatherRed  (s, str.xy, int2(0,1), int2(2,3), int2(4,5), OFFSETS[3].xy);
+
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 1, i32 2, i32 1)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 4, i32 1)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 1)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 7, i32 0, i32 1)
+
+    res += t2.GatherGreen(s, str.xy, int2(1,2), int2(3,4), OFFSETS[2].xy, int2(7,0));
+
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 2, i32 3, i32 2)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 2)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 6, i32 7, i32 2)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 0, i32 1, i32 2)
+
+    res += t2.GatherBlue (s, str.xy, int2(2,3), OFFSETS[1].xy, int2(6,7), int2(0,1));
+
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 3)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 5, i32 6, i32 3)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 7, i32 0, i32 3)
+// CHK_VALID4:  @dx.op.textureGather.f32(i32 73, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 1, i32 2, i32 3)
+
+    res += t2.GatherAlpha(s, str.xy, OFFSETS[0].xy, int2(5,6), int2(7,0), int2(1,2));
+
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 4, i32 5, i32 0,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 6, i32 7, i32 0,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 0, i32 1, i32 0,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle {{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 0,
+
+    res += t2.GatherCmpRed  (sc, str.xy, 0.0, int2(4,5), int2(6,7), int2(0,1), OFFSETS[3].xy);
+
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 5, i32 6, i32 1,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 7, i32 0, i32 1,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle {{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 1,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 4, i32 1,
+
+    res += t2.GatherCmpGreen(sc, str.xy, 0.0, int2(5,6), int2(7,0), OFFSETS[2].xy, int2(3,4));
+
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 6, i32 7, i32 2,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle {{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 2,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 2, i32 3, i32 2,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle {{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 4, i32 5, i32 2,
+
+    res += t2.GatherCmpBlue (sc, str.xy, 0.0, int2(6,7), OFFSETS[1].xy, int2(2,3), int2(4,5));
+
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle {{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 {{%?.+}}, i32 {{%?.+}}, i32 3,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 1, i32 2, i32 3,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle %{{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 3, i32 4, i32 3,
+// CHK_VALID4:  @dx.op.textureGatherCmp.f32(i32 74, %dx.types.Handle {{.+}}, %dx.types.Handle %{{.+}}, float %{{.+}}, float %{{.+}}, float undef, float undef, i32 5, i32 6, i32 3,
+
+    res += t2.GatherCmpAlpha(sc, str.xy, 0.0, OFFSETS[0].xy, int2(1,2), int2(3,4), int2(5,6));
+
+    return res;
+}

+ 15 - 0
tools/clang/test/HLSLFileCheck/hlsl/objects/Texture/gatherOffset2.hlsl

@@ -0,0 +1,15 @@
+// RUN: %dxc -T ps_6_0 %s | FileCheck %s
+
+// Test for access violation that previously occurred with these gathers
+
+// CHECK: @main
+
+Texture2D<float> shadowMap;
+SamplerComparisonState BilinearClampCmpSampler;
+SamplerState BilinearClampSampler;
+
+float4 main(float3 uv_depth: TEXCOORD0): SV_Target
+{
+    return shadowMap.GatherCmp(BilinearClampCmpSampler, uv_depth.xy, uv_depth.z, int2(0, 0))
+    + shadowMap.GatherRed(BilinearClampSampler, uv_depth.xy, int2(0, 0));
+}

+ 12 - 0
tools/clang/test/HLSLFileCheck/hlsl/operators/select/sel_vec1_mixed.hlsl

@@ -0,0 +1,12 @@
+// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s
+
+// Make sure 1-component vector in conditional compiles
+// CHECK: = select i1
+
+float1 A[2];
+float B;
+
+float main() : OUT {
+  float1 foo = (A[0] > 0.0f) ? A[0] : B;
+  return foo;
+}

+ 2 - 1
tools/clang/test/HLSLFileCheck/hlsl/operators/swizzle/swizzleInCorrectDelayedTyposInExpr.hlsl

@@ -5,6 +5,7 @@
 // It requires a number of conditions to get there.
 
 // CHECK: error: use of undeclared identifier 'some_var_2'; did you mean 'some_var_1'
+// CHECK: error: use of undeclared identifier 'some_var_3'
 
 float3 some_fn(float4 a, float b) { return b; }
 float4 foo(int i) { return i; }
@@ -17,7 +18,7 @@ float3 repro() {
   // to resolve the vector member expression
   // using hlsl::LookupVectorMemberExprForHLSL
   float4 some_var_1;
-  return some_fn(some_var_2, foo(0).xyz);
+  return some_fn(some_var_2, foo(0).xyz) + some_other_fn(1.0.xxxx, 0.0.xxxx, some_var_3);
 }
 float3 main(float4 input : IN) : OUT {
   return repro();

+ 6 - 0
tools/clang/test/HLSLFileCheck/hlsl/payload_qualifier/general.hlsl

@@ -15,6 +15,8 @@
 // check if we get DXIL and the payload type is there 
 // CHK4: Invalid target for payload access qualifiers. Only lib_6_6 and beyond are supported.
 // CHK5: warning: payload access qualifieres are only supported for target lib_6_6 and beyond. You can opt-in for lib_6_6 with the -enable-payload-qualifiers flag. Qualifiers will be dropped.
+// CHK5: struct [raypayload] Payload {
+// CHK5-NOT: struct [raypayload] OtherPayload {
 // CHK6: %struct.Payload = type { i32, i32 }
 
 // CHK7: error: type 'Payload' used as payload requires that it is annotated with the {{\[[a-z]*\]}} attribute
@@ -24,6 +26,10 @@ struct [raypayload] Payload {
     int a : read(closesthit) : write(caller);
     int b : write(closesthit) : read(caller);
 };
+struct [raypayload] OtherPayload {
+    int a : read(closesthit) : write(caller);
+    int b : write(closesthit) : read(caller);
+};
 #else 
 struct Payload {
     int a;

+ 40 - 0
tools/clang/test/HLSLFileCheck/passes/dxil/dxil_loop_deletion/loop_deletion_inst_simplification.hlsl

@@ -0,0 +1,40 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// Test complicated loop elimination that requires instruction simplification
+// to remove all the useless loops
+
+// We are literally returning zero. There is no need of branches
+// CHECK: @main()
+// CHECK-NOT:br
+
+Texture2D<float> g_tex : register(t0);
+SamplerState g_samp : register(s0);
+
+float4 main(float2 tangent : TANGENT0) : SV_Target
+{
+  float2 uv = 0;
+  float cond = 0;
+  for (uint i = 0; i < 16; i++) {
+    cond = g_tex.SampleGrad(g_samp, uv, 0, 0);
+    uv += tangent * cond;
+  }
+  if (cond < 0) {
+    uv = 0.5f * (uv);
+    for (uint i = 0; i < 6; i++) {
+      cond = g_tex.SampleGrad(g_samp, uv, 0, 0);
+      if (cond)
+        break;
+      if (cond > 0)
+        uv += 1;
+    }
+  }
+
+  float cond2 = 0;
+  for (uint i = 0; i < 16; i++) {
+    if (cond2 >= 3)
+      break;
+    cond2 += g_tex.SampleGrad(g_samp, 0, uv, uv);
+  }
+
+  return 0;
+}

+ 35 - 0
tools/clang/test/HLSLFileCheck/shader_targets/mesh/as-groupshared-nested-payload.hlsl

@@ -0,0 +1,35 @@
+// RUN: %dxc -E amplification -T as_6_5 %s | FileCheck %s
+
+// Make sure we pass groupshared mesh payload directly into DispatchMesh,
+// with correct type, and no alloca involved.
+
+// CHECK: define void @amplification
+// CHECK-NOT: alloca
+// CHECK-NOT: addrspacecast
+// CHECK-NOT: bitcast
+// CHECK: call void @dx.op.dispatchMesh.struct.MeshPayload{{[^ ]*}}(i32 173, i32 1, i32 1, i32 1, %struct.MeshPayload{{[^ ]*}} addrspace(3)*
+// CHECK-NOT: addrspacecast
+// CHECK: ret void
+
+struct MeshPayload
+{
+  float arr[3];
+  uint4 data;
+};
+
+struct GSStruct
+{
+  MeshPayload pld;
+  MeshPayload pld2;
+};
+
+groupshared GSStruct gs;
+GSStruct cb_gs;
+
+[numthreads(4,1,1)]
+void amplification(uint gtid : SV_GroupIndex)
+{
+//  gs = cb_gs;
+  gs.pld.data[gtid] = gtid;
+  DispatchMesh(1,1,1,gs.pld);
+}

+ 3 - 2
tools/clang/test/HLSLFileCheck/shader_targets/mesh/as-groupshared-payload.hlsl

@@ -1,13 +1,14 @@
 // RUN: %dxc -E amplification -T as_6_5 %s | FileCheck %s
 
-// Make sure we pass constant gep of groupshared mesh payload directly
+// Make sure we pass groupshared mesh payload directly
 // into DispatchMesh, with no alloca involved.
 
 // CHECK: define void @amplification
 // CHECK-NOT: alloca
 // CHECK-NOT: addrspacecast
 // CHECK-NOT: bitcast
-// CHECK: call void @dx.op.dispatchMesh.struct.MeshPayload{{[^ ]*}}(i32 173, i32 1, i32 1, i32 1, %struct.MeshPayload{{[^ ]*}} addrspace(3)* getelementptr inbounds (%struct.GSStruct{{[^ ]*}}, %struct.GSStruct{{[^ ]*}} addrspace(3)* @"\01?gs@@3UGSStruct@@A{{[^ ]*}}", i32 0, i32 1))
+// CHECK: call void @dx.op.dispatchMesh.struct.MeshPayload{{[^ ]*}}(i32 173, i32 1, i32 1, i32 1, %struct.MeshPayload{{[^ ]*}} addrspace(3)*
+// CHECK-NOT: addrspacecast
 // CHECK: ret void
 
 struct MeshPayload

+ 40 - 9
tools/clang/tools/dxclib/dxc.cpp

@@ -41,6 +41,7 @@
 #include "dxc/Support/Unicode.h"
 #include "dxc/Support/WinIncludes.h"
 #include "dxc/Support/WinFunctions.h"
+#include "dxc/dxcerrors.h"
 #include "dxc.h"
 #include <vector>
 #include <string>
@@ -1201,8 +1202,18 @@ static LONG CALLBACK ExceptionFilter(PEXCEPTION_POINTERS pExceptionInfo)
   case STATUS_LLVM_FATAL:
     fputs("LLVM Fatal Error\n", stderr);
     break;
+  case EXCEPTION_LOAD_LIBRARY_FAILED:
+    if (pExceptionInfo->ExceptionRecord->ExceptionInformation[0]) {
+      fputs("cannot not load ", stderr);
+      fputws((const wchar_t*)pExceptionInfo->ExceptionRecord->ExceptionInformation[0], stderr);
+      fputs(" library.\n", stderr);
+    }
+    else{
+      fputs("cannot not load library.\n", stderr);
+    }
+    break;
   default:
-    fputs("Error ", stderr);
+    fputs("Terminal Error ", stderr);
     sprintf_s(scratch, _countof(scratch), "0x%08x\n", pExceptionInfo->ExceptionRecord->ExceptionCode);
     fputs(scratch, stderr);
   }
@@ -1307,30 +1318,50 @@ int dxc::main(int argc, const char **argv_) {
       Unicode::acp_char printBuffer[128]; // printBuffer is safe to treat as
                                           // UTF-8 because we use ASCII only errors
       if (msg == nullptr || *msg == '\0') {
-        if (hlslException.hr == DXC_E_DUPLICATE_PART) {
+        switch (hlslException.hr) {
+        case DXC_E_DUPLICATE_PART:
           sprintf_s(
               printBuffer, _countof(printBuffer),
               "dxc failed : DXIL container already contains the given part.");
-        } else if (hlslException.hr == DXC_E_MISSING_PART) {
+          break;
+        case DXC_E_MISSING_PART:
           sprintf_s(
               printBuffer, _countof(printBuffer),
               "dxc failed : DXIL container does not contain the given part.");
-        } else if (hlslException.hr == DXC_E_CONTAINER_INVALID) {
+          break;
+        case DXC_E_CONTAINER_INVALID:
           sprintf_s(printBuffer, _countof(printBuffer),
                     "dxc failed : Invalid DXIL container.");
-        } else if (hlslException.hr == DXC_E_CONTAINER_MISSING_DXIL) {
+          break;
+        case DXC_E_CONTAINER_MISSING_DXIL:
           sprintf_s(printBuffer, _countof(printBuffer),
                     "dxc failed : DXIL container is missing DXIL part.");
-        } else if (hlslException.hr == DXC_E_CONTAINER_MISSING_DEBUG) {
+          break;
+        case DXC_E_CONTAINER_MISSING_DEBUG:
           sprintf_s(printBuffer, _countof(printBuffer),
                     "dxc failed : DXIL container is missing Debug Info part.");
-        } else if (hlslException.hr == E_OUTOFMEMORY) {
+          break;
+        case DXC_E_LLVM_FATAL_ERROR:
+          sprintf_s(printBuffer, _countof(printBuffer),
+                    "dxc failed : Internal Compiler Error - LLVM Fatal Error!");
+          break;
+        case DXC_E_LLVM_UNREACHABLE:
+          sprintf_s(printBuffer, _countof(printBuffer),
+                    "dxc failed : Internal Compiler Error - UNREACHABLE executed!");
+          break;
+        case DXC_E_LLVM_CAST_ERROR:
+          sprintf_s(printBuffer, _countof(printBuffer),
+                    "dxc failed : Internal Compiler Error - Cast of incompatible type!");
+          break;
+        case E_OUTOFMEMORY:
           sprintf_s(printBuffer, _countof(printBuffer),
                     "dxc failed : Out of Memory.");
-        } else if (hlslException.hr == E_INVALIDARG) {
+          break;
+        case E_INVALIDARG:
           sprintf_s(printBuffer, _countof(printBuffer),
                     "dxc failed : Invalid argument.");
-        } else {
+          break;
+        default:
           sprintf_s(printBuffer, _countof(printBuffer),
             "dxc failed : error code 0x%08x.\n", hlslException.hr);
         }

+ 1 - 3
tools/clang/tools/dxcompiler/dxcdisassembler.cpp

@@ -1293,9 +1293,7 @@ static const char *OpCodeSignatures[] = {
   "(index,samplerHeap,nonUniformIndex)",  // CreateHandleFromHeap
   "(unpackMode,pk)",  // Unpack4x8
   "(packMode,x,y,z,w)",  // Pack4x8
-  "()",  // IsHelperLane
-  "(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel)",  // TextureGatherImm
-  "(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel,compareVale)"  // TextureGatherCmpImm
+  "()"  // IsHelperLane
 };
 // OPCODE-SIGS:END
 

+ 3 - 1
tools/clang/tools/dxcompiler/dxcompilerobj.cpp

@@ -1126,9 +1126,11 @@ public:
       _Analysis_assume_(DXC_FAILED(e.hr));
       CComPtr<IDxcResult> pResult;
       hr = e.hr;
+      std::string msg("Internal Compiler error: ");
+      msg += e.msg;
       if (SUCCEEDED(DxcResult::Create(e.hr, DXC_OUT_NONE, {
               DxcOutputObject::ErrorOutput(CP_UTF8,
-                e.msg.c_str(), e.msg.size())
+                msg.c_str(), msg.size())
             }, &pResult)) &&
           SUCCEEDED(pResult->QueryInterface(riid, ppResult))) {
         hr = S_OK;

+ 9 - 4
tools/clang/unittests/HLSL/PixTest.cpp

@@ -1887,12 +1887,17 @@ void main()
 
     auto Testables = TestStructAnnotationCase(hlsl, optimization);
 
+    // 2 in unoptimized case (one for each instance of smallPayload)
+    // 1 in optimized case (cuz p2 aliases over p)
     VERIFY_IS_TRUE(Testables.OffsetAndSizes.size() >= 1);
-    VERIFY_ARE_EQUAL(1, Testables.OffsetAndSizes[0].countOfMembers);
-    VERIFY_ARE_EQUAL(0, Testables.OffsetAndSizes[0].offset);
-    VERIFY_ARE_EQUAL(32, Testables.OffsetAndSizes[0].size);
 
-    VERIFY_ARE_EQUAL(2, Testables.AllocaWrites.size());
+    for (const auto& os : Testables.OffsetAndSizes) {
+      VERIFY_ARE_EQUAL(1, os.countOfMembers);
+      VERIFY_ARE_EQUAL(0, os.offset);
+      VERIFY_ARE_EQUAL(32, os.size);
+    }
+
+    VERIFY_ARE_EQUAL(1, Testables.AllocaWrites.size());
   }
 }
 

+ 0 - 31
utils/hct/hctdb.py

@@ -298,9 +298,6 @@ class db_dxil(object):
             self.name_idx[i].shader_stages = ("pixel",)
         for i in "TextureGather,TextureGatherCmp".split(","):
             self.name_idx[i].category = "Resources - gather"
-        for i in "TextureGatherImm,TextureGatherCmpImm".split(","):
-            self.name_idx[i].category = "Resources - gather"
-            self.name_idx[i].shader_model = 6,15 # Dummy large shader model to prevent accidental inclusion
         for i in "AtomicBinOp,AtomicCompareExchange,Barrier".split(","):
             self.name_idx[i].category = "Synchronization"
         for i in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY".split(","):
@@ -1872,34 +1869,6 @@ class db_dxil(object):
         self.set_op_count_for_version(1, 6, next_op_idx)
         assert next_op_idx == 222, "222 is expected next operation index but encountered %d and thus opcodes are broken" % next_op_idx
 
-        self.add_dxil_op("TextureGatherImm", next_op_idx, "TextureGatherImm", "same as TextureGather, except offsets are limited to immediate values between -8 and 7", "hfwi", "ro", [
-            db_dxil_param(0, "$r", "", "dimension information for texture"),
-            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
-            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
-            db_dxil_param(4, "f", "coord0", "coordinate"),
-            db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
-            db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
-            db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
-            db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
-            db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
-            db_dxil_param(10, "i32", "channel", "channel to sample")],
-            counters=('tex_norm',))
-        next_op_idx += 1
-        self.add_dxil_op("TextureGatherCmpImm", next_op_idx, "TextureGatherCmpImm", "same as TextureGatherCmp, except offsets are limited to immediate values between -8 and 7", "hfwi", "ro", [
-            db_dxil_param(0, "$r", "", "gathered texels"),
-            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
-            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
-            db_dxil_param(4, "f", "coord0", "coordinate"),
-            db_dxil_param(5, "f", "coord1", "coordinate, undef for Texture1D"),
-            db_dxil_param(6, "f", "coord2", "coordinate, undef for Texture1D, Texture1DArray or Texture2D"),
-            db_dxil_param(7, "f", "coord3", "coordinate, defined only for TextureCubeArray"),
-            db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
-            db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
-            db_dxil_param(10, "i32", "channel", "channel to sample"),
-            db_dxil_param(11, "f", "compareVale", "value to compare with")],
-            counters=('tex_cmp',))
-        next_op_idx += 1
-
         # Set interesting properties.
         self.build_indices()
         for i in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,Sample,SampleBias,SampleCmp".split(","):