Parcourir la source

RawBufferLoad and new methods for ByteAddressBuffer (#762)

This change is an extension of float16 support. We are adding LoadHalf, LoadFloat, and LoadDouble methods to ByteAddressBuffer so that users can access data in a byte address buffer as these types. Also, starting with shader model 6.2, we map byte address buffer and structured buffer load/store operations to RawBufferLoad/Store to differentiate raw buffer loads from typed buffer loads. Unlike BufferLoad for typed buffers, RawBufferLoad for min precision types does not return the min precision type; it returns the actual scalar size in the buffer (i.e., rawBufferLoad.i32 for min16int and rawBufferLoad.f32 for min16float). RawBufferLoad/Store take two additional parameters: a mask, which is required for correct status behavior with CheckAccessFullyMapped, and an alignment, a relative alignment kept for potential future benefit to the backend.
Young Kim il y a 8 ans
Parent
commit
2c140f795c
40 fichiers modifiés avec 2269 ajouts et 287 suppressions
  1. 35 8
      docs/DXIL.rst
  2. 26 2
      include/dxc/HLSL/DxilConstants.h
  3. 2 0
      include/dxc/HLSL/DxilGenerationPass.h
  4. 90 0
      include/dxc/HLSL/DxilInstructions.h
  5. 3 0
      include/dxc/HLSL/DxilOperations.h
  6. 25 0
      include/dxc/HLSL/DxilTypeSystem.h
  7. 20 0
      include/dxc/HlslIntrinsicOp.h
  8. 4 3
      include/dxc/dxcapi.internal.h
  9. 1 0
      lib/HLSL/DxcOptimizer.cpp
  10. 278 0
      lib/HLSL/DxilGenerationPass.cpp
  11. 14 0
      lib/HLSL/DxilOperations.cpp
  12. 40 0
      lib/HLSL/DxilTypeSystem.cpp
  13. 3 0
      lib/HLSL/DxilValidation.cpp
  14. 235 71
      lib/HLSL/HLOperationLower.cpp
  15. 1 0
      lib/Transforms/IPO/PassManagerBuilder.cpp
  16. 4 2
      tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
  17. 49 14
      tools/clang/lib/Sema/SemaHLSL.cpp
  18. 389 9
      tools/clang/lib/Sema/gen_intrin_main_tables_15.h
  19. 31 0
      tools/clang/test/CodeGenHLSL/raw_buf2.hlsl
  20. 54 0
      tools/clang/test/CodeGenHLSL/raw_buf3.hlsl
  21. 71 0
      tools/clang/test/CodeGenHLSL/raw_buf4.hlsl
  22. 98 0
      tools/clang/test/CodeGenHLSL/raw_buf5.hlsl
  23. 6 1
      tools/clang/test/CodeGenHLSL/struct_buf1.hlsl
  24. 121 0
      tools/clang/test/CodeGenHLSL/struct_buf2.hlsl
  25. 88 0
      tools/clang/test/CodeGenHLSL/struct_buf3.hlsl
  26. 127 0
      tools/clang/test/CodeGenHLSL/struct_buf4.hlsl
  27. 55 0
      tools/clang/test/CodeGenHLSL/struct_buf5.hlsl
  28. 41 0
      tools/clang/test/CodeGenHLSL/struct_buf6.hlsl
  29. 12 12
      tools/clang/test/CodeGenHLSL/struct_buf_new_layout.hlsl
  30. 8 2
      tools/clang/test/CodeGenHLSL/uav_typed_load_store1.hlsl
  31. 32 0
      tools/clang/test/CodeGenHLSL/uav_typed_load_store3.hlsl
  32. 5 0
      tools/clang/test/HLSL/intrinsic-examples.hlsl
  33. 2 2
      tools/clang/test/HLSL/matrix-syntax-exact-precision.hlsl
  34. 51 51
      tools/clang/test/HLSL/scalar-assignments-exact-precision.hlsl
  35. 103 103
      tools/clang/test/HLSL/scalar-operators-exact-precision.hlsl
  36. 2 2
      tools/clang/test/HLSL/vector-syntax-exact-precision.hlsl
  37. 3 1
      tools/clang/tools/dxcompiler/dxcdisassembler.cpp
  38. 60 1
      tools/clang/unittests/HLSL/CompilerTest.cpp
  39. 50 0
      utils/hct/gen_intrin_main.txt
  40. 30 3
      utils/hct/hctdb.py

+ 35 - 8
docs/DXIL.rst

@@ -1698,6 +1698,7 @@ BufferLoad
 The following signature shows the operation syntax::
 
   ; overloads: SM5.1: f32|i32,  SM6.0: f32|i32
+  ; returns: status
   declare %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(
       i32,                  ; opcode
       %dx.types.Handle,     ; resource handle
@@ -1706,13 +1707,38 @@ The following signature shows the operation syntax::
 
 The call respects SM5.1 OOB and alignment rules.
 
-=================== =====================================================
-Valid resource type # of active coordinates
-=================== =====================================================
-[RW]TypedBuffer     1 (c0 in elements)
-[RW]RawBuffer       1 (c0 in bytes)
-[RW]TypedBuffer     2 (c0 in elements, c1 = byte offset into the element)
-=================== =====================================================
+====================  =====================================================
+Valid resource type   # of active coordinates
+====================  =====================================================
+[RW]TypedBuffer       1 (c0 in elements)
+[RW]RawBuffer         1 (c0 in bytes)
+[RW]StructuredBuffer  2 (c0 in elements, c1 = byte offset into the element)
+====================  =====================================================
+
+RawBufferLoad
+~~~~~~~~~~~~~
+
+The following signature shows the operation syntax::
+
+  ; overloads: SM5.1: f32|i32,  SM6.0: f32|i32
+  ; returns: status
+  declare %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(
+      i32,                  ; opcode
+      %dx.types.Handle,     ; resource handle
+      i32,                  ; coordinate c0
+      i32,                  ; coordinate c1
+      i8,                   ; mask
+      i32,                  ; alignment
+  )
+
+The call respects SM5.1 OOB and alignment rules.
+
+====================  =====================================================
+Valid resource type   # of active coordinates
+====================  =====================================================
+[RW]RawBuffer         1 (c0 in bytes)
+[RW]StructuredBuffer  2 (c0 in elements, c1 = byte offset into the element)
+====================  =====================================================
 
 BufferStore
 ~~~~~~~~~~~
@@ -1720,7 +1746,6 @@ BufferStore
 The following signature shows the operation syntax::
 
   ; overloads: SM5.1: f32|i32,  SM6.0: f32|i32
-  ; returns: status
   declare void @dx.op.bufferStore.f32(
       i32,                  ; opcode
       %dx.types.Handle,     ; resource handle
@@ -2110,6 +2135,8 @@ ID  Name                          Description
 136 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
 137 AttributeAtVertex_            returns the values of the attributes at the vertex.
 138 ViewID                        returns the view index
+139 RawBufferLoad                 reads from a raw buffer and structured buffer
+140 RawBufferStore                writes to a RWByteAddressBuffer or RWStructuredBuffer
 === ============================= =================================================================================================================
 
 

+ 26 - 2
include/dxc/HLSL/DxilConstants.h

@@ -395,6 +395,8 @@ namespace DXIL {
     CheckAccessFullyMapped = 71, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
     CreateHandle = 57, // creates the handle to a resource
     GetDimensions = 72, // gets texture size information
+    RawBufferLoad = 139, // reads from a raw buffer and structured buffer
+    RawBufferStore = 140, // writes to a RWByteAddressBuffer or RWStructuredBuffer
     TextureLoad = 66, // reads texel data without any filtering or sampling
     TextureStore = 67, // reads texel data without any filtering or sampling
   
@@ -477,8 +479,9 @@ namespace DXIL {
   
     NumOpCodes_Dxil_1_0 = 137,
     NumOpCodes_Dxil_1_1 = 139,
+    NumOpCodes_Dxil_1_2 = 141,
   
-    NumOpCodes = 139 // exclusive last value of enumeration
+    NumOpCodes = 141 // exclusive last value of enumeration
   };
   // OPCODE-ENUM:END
 
@@ -591,6 +594,8 @@ namespace DXIL {
     CheckAccessFullyMapped,
     CreateHandle,
     GetDimensions,
+    RawBufferLoad,
+    RawBufferStore,
     TextureLoad,
     TextureStore,
   
@@ -635,8 +640,9 @@ namespace DXIL {
   
     NumOpClasses_Dxil_1_0 = 93,
     NumOpClasses_Dxil_1_1 = 95,
+    NumOpClasses_Dxil_1_2 = 97,
   
-    NumOpClasses = 95 // exclusive last value of enumeration
+    NumOpClasses = 97 // exclusive last value of enumeration
   };
   // OPCODECLASS-ENUM:END
 
@@ -687,6 +693,24 @@ namespace DXIL {
     const unsigned kBufferStoreVal3OpIdx = 7;
     const unsigned kBufferStoreMaskOpIdx = 8;
 
+    // RawBufferLoad operand indices (operand 0 is the opcode).
+    const unsigned kRawBufferLoadHandleOpIdx        = 1;
+    const unsigned kRawBufferLoadIndexOpIdx         = 2;
+    const unsigned kRawBufferLoadElementOffsetOpIdx = 3;
+    const unsigned kRawBufferLoadMaskOpIdx          = 4;
+    const unsigned kRawBufferLoadAlignmentOpIdx     = 5;
+
+    // RawBufferStore operand indices (operand 0 is the opcode).
+    const unsigned kRawBufferStoreHandleOpIdx = 1;
+    const unsigned kRawBufferStoreIndexOpIdx = 2;
+    const unsigned kRawBufferStoreElementOffsetOpIdx = 3;
+    const unsigned kRawBufferStoreVal0OpIdx = 4;
+    const unsigned kRawBufferStoreVal1OpIdx = 5;
+    const unsigned kRawBufferStoreVal2OpIdx = 6;
+    const unsigned kRawBufferStoreVal3OpIdx = 7;
+    const unsigned kRawBufferStoreMaskOpIdx = 8;
+    // The alignment operand follows the mask, matching
+    // DxilInst_RawBufferStore::arg_alignment; it must not alias the mask index.
+    const unsigned kRawBufferStoreAlignmentOpIdx = 9;
+
     // TextureStore.
     const unsigned kTextureStoreHandleOpIdx = 1;
     const unsigned kTextureStoreCoord0OpIdx = 2;

+ 2 - 0
include/dxc/HLSL/DxilGenerationPass.h

@@ -67,6 +67,7 @@ ModulePass *createDxilRemoveDiscardsPass();
 ModulePass *createDxilReduceMSAAToSingleSamplePass();
 ModulePass *createDxilForceEarlyZPass();
 ModulePass *createDxilDebugInstrumentationPass();
+ModulePass *createDxilTranslateRawBuffer();
 ModulePass *createNoPausePassesPass();
 ModulePass *createPausePassesPass();
 ModulePass *createResumePassesPass();
@@ -92,6 +93,7 @@ void initializeSimplifyInstPass(llvm::PassRegistry&);
 void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry&);
 void initializeDxilOutputColorBecomesConstantPass(llvm::PassRegistry&);
 void initializeDxilRemoveDiscardsPass(llvm::PassRegistry&);
+void initializeDxilTranslateRawBufferPass(llvm::PassRegistry&);
 void initializeDxilReduceMSAAToSingleSamplePass(llvm::PassRegistry&);
 void initializeDxilForceEarlyZPass(llvm::PassRegistry&);
 void initializeDxilDebugInstrumentationPass(llvm::PassRegistry&);

+ 90 - 0
include/dxc/HLSL/DxilInstructions.h

@@ -4448,5 +4448,95 @@ struct DxilInst_ViewID {
     return true;
   }
 };
+
+/// This instruction reads from a raw buffer and structured buffer
+///
+/// Helper that wraps an llvm::Instruction pointer and exposes named accessors
+/// for the operands of a RawBufferLoad call. It only stores the pointer.
+struct DxilInst_RawBufferLoad {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_RawBufferLoad(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  // Converts to true when OP::IsDxilOpFuncCallInst reports that Instr is a
+  // call to the RawBufferLoad opcode.
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::RawBufferLoad);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  // Checks for exactly 6 call operands: operand 0 plus the five operands
+  // listed in OperandIdx.
+  // NOTE(review): the dyn_cast result is dereferenced without a null check,
+  // so Instr must already be known to be a CallInst.
+  bool isArgumentListValid() const {
+    if (6 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_srv = 1,
+    arg_index = 2,
+    arg_elementOffset = 3,
+    arg_mask = 4,
+    arg_alignment = 5,
+  };
+  // Accessors
+  // Each get_/set_ pair reads or replaces the operand at the matching
+  // OperandIdx position.
+  llvm::Value *get_srv() const { return Instr->getOperand(1); }
+  void set_srv(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_index() const { return Instr->getOperand(2); }
+  void set_index(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_elementOffset() const { return Instr->getOperand(3); }
+  void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_mask() const { return Instr->getOperand(4); }
+  void set_mask(llvm::Value *val) { Instr->setOperand(4, val); }
+  // The *_val variants read or write the operand as an integer constant (i8
+  // for the mask, i32 for the alignment).
+  // NOTE(review): the getters dereference dyn_cast<ConstantInt> unchecked, so
+  // they are only safe when the operand is a ConstantInt.
+  int8_t get_mask_val() const { return (int8_t)(llvm::dyn_cast<llvm::ConstantInt>(Instr->getOperand(4))->getZExtValue()); }
+  void set_mask_val(int8_t val) { Instr->setOperand(4, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 8), llvm::APInt(8, (uint64_t)val))); }
+  llvm::Value *get_alignment() const { return Instr->getOperand(5); }
+  void set_alignment(llvm::Value *val) { Instr->setOperand(5, val); }
+  int32_t get_alignment_val() const { return (int32_t)(llvm::dyn_cast<llvm::ConstantInt>(Instr->getOperand(5))->getZExtValue()); }
+  void set_alignment_val(int32_t val) { Instr->setOperand(5, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 32), llvm::APInt(32, (uint64_t)val))); }
+};
+
+/// This instruction writes to a RWByteAddressBuffer or RWStructuredBuffer
+///
+/// Helper that wraps an llvm::Instruction pointer and exposes named accessors
+/// for the operands of a RawBufferStore call. It only stores the pointer.
+struct DxilInst_RawBufferStore {
+  llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_RawBufferStore(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  // Converts to true when OP::IsDxilOpFuncCallInst reports that Instr is a
+  // call to the RawBufferStore opcode.
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::RawBufferStore);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  // Checks for exactly 10 call operands: operand 0 plus the nine operands
+  // listed in OperandIdx.
+  // NOTE(review): the dyn_cast result is dereferenced without a null check,
+  // so Instr must already be known to be a CallInst.
+  bool isArgumentListValid() const {
+    if (10 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Operand indexes
+  enum OperandIdx {
+    arg_uav = 1,
+    arg_index = 2,
+    arg_elementOffset = 3,
+    arg_value0 = 4,
+    arg_value1 = 5,
+    arg_value2 = 6,
+    arg_value3 = 7,
+    arg_mask = 8,
+    arg_alignment = 9,
+  };
+  // Accessors
+  // Each get_/set_ pair reads or replaces the operand at the matching
+  // OperandIdx position.
+  llvm::Value *get_uav() const { return Instr->getOperand(1); }
+  void set_uav(llvm::Value *val) { Instr->setOperand(1, val); }
+  llvm::Value *get_index() const { return Instr->getOperand(2); }
+  void set_index(llvm::Value *val) { Instr->setOperand(2, val); }
+  llvm::Value *get_elementOffset() const { return Instr->getOperand(3); }
+  void set_elementOffset(llvm::Value *val) { Instr->setOperand(3, val); }
+  llvm::Value *get_value0() const { return Instr->getOperand(4); }
+  void set_value0(llvm::Value *val) { Instr->setOperand(4, val); }
+  llvm::Value *get_value1() const { return Instr->getOperand(5); }
+  void set_value1(llvm::Value *val) { Instr->setOperand(5, val); }
+  llvm::Value *get_value2() const { return Instr->getOperand(6); }
+  void set_value2(llvm::Value *val) { Instr->setOperand(6, val); }
+  llvm::Value *get_value3() const { return Instr->getOperand(7); }
+  void set_value3(llvm::Value *val) { Instr->setOperand(7, val); }
+  llvm::Value *get_mask() const { return Instr->getOperand(8); }
+  void set_mask(llvm::Value *val) { Instr->setOperand(8, val); }
+  // The *_val variants read or write the operand as an integer constant (i8
+  // for the mask, i32 for the alignment).
+  // NOTE(review): the getters dereference dyn_cast<ConstantInt> unchecked, so
+  // they are only safe when the operand is a ConstantInt.
+  int8_t get_mask_val() const { return (int8_t)(llvm::dyn_cast<llvm::ConstantInt>(Instr->getOperand(8))->getZExtValue()); }
+  void set_mask_val(int8_t val) { Instr->setOperand(8, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 8), llvm::APInt(8, (uint64_t)val))); }
+  llvm::Value *get_alignment() const { return Instr->getOperand(9); }
+  void set_alignment(llvm::Value *val) { Instr->setOperand(9, val); }
+  int32_t get_alignment_val() const { return (int32_t)(llvm::dyn_cast<llvm::ConstantInt>(Instr->getOperand(9))->getZExtValue()); }
+  void set_alignment_val(int32_t val) { Instr->setOperand(9, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 32), llvm::APInt(32, (uint64_t)val))); }
+};
 // INSTR-HELPER:END
 } // namespace hlsl

+ 3 - 0
include/dxc/HLSL/DxilOperations.h

@@ -66,6 +66,9 @@ public:
   // To check if operation uses strict precision types
   bool UseMinPrecision();
 
+  // Get the size of the type for a given layout
+  uint64_t GetAllocSizeForType(llvm::Type *Ty);
+
   // LLVM helpers. Perhaps, move to a separate utility class.
   llvm::Constant *GetI1Const(bool v);
   llvm::Constant *GetI8Const(char v);

+ 25 - 0
include/dxc/HLSL/DxilTypeSystem.h

@@ -202,4 +202,29 @@ private:
 
 DXIL::SigPointKind SigPointFromInputQual(DxilParamInputQual Q, DXIL::ShaderKind SK, bool isPC);
 
+// Input iterator over a struct type paired with its struct annotation. Each
+// dereference yields a (llvm::Type *, DxilFieldAnnotation *) pair.
+// NOTE(review): presumably the pair is the element type and field annotation
+// at the current index; confirm against the definitions in DxilTypeSystem.cpp.
+class DxilStructTypeIterator
+    : public std::iterator<std::input_iterator_tag,
+                           std::pair<llvm::Type *, DxilFieldAnnotation *>> {
+private:
+  llvm::StructType *STy;             // struct type being walked
+  DxilStructAnnotation *SAnnotation; // annotation paired with STy
+  unsigned index;                    // position set from the constructor's idx
+
+public:
+  // The constructor asserts that sTy's element count equals sAnnotation's
+  // field count; idx defaults to 0.
+  DxilStructTypeIterator(llvm::StructType *sTy,
+                         DxilStructAnnotation *sAnnotation, unsigned idx = 0);
+  // prefix
+  DxilStructTypeIterator &operator++();
+  // postfix
+  DxilStructTypeIterator operator++(int);
+
+  // Comparison operators take the other iterator by value.
+  bool operator==(DxilStructTypeIterator iter);
+  bool operator!=(DxilStructTypeIterator iter);
+  std::pair<llvm::Type *, DxilFieldAnnotation *> operator*();
+};
+
+// Range helpers that build the begin/end iterators for a struct type and its
+// annotation, for use in an explicit iterator loop.
+DxilStructTypeIterator begin(llvm::StructType *STy,
+                             DxilStructAnnotation *SAnno);
+DxilStructTypeIterator end(llvm::StructType *STy, DxilStructAnnotation *SAnno);
+
 } // namespace hlsl

+ 20 - 0
include/dxc/HlslIntrinsicOp.h

@@ -207,6 +207,16 @@ import hctdb_instrhelp
   MOP_Load2,
   MOP_Load3,
   MOP_Load4,
+  MOP_LoadDouble,
+  MOP_LoadDouble2,
+  MOP_LoadFloat,
+  MOP_LoadFloat2,
+  MOP_LoadFloat3,
+  MOP_LoadFloat4,
+  MOP_LoadHalf,
+  MOP_LoadHalf2,
+  MOP_LoadHalf3,
+  MOP_LoadHalf4,
   MOP_InterlockedAdd,
   MOP_InterlockedAnd,
   MOP_InterlockedCompareExchange,
@@ -220,6 +230,16 @@ import hctdb_instrhelp
   MOP_Store2,
   MOP_Store3,
   MOP_Store4,
+  MOP_StoreDouble,
+  MOP_StoreDouble2,
+  MOP_StoreFloat,
+  MOP_StoreFloat2,
+  MOP_StoreFloat3,
+  MOP_StoreFloat4,
+  MOP_StoreHalf,
+  MOP_StoreHalf2,
+  MOP_StoreHalf3,
+  MOP_StoreHalf4,
   MOP_DecrementCounter,
   MOP_IncrementCounter,
   MOP_Consume,

+ 4 - 3
include/dxc/dxcapi.internal.h

@@ -59,8 +59,8 @@ enum LEGAL_INTRINSIC_COMPTYPES {
   LICOMPTYPE_ANY_INT32 = 5,       // i32, u32, int-literal
   LICOMPTYPE_UINT_ONLY = 6,       // u32, u64, int-literal; no casts allowed
   LICOMPTYPE_FLOAT = 7,           // f32, partial-precision-f32, float-literal
-  LICOMPTYPE_ANY_FLOAT = 8,       // f32, partial-precision-f32, f64, float-literal, min10-float, min16-float
-  LICOMPTYPE_FLOAT_LIKE = 9,      // f32, partial-precision-f32, float-literal, min10-float, min16-float
+  LICOMPTYPE_ANY_FLOAT = 8,       // f32, partial-precision-f32, f64, float-literal, min10-float, min16-float, half
+  LICOMPTYPE_FLOAT_LIKE = 9,      // f32, partial-precision-f32, float-literal, min10-float, min16-float, half
   LICOMPTYPE_FLOAT_DOUBLE = 10,   // f32, partial-precision-f32, f64, float-literal
   LICOMPTYPE_DOUBLE = 11,         // f64, float-literal
   LICOMPTYPE_DOUBLE_ONLY = 12,    // f64; no casts allowed
@@ -78,8 +78,9 @@ enum LEGAL_INTRINSIC_COMPTYPES {
   LICOMPTYPE_WAVE = 24,
   LICOMPTYPE_UINT64 = 25,         // u64, int-literal
   LICOMPTYPE_UINT32_64 = 26,      // u32, u64, int-literal
+  LICOMPTYPE_HALF = 27,
 
-  LICOMPTYPE_COUNT = 27
+  LICOMPTYPE_COUNT = 28
 };
 
 static const BYTE IA_SPECIAL_BASE = 0xf0;

+ 1 - 0
lib/HLSL/DxcOptimizer.cpp

@@ -104,6 +104,7 @@ HRESULT SetupRegistryPassForHLSL() {
     initializeDxilPreserveAllOutputsPass(Registry);
     initializeDxilReduceMSAAToSingleSamplePass(Registry);
     initializeDxilRemoveDiscardsPass(Registry);
+    initializeDxilTranslateRawBufferPass(Registry);
     initializeDynamicIndexingVectorToArrayPass(Registry);
     initializeEarlyCSELegacyPassPass(Registry);
     initializeEliminateAvailableExternallyPass(Registry);

+ 278 - 0
lib/HLSL/DxilGenerationPass.cpp

@@ -37,6 +37,7 @@
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <memory>
 #include <unordered_set>
+#include <iterator>
 
 using namespace llvm;
 using namespace hlsl;
@@ -1832,3 +1833,280 @@ ModulePass *llvm::createDxilLegalizeEvalOperationsPass() {
 INITIALIZE_PASS(DxilLegalizeEvalOperations,
                 "hlsl-dxil-legalize-eval-operations",
                 "DXIL legalize eval operations", false, false)
+
+///////////////////////////////////////////////////////////////////////////////
+// Translate RawBufferLoad/RawBufferStore
+// This pass is to make sure that we generate correct buffer load for DXIL
+// For DXIL < 1.2, rawBufferLoad will be translated to BufferLoad instruction
+// without mask.
+// For DXIL >= 1.2, if min precision is enabled, currently generation pass is
+// producing i16/f16 return type for min precisions. For rawBuffer, we will
+// change this so that min precisions are returning its actual scalar type (i32/f32)
+// and will be truncated to their corresponding types after loading / before storing.
+namespace {
+
+class DxilTranslateRawBuffer : public ModulePass {
+public:
+  static char ID;
+  explicit DxilTranslateRawBuffer() : ModulePass(ID) {}
+
+  // Visits every named function whose name starts with "dx.op.rawBufferLoad"
+  // or "dx.op.rawBufferStore".
+  //  - DXIL 1.x with minor < 2: calls are rewritten to BufferLoad/BufferStore
+  //    and the raw declaration is erased.
+  //  - Otherwise, when native low precision is off: min-precision overloads
+  //    are widened to their storage type.
+  // Always returns true.
+  bool runOnModule(Module &M) {
+    DxilModule &DM = M.GetDxilModule();
+    unsigned dxilMajor, dxilMinor;
+    DM.GetDxilVersion(dxilMajor, dxilMinor);
+    DxilModule::ShaderFlags shaderFlags = DM.m_ShaderFlags;
+
+    const bool lowerToTypedOps = (dxilMajor == 1 && dxilMinor < 2);
+    const bool widenMinPrecision =
+        !lowerToTypedOps && !shaderFlags.GetUseNativeLowPrecision();
+    if (!lowerToTypedOps && !widenMinPrecision)
+      return true;
+
+    for (auto It = M.functions().begin(), End = M.functions().end();
+         It != End;) {
+      // Advance first: the current function may be erased below.
+      Function *Fn = &*(It++);
+      if (!Fn->hasName())
+        continue;
+      const bool isLoad = Fn->getName().startswith("dx.op.rawBufferLoad");
+      const bool isStore =
+          !isLoad && Fn->getName().startswith("dx.op.rawBufferStore");
+      if (!isLoad && !isStore)
+        continue;
+
+      if (lowerToTypedOps) {
+        if (isLoad)
+          ReplaceRawBufferLoad(Fn, M);
+        else
+          ReplaceRawBufferStore(Fn, M);
+        Fn->eraseFromParent();
+      } else if (isLoad) {
+        ReplaceMinPrecisionRawBufferLoad(Fn, M);
+      } else {
+        ReplaceMinPrecisionRawBufferStore(Fn, M);
+      }
+    }
+    return true;
+  }
+
+private:
+  // DXIL < 1.2: rewrite RawBufferLoad/Store calls as BufferLoad/Store.
+  void ReplaceRawBufferLoad(Function *F, Module &M);
+  void ReplaceRawBufferStore(Function *F, Module &M);
+  // Min precision: make RawBufferLoad/Store use the actual storage size of
+  // the element type.
+  void ReplaceMinPrecisionRawBufferLoad(Function *F, Module &M);
+  void ReplaceMinPrecisionRawBufferStore(Function *F, Module &M);
+  void ReplaceMinPrecisionRawBufferLoadByType(Function *F, Type *FromTy,
+                                              Type *ToTy, OP *Op,
+                                              const DataLayout &DL);
+};
+} // namespace
+
+// Rewrites every call to the rawBufferLoad declaration F as a BufferLoad call
+// with the same element overload. Only operands 1-3 of the old call are
+// forwarded; the remaining operands are dropped.
+void DxilTranslateRawBuffer::ReplaceRawBufferLoad(Function *F, Module &M) {
+  OP *hlslOP = M.GetDxilModule().GetOP();
+  StructType *ResRetTy = dyn_cast<StructType>(F->getReturnType());
+  if (!ResRetTy) {
+    DXASSERT(false, "RawBufferLoad should return struct type.");
+    return;
+  }
+
+  Function *BufferLoadFn = hlslOP->GetOpFunc(hlsl::DXIL::OpCode::BufferLoad,
+                                             ResRetTy->getElementType(0));
+  for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE;) {
+    // Step past the user before it is erased.
+    CallInst *OldCall = dyn_cast<CallInst>(*(UI++));
+    if (!OldCall) {
+      DXASSERT(false, "function can only be used with call instructions.");
+      continue;
+    }
+    IRBuilder<> Builder(OldCall);
+    SmallVector<Value *, 4> NewArgs;
+    NewArgs.emplace_back(
+        hlslOP->GetI32Const((unsigned)DXIL::OpCode::BufferLoad));
+    for (unsigned OpIdx = 1; OpIdx < 4; ++OpIdx)
+      NewArgs.emplace_back(OldCall->getArgOperand(OpIdx));
+    CallInst *NewCall = Builder.CreateCall(BufferLoadFn, NewArgs);
+    OldCall->replaceAllUsesWith(NewCall);
+    OldCall->eraseFromParent();
+  }
+}
+
+// Rewrites every call to the rawBufferStore declaration F as a BufferStore
+// call whose overload is the type of parameter 4 (the first stored value).
+// Operands 1-8 of the old call are forwarded unchanged; the operand after
+// them is dropped.
+void DxilTranslateRawBuffer::ReplaceRawBufferStore(Function *F,
+  Module &M) {
+  OP *op = M.GetDxilModule().GetOP();
+  // The return type is checked inline so no local exists only for the assert.
+  DXASSERT(F->getReturnType()->isVoidTy(),
+           "rawBufferStore should return a void type.");
+  Type *ETy = F->getFunctionType()->getParamType(4); // value
+  Function *newFunction = op->GetOpFunc(hlsl::DXIL::OpCode::BufferStore, ETy);
+  for (auto U = F->user_begin(), E = F->user_end(); U != E;) {
+    // Advance before the call is erased below.
+    User *user = *(U++);
+    if (CallInst *CI = dyn_cast<CallInst>(user)) {
+      IRBuilder<> Builder(CI);
+      // Nine operands are pushed (opcode + operands 1-8), so size the inline
+      // storage to match and avoid a heap allocation per call site.
+      SmallVector<Value *, 9> args;
+      args.emplace_back(op->GetI32Const((unsigned)DXIL::OpCode::BufferStore));
+      for (unsigned i = 1; i < 9; ++i) {
+        args.emplace_back(CI->getArgOperand(i));
+      }
+      Builder.CreateCall(newFunction, args);
+      CI->eraseFromParent();
+    }
+    else {
+      DXASSERT(false, "function can only be used with call instructions.");
+    }
+  }
+}
+
+// Picks the widened element type for a rawBufferLoad declaration whose result
+// struct starts with half (-> float) or i16 (-> i32) and hands the rewrite to
+// ReplaceMinPrecisionRawBufferLoadByType. Other element types are left alone.
+void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferLoad(Function *F,
+                                                              Module &M) {
+  OP *Op = M.GetDxilModule().GetOP();
+  StructType *ResRetTy = dyn_cast<StructType>(F->getReturnType());
+  if (!ResRetTy) {
+    DXASSERT(false, "RawBufferLoad should return struct type.");
+    return;
+  }
+
+  Type *NarrowTy = ResRetTy->getElementType(0);
+  Type *WideTy = nullptr;
+  if (NarrowTy->isHalfTy())
+    WideTy = Type::getFloatTy(M.getContext());
+  else if (NarrowTy == Type::getInt16Ty(M.getContext()))
+    WideTy = Type::getInt32Ty(M.getContext());
+
+  if (WideTy)
+    ReplaceMinPrecisionRawBufferLoadByType(F, NarrowTy, WideTy, Op,
+                                           M.getDataLayout());
+}
+
+// Widens a min-precision rawBufferStore overload. When parameter 4 of F (the
+// first stored value) is half or i16, every call to F is replaced by a call to
+// the float / i32 RawBufferStore overload: operands 0-3 and the mask are
+// forwarded, the four values are extended, and the alignment operand is set to
+// the alloc size of the widened type. Any other value type returns early.
+// NOTE(review): F itself is not erased by this function.
+void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferStore(Function *F,
+                                                              Module &M) {
+  Type *RetTy = F->getReturnType();
+  DXASSERT(RetTy->isVoidTy(), "rawBufferStore should return a void type.");
+  Type *ETy = F->getFunctionType()->getParamType(4); // value
+  Type *NewETy;
+  if (ETy->isHalfTy()) {
+    NewETy = Type::getFloatTy(M.getContext());
+  }
+  else if (ETy == Type::getInt16Ty(M.getContext())) {
+    NewETy = Type::getInt32Ty(M.getContext());
+  }
+  else {
+    return; // not a min precision type
+  }
+  Function *newFunction = M.GetDxilModule().GetOP()->GetOpFunc(
+      DXIL::OpCode::RawBufferStore, NewETy);
+  // for each function
+  // add argument 4-7 to its upconverted values
+  // replace function call
+  for (auto FuncUser = F->user_begin(), FuncEnd = F->user_end(); FuncUser != FuncEnd;) {
+    // The iterator is advanced before CI is erased at the end of the body.
+    CallInst *CI = dyn_cast<CallInst>(*(FuncUser++));
+    DXASSERT(CI, "function user must be a call instruction.");
+    IRBuilder<> CIBuilder(CI);
+    SmallVector<Value *, 9> Args;
+    // Operands 0-3 are copied through unchanged.
+    for (unsigned i = 0; i < 4; ++i) {
+      Args.emplace_back(CI->getArgOperand(i));
+    }
+    // values to store should be converted to its higher precision types
+    if (ETy->isHalfTy()) {
+      for (unsigned i = 4; i < 8; ++i) {
+        Value *NewV = CIBuilder.CreateFPExt(CI->getArgOperand(i),
+                                            Type::getFloatTy(M.getContext()));
+        Args.emplace_back(NewV);
+      }
+    }
+    else if (ETy == Type::getInt16Ty(M.getContext())) {
+      // This case only applies to typed buffer since Store operation of byte
+      // address buffer for min precision is handled by implicit conversion on
+      // intrinsic call. Since we are extending integer, we have to know if we
+      // should sign ext or zero ext. We can do this by iterating checking the
+      // size of the element at struct type and comp type at type annotation
+      // NOTE(review): handleCI, rangeID, STy and offsetInt below all come from
+      // dyn_cast and are dereferenced with at most a DXASSERT guarding them;
+      // offsetInt in particular is null when operand 3 is not a ConstantInt.
+      CallInst *handleCI = dyn_cast<CallInst>(CI->getArgOperand(1));
+      DXASSERT(handleCI, "otherwise handle was not an argument to buffer store.");
+      ConstantInt *resClass = dyn_cast<ConstantInt>(handleCI->getArgOperand(1));
+      DXASSERT(resClass && resClass->getSExtValue() ==
+                               (unsigned)DXIL::ResourceClass::UAV,
+               "otherwise buffer store called on non uav kind.");
+      ConstantInt *rangeID = dyn_cast<ConstantInt>(handleCI->getArgOperand(2)); // range id or idx?
+      DXASSERT(rangeID, "wrong createHandle call.");
+      DxilResource dxilRes = M.GetDxilModule().GetUAV(rangeID->getSExtValue());
+      StructType *STy = dyn_cast<StructType>(dxilRes.GetRetType());
+      DxilStructAnnotation *SAnnot = M.GetDxilModule().GetTypeSystem().GetStructAnnotation(STy);
+      ConstantInt *offsetInt = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+      unsigned offset = offsetInt->getSExtValue();
+      unsigned currentOffset = 0;
+      // Walk the fields accumulating alloc sizes; the first field whose end
+      // offset exceeds the store offset decides zero- vs sign-extension.
+      // NOTE(review): if no field matches, or the matching field is neither
+      // uint nor int, no value operands are appended and the call built below
+      // has only 6 arguments.
+      for (DxilStructTypeIterator iter = begin(STy, SAnnot), ItEnd = end(STy, SAnnot); iter != ItEnd; ++iter) {
+        std::pair<Type *, DxilFieldAnnotation*> pair = *iter;
+        currentOffset += M.getDataLayout().getTypeAllocSize(pair.first);
+        if (currentOffset > offset) {
+          if (pair.second->GetCompType().IsUIntTy()) {
+            for (unsigned i = 4; i < 8; ++i) {
+              Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), Type::getInt32Ty(M.getContext()));
+              Args.emplace_back(NewV);
+            }
+            break;
+          }
+          else if (pair.second->GetCompType().IsIntTy()) {
+            for (unsigned i = 4; i < 8; ++i) {
+              Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), Type::getInt32Ty(M.getContext()));
+              Args.emplace_back(NewV);
+            }
+            break;
+          }
+          else {
+            DXASSERT(false, "Invalid comp type");
+          }
+        }
+      }
+    }
+
+    // mask
+    Args.emplace_back(CI->getArgOperand(8));
+    // alignment
+    // Recomputed from the widened element type, not copied from the old call.
+    Args.emplace_back(M.GetDxilModule().GetOP()->GetI32Const(
+        M.getDataLayout().getTypeAllocSize(NewETy)));
+    CIBuilder.CreateCall(newFunction, Args);
+    CI->eraseFromParent();
+   }
+}
+
+
+// Replaces every call to the min-precision rawBufferLoad declaration F with a
+// call to the ToTy overload, then erases F.
+//   F      - rawBufferLoad declaration whose element type is FromTy
+//   FromTy - original narrow element type (half or an integer type)
+//   ToTy   - widened element type used for the new overload
+//   Op     - DXIL op helper used to get the new declaration and constants
+//   DL     - data layout; the alloc size of ToTy becomes the alignment operand
+// Each extractvalue user of an old call is re-created on the new call and
+// converted back to FromTy, except for element 4 (the status), which is
+// passed through unconverted.
+// NOTE(review): users of the old call that are not extractvalue instructions
+// are not rewritten before the old call is erased.
+void DxilTranslateRawBuffer::ReplaceMinPrecisionRawBufferLoadByType(
+    Function *F, Type *FromTy, Type *ToTy, OP *Op, const DataLayout &DL) {
+  Function *newFunction = Op->GetOpFunc(DXIL::OpCode::RawBufferLoad, ToTy);
+  for (auto FUser = F->user_begin(), FEnd = F->user_end(); FUser != FEnd;) {
+    // Advance before the call is erased below.
+    User *UserCI = *(FUser++);
+    if (CallInst *CI = dyn_cast<CallInst>(UserCI)) {
+      IRBuilder<> CIBuilder(CI);
+      // Six operands in total, so the inline storage is sized to match.
+      SmallVector<Value *, 6> newFuncArgs;
+      // opcode, handle, index, elementOffset, mask
+      for (unsigned i = 0; i < 5; ++i) {
+        newFuncArgs.emplace_back(CI->getArgOperand(i));
+      }
+      // new alignment for new type
+      newFuncArgs.emplace_back(Op->GetI32Const(DL.getTypeAllocSize(ToTy)));
+      CallInst *newCI = CIBuilder.CreateCall(newFunction, newFuncArgs);
+      for (auto CIUser = CI->user_begin(), CIEnd = CI->user_end();
+           CIUser != CIEnd;) {
+        User *UserEV = *(CIUser++);
+        if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(UserEV)) {
+          IRBuilder<> EVBuilder(EV);
+          ArrayRef<unsigned> Indices = EV->getIndices();
+          DXASSERT(Indices.size() == 1, "Otherwise we have wrong extract value.");
+          Value *newEV = EVBuilder.CreateExtractValue(newCI, Indices);
+          // Start from the unconverted value so newTruncV is never read
+          // uninitialized, even when DXASSERT compiles to nothing.
+          Value *newTruncV = newEV;
+          if (4 != Indices[0]) { // Don't truncate status
+            if (FromTy->isHalfTy()) {
+              newTruncV = EVBuilder.CreateFPTrunc(newEV, FromTy);
+            } else if (FromTy->isIntegerTy()) {
+              newTruncV = EVBuilder.CreateTrunc(newEV, FromTy);
+            } else {
+              DXASSERT(false, "unexpected type conversion");
+            }
+          }
+          EV->replaceAllUsesWith(newTruncV);
+          EV->eraseFromParent();
+        }
+      }
+      CI->eraseFromParent();
+    }
+  }
+  F->eraseFromParent();
+}
+
+// Definition of the pass's static ID member, which the constructor passes to
+// ModulePass.
+char DxilTranslateRawBuffer::ID = 0;
+// Factory for the raw buffer translation pass; returns a newly allocated
+// DxilTranslateRawBuffer instance.
+ModulePass *llvm::createDxilTranslateRawBuffer() {
+  return new DxilTranslateRawBuffer();
+}
+
+INITIALIZE_PASS(DxilTranslateRawBuffer, "hlsl-translate-dxil-raw-buffer",
+                "Translate raw buffer load", false, false)

+ 14 - 0
lib/HLSL/DxilOperations.cpp

@@ -246,6 +246,10 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
 
   // Graphics shader                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
   {  OC::ViewID,                  "ViewID",                   OCC::ViewID,                   "viewID",                     false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+
+  // Resources                                                                                                              void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
+  {  OC::RawBufferLoad,           "RawBufferLoad",            OCC::RawBufferLoad,            "rawBufferLoad",              false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadOnly, },
+  {  OC::RawBufferStore,          "RawBufferStore",           OCC::RawBufferStore,           "rawBufferStore",             false,  true,  true, false, false, false,  true,  true, false, Attribute::None,     },
 };
 // OPCODE-OLOADS:END
 
@@ -736,6 +740,10 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
 
     // Graphics shader
   case OpCode::ViewID:                 A(pI32);     A(pI32); break;
+
+    // Resources
+  case OpCode::RawBufferLoad:          RRT(pETy);   A(pI32); A(pRes); A(pI32); A(pI32); A(pI8);  A(pI32); break;
+  case OpCode::RawBufferStore:         A(pV);       A(pI32); A(pRes); A(pI32); A(pI32); A(pETy); A(pETy); A(pETy); A(pETy); A(pI8);  A(pI32); break;
   // OPCODE-OLOAD-FUNCS:END
   default: DXASSERT(false, "otherwise unhandled case"); break;
   }
@@ -803,6 +811,10 @@ bool OP::UseMinPrecision() {
   return m_LowPrecisionMode == DXIL::LowPrecisionMode::UseMinPrecision;
 }
 
+uint64_t OP::GetAllocSizeForType(llvm::Type *Ty) { // Returns getTypeAllocSize(Ty) as reported by the data layout of m_pModule.
+  return m_pModule->getDataLayout().getTypeAllocSize(Ty);
+}
+
 llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   DXASSERT(F, "not work on nullptr");
   Type *Ty = F->getReturnType();
@@ -817,6 +829,7 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   case OpCode::StoreOutput:
   case OpCode::BufferStore:
   case OpCode::StorePatchConstant:
+  case OpCode::RawBufferStore:
     DXASSERT_NOMSG(FT->getNumParams() > 4);
     return FT->getParamType(4);
   case OpCode::IsNaN:
@@ -902,6 +915,7 @@ llvm::Type *OP::GetOverloadType(OpCode OpCode, llvm::Function *F) {
   case OpCode::BufferLoad:
   case OpCode::TextureGather:
   case OpCode::TextureGatherCmp:
+  case OpCode::RawBufferLoad:
   {
     StructType *ST = cast<StructType>(Ty);
     return ST->getElementType(0);

+ 40 - 0
lib/HLSL/DxilTypeSystem.cpp

@@ -429,4 +429,44 @@ bool DxilTypeSystem::UseMinPrecision() {
   return m_LowPrecisionMode == DXIL::LowPrecisionMode::UseMinPrecision;
 }
 
+DxilStructTypeIterator::DxilStructTypeIterator(llvm::StructType *sTy, DxilStructAnnotation *sAnnotation, // Pairs a struct type with its annotation, positioned at field idx.
+  unsigned idx)
+  : STy(sTy), SAnnotation(sAnnotation), index(idx) {
+  DXASSERT( // The struct type and the annotation must describe the same number of fields.
+    sTy->getNumElements() == sAnnotation->GetNumFields(),
+    "Otherwise the pairing of annotation and struct type does not match.");
+}
+
+// Pre-increment: advance to the next field and return this iterator.
+DxilStructTypeIterator &DxilStructTypeIterator::operator++() {
+  ++index;
+  return *this;
+}
+// Post-increment: advance to the next field, but hand back an iterator that
+// still refers to the field this one pointed at before the step.
+DxilStructTypeIterator DxilStructTypeIterator::operator++(int) {
+  DxilStructTypeIterator previous(STy, SAnnotation, index);
+  ++index;
+  return previous;
+}
+
+// Iterators compare equal when they share the struct type, the annotation
+// and the field index.
+bool DxilStructTypeIterator::operator==(DxilStructTypeIterator iter) {
+  if (STy != iter.STy || SAnnotation != iter.SAnnotation)
+    return false;
+  return index == iter.index;
+}
+
+// Logical negation of operator==.
+bool DxilStructTypeIterator::operator!=(DxilStructTypeIterator iter) {
+  return !(*this == iter);
+}
+
+// Dereference: yields the element type at the current index together with a
+// pointer to the field annotation at the same index.
+std::pair<llvm::Type *, DxilFieldAnnotation *> DxilStructTypeIterator::operator*() {
+  llvm::Type *fieldTy = STy->getElementType(index);
+  DxilFieldAnnotation *fieldAnno = &SAnnotation->GetFieldAnnotation(index);
+  return std::make_pair(fieldTy, fieldAnno);
+}
+
+// Iterator positioned at the first field (index 0) of the struct/annotation pair.
+DxilStructTypeIterator begin(llvm::StructType *STy, DxilStructAnnotation *SAnno) {
+  return DxilStructTypeIterator(STy, SAnno, 0);
+}
+
+// Iterator positioned one past the last field, i.e. at index getNumElements().
+DxilStructTypeIterator end(llvm::StructType *STy, DxilStructAnnotation *SAnno) {
+  const unsigned fieldCount = STy->getNumElements();
+  return DxilStructTypeIterator(STy, SAnno, fieldCount);
+}
+
 } // namespace hlsl

+ 3 - 0
lib/HLSL/DxilValidation.cpp

@@ -599,6 +599,9 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
   if (op == 138)
     return (pSM->GetMajor() > 6 || (pSM->GetMajor() == 6 && pSM->GetMinor() >= 1))
         && (pSM->IsVS() || pSM->IsHS() || pSM->IsDS() || pSM->IsGS() || pSM->IsPS());
+  // Instructions: RawBufferLoad=139, RawBufferStore=140
+  if (139 <= op && op <= 140)
+    return (pSM->GetMajor() > 6 || (pSM->GetMajor() == 6 && pSM->GetMinor() >= 2));
   return true;
   // VALOPCODESM-TEXT:END
 }

+ 235 - 71
lib/HLSL/HLOperationLower.cpp

@@ -38,16 +38,15 @@ struct HLOperationLowerHelper {
   DxilTypeSystem &dxilTypeSys;
   DxilFunctionProps *functionProps;
   bool bLegacyCBufferLoad;
-  bool bNewDataLayout;
-  DataLayout legacyDataLayout;
-  DataLayout newDataLayout;
+  DataLayout dataLayout;
   HLOperationLowerHelper(HLModule &HLM);
 };
 
 HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
     : hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
-      legacyDataLayout(hlsl::DXIL::kLegacyLayoutString),
-      newDataLayout(hlsl::DXIL::kNewLayoutString) {
+      dataLayout(DataLayout(HLM.GetHLOptions().bUseMinPrecision
+                                  ? hlsl::DXIL::kLegacyLayoutString
+                                  : hlsl::DXIL::kNewLayoutString)) {
   llvm::LLVMContext &Ctx = HLM.GetCtx();
   voidTy = Type::getVoidTy(Ctx);
   f32Ty = Type::getFloatTy(Ctx);
@@ -59,7 +58,6 @@ HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
   if (HLM.HasDxilFunctionProps(EntryFunc))
     functionProps = &HLM.GetDxilFunctionProps(EntryFunc);
   bLegacyCBufferLoad = HLM.GetHLOptions().bLegacyCBufferLoad;
-  bNewDataLayout = !HLM.GetHLOptions().bUseMinPrecision;
 }
 
 struct HLObjectOperationLowerHelper {
@@ -2199,7 +2197,7 @@ Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
   if (RK == DxilResource::Kind::StructuredBuffer) {
     // Set stride.
     Value *stridePtr = CI->getArgOperand(widthOpIdx + 1);
-    const DataLayout &DL = helper.legacyDataLayout;
+    const DataLayout &DL = helper.dataLayout;
     Value *buf = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
     Type *bufTy = buf->getType();
     Type *bufRetTy = bufTy->getStructElementType(0);
@@ -2915,14 +2913,18 @@ Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
 // Load/Store intrinsics.
 struct ResLoadHelper {
   ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
-                Value *h, bool bForSubscript=false);
+                Value *h, IntrinsicOp IOP, bool bForSubscript=false);
   ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
                 Value *h, Value *mip);
   // For double subscript.
   ResLoadHelper(Instruction *ldInst, Value *h, Value *idx, Value *mip)
-      : opcode(OP::OpCode::TextureLoad), handle(h), retVal(ldInst), addr(idx),
-        offset(nullptr), status(nullptr), mipLevel(mip) {}
+      : opcode(OP::OpCode::TextureLoad),
+        intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(ldInst),
+        addr(idx), offset(nullptr), status(nullptr), mipLevel(mip) {}
   OP::OpCode opcode;
+  IntrinsicOp intrinsicOpCode;
+  unsigned dxilMajor;
+  unsigned dxilMinor;
   Value *handle;
   Value *retVal;
   Value *addr;
@@ -2932,12 +2934,14 @@ struct ResLoadHelper {
 };
 
 ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
-                             DxilResourceBase::Class RC, Value *hdl, bool bForSubscript)
-    : handle(hdl), offset(nullptr), status(nullptr) {
+                             DxilResourceBase::Class RC, Value *hdl, IntrinsicOp IOP, bool bForSubscript)
+    : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) {
   switch (RK) {
   case DxilResource::Kind::RawBuffer:
-  case DxilResource::Kind::TypedBuffer:
   case DxilResource::Kind::StructuredBuffer:
+    opcode = OP::OpCode::RawBufferLoad;
+    break;
+  case DxilResource::Kind::TypedBuffer:
     opcode = OP::OpCode::BufferLoad;
     break;
   case DxilResource::Kind::Invalid:
@@ -2947,7 +2951,6 @@ ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
     opcode = OP::OpCode::TextureLoad;
     break;
   }
-
   retVal = CI;
   const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx;
   addr = CI->getArgOperand(kAddrIdx);
@@ -3060,6 +3063,87 @@ void Make64bitResultForLoad(Type *EltTy, ArrayRef<Value *> resultElts32,
   }
 }
 
+static uint8_t GetRawBufferMaskFromIOP(IntrinsicOp IOP, hlsl::OP *OP) { // Maps a Load*/Store* method intrinsic to its component mask; OP is not used in this body.
+  switch (IOP) {
+    // one component
+    case IntrinsicOp::MOP_Load:
+    case IntrinsicOp::MOP_LoadHalf:
+    case IntrinsicOp::MOP_StoreHalf:
+    case IntrinsicOp::MOP_LoadFloat:
+    case IntrinsicOp::MOP_StoreFloat:
+      return DXIL::kCompMask_X;
+    // two component
+    case IntrinsicOp::MOP_Load2:
+    case IntrinsicOp::MOP_LoadHalf2:
+    case IntrinsicOp::MOP_StoreHalf2:
+    case IntrinsicOp::MOP_LoadFloat2:
+    case IntrinsicOp::MOP_StoreFloat2:
+    case IntrinsicOp::MOP_LoadDouble: // double takes 2 components
+    case IntrinsicOp::MOP_StoreDouble:
+      return DXIL::kCompMask_X | DXIL::kCompMask_Y;
+    // three component
+    case IntrinsicOp::MOP_Load3:
+    case IntrinsicOp::MOP_LoadHalf3:
+    case IntrinsicOp::MOP_StoreHalf3:
+    case IntrinsicOp::MOP_LoadFloat3:
+    case IntrinsicOp::MOP_StoreFloat3:
+      return DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
+    // four component
+    case IntrinsicOp::MOP_Load4:
+    case IntrinsicOp::MOP_LoadHalf4:
+    case IntrinsicOp::MOP_StoreHalf4:
+    case IntrinsicOp::MOP_LoadFloat4:
+    case IntrinsicOp::MOP_StoreFloat4:
+    case IntrinsicOp::MOP_LoadDouble2: // double2 takes 4 components
+    case IntrinsicOp::MOP_StoreDouble2:
+      return DXIL::kCompMask_All;
+    default: // Any other intrinsic asserts and yields an empty mask.
+      DXASSERT(false, "Invalid Intrinsic for computing load mask.");
+      return 0;
+  }
+}
+
+// Builds the i8 component-mask constant for a raw buffer access of
+// NumComponents elements whose scalar type is Ty->getScalarType().
+//
+// Each double / i64 element occupies two mask components, so at most two
+// such elements fit in one mask; every other scalar type occupies one
+// component, allowing up to four elements. NumComponents == 0 yields an
+// empty mask. Out-of-range counts assert and fall back to an empty mask.
+static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, hlsl::OP *OP) {
+  Type *ETy = Ty->getScalarType();
+  bool is64 = ETy->isDoubleTy() || ETy == Type::getInt64Ty(ETy->getContext());
+  unsigned mask = 0;
+  if (is64) {
+    // 64-bit elements: two mask components per element.
+    switch (NumComponents) {
+    case 0:
+      break;
+    case 1:
+      mask = DXIL::kCompMask_X | DXIL::kCompMask_Y;
+      break;
+    case 2:
+      mask = DXIL::kCompMask_All;
+      break;
+    default:
+      DXASSERT(false, "Cannot load more than 2 components for 64bit types.");
+    }
+  }
+  else {
+    // 32-bit or smaller elements: one mask component per element.
+    switch (NumComponents) {
+    case 0:
+      break;
+    case 1:
+      mask = DXIL::kCompMask_X;
+      break;
+    case 2:
+      mask = DXIL::kCompMask_X | DXIL::kCompMask_Y;
+      break;
+    case 3:
+      mask = DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
+      break;
+    case 4:
+      mask = DXIL::kCompMask_All;
+      break;
+    default:
+      // This branch handles the non-64-bit case, so the limit is 4 components.
+      DXASSERT(false, "Cannot load more than 4 components for 32bit or smaller types.");
+    }
+  }
+  return OP->GetI8Const(mask);
+}
+
 void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
                    IRBuilder<> &Builder, hlsl::OP *OP, const DataLayout &DL) {
 
@@ -3076,6 +3160,7 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
   Type *i64Ty = Builder.getInt64Ty();
   Type *doubleTy = Builder.getDoubleTy();
   Type *EltTy = Ty->getScalarType();
+  Constant *Alignment = OP->GetI32Const(OP->GetAllocSizeForType(EltTy));
   bool is64 = EltTy == i64Ty || EltTy == doubleTy;
   if (is64) {
     EltTy = i32Ty;
@@ -3140,13 +3225,22 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
   }
 
   // Offset 1
-  if (RK == DxilResource::Kind::RawBuffer ||
-      RK == DxilResource::Kind::TypedBuffer) {
+  if (RK == DxilResource::Kind::RawBuffer) {
+    // elementOffset, mask, alignment
     loadArgs.emplace_back(undefI);
-  } else if (RK == DxilResource::Kind::StructuredBuffer)
+    loadArgs.emplace_back(OP->GetI8Const(GetRawBufferMaskFromIOP(helper.intrinsicOpCode, OP)));
+    loadArgs.emplace_back(Alignment);
+  }
+  else if (RK == DxilResource::Kind::TypedBuffer) {
+    loadArgs.emplace_back(undefI);
+  }
+  else if (RK == DxilResource::Kind::StructuredBuffer) {
+    // elementOffset, mask, alignment
     loadArgs.emplace_back(
-        OP->GetU32Const(0)); // For case use built-in types in structure buffer.
-
+      OP->GetU32Const(0)); // For case use built-in types in structure buffer.
+    loadArgs.emplace_back(OP->GetU8Const(0)); // When is this case hit?
+    loadArgs.emplace_back(Alignment);
+  }
   Value *ResRet =
       Builder.CreateCall(F, loadArgs, OP->GetOpCodeName(opcode));
 
@@ -3190,8 +3284,8 @@ Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
   DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
 
-  ResLoadHelper loadHelper(CI, RK, RC, handle);
-  TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.legacyDataLayout);
+  ResLoadHelper loadHelper(CI, RK, RC, handle, IOP);
+  TranslateLoad(loadHelper, RK, Builder, hlslOP, helper.dataLayout);
   // CI is replaced in TranslateLoad.
   return nullptr;
 }
@@ -3242,6 +3336,9 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
   OP::OpCode opcode;
   switch (RK) {
   case DxilResource::Kind::RawBuffer:
+  case DxilResource::Kind::StructuredBuffer:
+    opcode = OP::OpCode::RawBufferStore;
+    break;
   case DxilResource::Kind::TypedBuffer:
     opcode = OP::OpCode::BufferStore;
     break;
@@ -3257,6 +3354,7 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
   Type *i64Ty = Builder.getInt64Ty();
   Type *doubleTy = Builder.getDoubleTy();
   Type *EltTy = Ty->getScalarType();
+  Constant *Alignment = OP->GetI32Const(OP->GetAllocSizeForType(EltTy));
   bool is64 = EltTy == i64Ty || EltTy == doubleTy;
   if (is64) {
     EltTy = i32Ty;
@@ -3343,12 +3441,11 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
   }
 
   if (is64) {
-    DXASSERT(mask == DXIL::kCompMask_All, "only typed buffer could have 64bit");
     unsigned size = 1;
     if (Ty->isVectorTy()) {
       size = Ty->getVectorNumElements();
     }
-    DXASSERT(size <= 2, "typed buffer only allow 4 dwords");
+    DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords");
     unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore
                              ? DXIL::OperandIndex::kTextureStoreVal0OpIdx
                              : DXIL::OperandIndex::kBufferStoreVal0OpIdx;
@@ -3367,10 +3464,16 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
     for (unsigned i = 0; i < 4; i++) {
       storeArgs[val0OpIdx + i] = vals32[i];
     }
+    // change mask for double
+    if (opcode == DXIL::OpCode::RawBufferStore) {
+      mask = size == 1 ?
+        DXIL::kCompMask_X | DXIL::kCompMask_Y : DXIL::kCompMask_All;
+    }
   }
 
-  storeArgs.emplace_back(OP->GetU8Const(mask));
-
+  storeArgs.emplace_back(OP->GetU8Const(mask)); // mask
+  if (opcode == DXIL::OpCode::RawBufferStore)
+    storeArgs.emplace_back(Alignment); // alignment only for raw buffer
   Builder.CreateCall(F, storeArgs);
 }
 
@@ -4172,6 +4275,7 @@ Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
   return nullptr;
 }
 
+// This table must match the order of the IntrinsicOp enum.
 IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] = {
     {IntrinsicOp::IOP_AddUint64,  TranslateAddUint64,  DXIL::OpCode::UAddc},
     {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
@@ -4360,6 +4464,16 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_LoadDouble, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_LoadDouble2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_LoadFloat, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_LoadFloat2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_LoadFloat3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_LoadFloat4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_LoadHalf, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_LoadHalf2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_LoadHalf3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_LoadHalf4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
@@ -4373,6 +4487,16 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_StoreDouble, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_StoreDouble2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_StoreFloat, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_StoreFloat2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_StoreFloat3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_StoreFloat4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_StoreHalf, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_StoreHalf2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_StoreHalf3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
+    {IntrinsicOp::MOP_StoreHalf4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes},
@@ -5408,13 +5532,12 @@ Value *GEPIdxToOffset(GetElementPtrInst *GEP, IRBuilder<> &Builder,
 void GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
                          Value *status, Type *EltTy,
                          MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
-                         IRBuilder<> &Builder) {
-  OP::OpCode opcode = OP::OpCode::BufferLoad;
+                         IRBuilder<> &Builder, unsigned NumComponents, Constant *alignment) {
+  OP::OpCode opcode = OP::OpCode::RawBufferLoad;
 
   DXASSERT(resultElts.size() <= 4,
            "buffer load cannot load more than 4 values");
 
-  Value *Args[] = {OP->GetU32Const((unsigned)opcode), handle, bufIdx, offset};
 
   Type *i64Ty = Builder.getInt64Ty();
   Type *doubleTy = Builder.getDoubleTy();
@@ -5422,6 +5545,8 @@ void GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
 
   if (!is64) {
     Function *dxilF = OP->GetOpFunc(opcode, EltTy);
+    Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents, OP);
+    Value *Args[] = {OP->GetU32Const((unsigned)opcode), handle, bufIdx, offset, mask, alignment};
     Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
 
     for (unsigned i = 0; i < resultElts.size(); i++) {
@@ -5434,6 +5559,8 @@ void GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
   } else {
     // 64 bit.
     Function *dxilF = OP->GetOpFunc(opcode, Builder.getInt32Ty());
+    Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents < 2 ? NumComponents : 2, OP);
+    Value *Args[] = {OP->GetU32Const((unsigned)opcode), handle, bufIdx, offset, mask, alignment};
     Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
     Value *resultElts32[8];
     unsigned size = resultElts.size();
@@ -5441,8 +5568,11 @@ void GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
     for (unsigned i = 0; i < size; i++) {
       if (i == 2) {
         // Update offset 4 by 4 bytes.
-        Args[DXIL::OperandIndex::kBufferLoadCoord1OpIdx] =
+        Args[DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx] =
             Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
+        // Update Mask
+        Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
+          GetRawBufferMaskForETy(EltTy, NumComponents < 3 ? 0 : NumComponents - 2, OP);
         Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
         eltBase = 4;
       }
@@ -5463,8 +5593,8 @@ void GenerateStructBufLd(Value *handle, Value *bufIdx, Value *offset,
 
 void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
                          Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder,
-                         ArrayRef<Value *> vals, uint8_t mask) {
-  OP::OpCode opcode = OP::OpCode::BufferStore;
+                         ArrayRef<Value *> vals, uint8_t mask, Constant *alignment) {
+  OP::OpCode opcode = OP::OpCode::RawBufferStore;
   DXASSERT(vals.size() == 4, "buffer store need 4 values");
   Type *i64Ty = Builder.getInt64Ty();
   Type *doubleTy = Builder.getDoubleTy();
@@ -5478,7 +5608,8 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
                      vals[1],
                      vals[2],
                      vals[3],
-                     OP->GetU8Const(mask)};
+                     OP->GetU8Const(mask),
+                     alignment};
     Function *dxilF = OP->GetOpFunc(opcode, EltTy);
     Builder.CreateCall(dxilF, Args);
   } else {
@@ -5525,7 +5656,8 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
                      vals32[1],
                      vals32[2],
                      vals32[3],
-                     OP->GetU8Const(maskLo)};
+                     OP->GetU8Const(maskLo),
+                     alignment};
     Builder.CreateCall(dxilF, Args);
     if (maskHi) {
       // Update offset 4 by 4 bytes.
@@ -5538,7 +5670,8 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
                        vals32[5],
                        vals32[6],
                        vals32[7],
-                       OP->GetU8Const(maskHi)};
+                       OP->GetU8Const(maskHi),
+                       alignment};
       Builder.CreateCall(dxilF, Args);
     }
   }
@@ -5547,9 +5680,10 @@ void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
 Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
                                Value *handle, hlsl::OP *OP, Value *status,
                                Value *bufIdx, Value *baseOffset,
-                               bool colMajor) {
+                               bool colMajor, const DataLayout &DL) {
   unsigned col, row;
   Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
+  Constant* alignment = OP->GetI32Const(DL.getTypeAllocSize(EltTy));
 
   Value *offset = baseOffset;
   if (baseOffset == nullptr)
@@ -5561,7 +5695,7 @@ Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
   unsigned rest = (matSize % 4);
   if (rest) {
     Value *ResultElts[4];
-    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder);
+    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 3, alignment);
     for (unsigned i = 0; i < rest; i++)
       elts[i] = ResultElts[i];
     offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * rest));
@@ -5569,7 +5703,7 @@ Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
 
   for (unsigned i = rest; i < matSize; i += 4) {
     Value *ResultElts[4];
-    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder);
+    GenerateStructBufLd(handle, bufIdx, offset, status, EltTy, ResultElts, OP, Builder, 4, alignment);
     elts[i] = ResultElts[0];
     elts[i + 1] = ResultElts[1];
     elts[i + 2] = ResultElts[2];
@@ -5584,10 +5718,10 @@ Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder,
 
 void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
                              hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
-                             Value *val, bool colMajor) {
+                             Value *val, bool colMajor, const DataLayout &DL) {
   unsigned col, row;
   Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
-
+  Constant *Alignment = OP->GetI32Const(DL.getTypeAllocSize(EltTy));
   Value *offset = baseOffset;
   if (baseOffset == nullptr)
     offset = OP->GetU32Const(0);
@@ -5620,7 +5754,8 @@ void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
         mask |= (1<<j);
     }
     GenerateStructBufSt(handle, bufIdx, offset, EltTy, OP, Builder,
-                        {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask);
+                        {elts[i], elts[i + 1], elts[i + 2], elts[i + 3]}, mask,
+                        Alignment);
     // Update offset by 4*4bytes.
     offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * 4));
   }
@@ -5628,7 +5763,7 @@ void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
 
 void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP,
                                Value *status, Value *bufIdx,
-                               Value *baseOffset) {
+                               Value *baseOffset, const DataLayout &DL) {
   IRBuilder<> Builder(CI);
   HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
   unsigned opcode = GetHLOpcode(CI);
@@ -5640,14 +5775,14 @@ void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP,
     Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
     Value *NewLd = TranslateStructBufMatLd(
         ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
-        bufIdx, baseOffset, /*colMajor*/ true);
+        bufIdx, baseOffset, /*colMajor*/ true, DL);
     CI->replaceAllUsesWith(NewLd);
   } break;
   case HLMatLoadStoreOpcode::RowMatLoad: {
     Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
     Value *NewLd = TranslateStructBufMatLd(
         ptr->getType()->getPointerElementType(), Builder, handle, OP, status,
-        bufIdx, baseOffset, /*colMajor*/ false);
+        bufIdx, baseOffset, /*colMajor*/ false, DL);
     CI->replaceAllUsesWith(NewLd);
   } break;
   case HLMatLoadStoreOpcode::ColMatStore: {
@@ -5655,14 +5790,14 @@ void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, hlsl::OP *OP,
     Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
     TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
                             handle, OP, bufIdx, baseOffset, val,
-                            /*colMajor*/ true);
+                            /*colMajor*/ true, DL);
   } break;
   case HLMatLoadStoreOpcode::RowMatStore: {
     Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
     Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
     TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
                             handle, OP, bufIdx, baseOffset, val,
-                            /*colMajor*/ false);
+                            /*colMajor*/ false, DL);
   } break;
   }
 
@@ -5673,6 +5808,7 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
                                      Value *bufIdx, Value *baseOffset,
                                      Value *status, hlsl::OP *OP, const DataLayout &DL);
 
+// subscript operator for matrix of struct element.
 void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
                                     hlsl::OP *hlslOP, Value *bufIdx,
                                     Value *baseOffset, Value *status,
@@ -5687,6 +5823,7 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
   Type *matType = basePtr->getType()->getPointerElementType();
   unsigned col, row;
   Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row);
+  Constant *alignment = hlslOP->GetI32Const(DL.getTypeAllocSize(EltTy));
 
   Value *EltByteSize = ConstantInt::get(
       baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
@@ -5754,13 +5891,13 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
           uint8_t mask = DXIL::kCompMask_X;
           GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
                               stBuilder, {EltVal, undefElt, undefElt, undefElt},
-                              mask);
+                              mask, alignment);
         }
       } else {
         uint8_t mask = DXIL::kCompMask_X;
         GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
                             stBuilder, {Val, undefElt, undefElt, undefElt},
-                            mask);
+                            mask, alignment);
       }
 
       stUser->eraseFromParent();
@@ -5772,14 +5909,15 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
       if (resultType->isVectorTy()) {
         for (unsigned i = 0; i < resultSize; i++) {
           Value *ResultElt;
+          // TODO: This can be inefficient for row major matrix load
           GenerateStructBufLd(handle, bufIdx, idxList[i],
                               /*status*/ nullptr, EltTy, ResultElt, hlslOP,
-                              ldBuilder);
+                              ldBuilder, 1, alignment);
           ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
         }
       } else {
         GenerateStructBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr,
-                            EltTy, ldData, hlslOP, ldBuilder);
+                            EltTy, ldData, hlslOP, ldBuilder, 4, alignment);
       }
       ldUser->replaceAllUsesWith(ldData);
       ldUser->eraseFromParent();
@@ -5794,7 +5932,7 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
                                      Value *status, hlsl::OP *OP, const DataLayout &DL) {
   IRBuilder<> Builder(user);
   if (CallInst *userCall = dyn_cast<CallInst>(user)) {
-    HLOpcodeGroup group =
+    HLOpcodeGroup group = // user call?
         hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
     unsigned opcode = GetHLOpcode(userCall);
     // For case element type of structure buffer is not structure type.
@@ -5880,7 +6018,7 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
     } else if (group == HLOpcodeGroup::HLMatLoadStore)
       // TODO: support 64 bit.
       TranslateStructBufMatLdSt(userCall, handle, OP, status, bufIdx,
-                                baseOffset);
+                                baseOffset, DL);
     else if (group == HLOpcodeGroup::HLSubscript) {
       TranslateStructBufMatSubscript(userCall, handle, OP, bufIdx, baseOffset, status, DL);
     }
@@ -5908,8 +6046,17 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
     if (ldInst) {
       auto LdElement = [&](Value *offset, IRBuilder<> &Builder) -> Value * {
         Value *ResultElts[4];
+        unsigned numComponents = 0;
+        if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+          numComponents = VTy->getNumElements();
+        }
+        else {
+          numComponents = 1;
+        }
+        Constant *alignment =
+            OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
         GenerateStructBufLd(handle, bufIdx, offset, status, pOverloadTy,
-                            ResultElts, OP, Builder);
+                            ResultElts, OP, Builder, numComponents, alignment);
         return ScalarizeElements(Ty, ResultElts, Builder);
       };
 
@@ -5943,9 +6090,10 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
           vals[0] = val;
           mask = DXIL::kCompMask_X;
         }
-
+        Constant *alignment =
+          OP->GetI32Const(DL.getTypeAllocSize(Ty->getScalarType()));
         GenerateStructBufSt(handle, bufIdx, offset, pOverloadTy, OP, Builder,
-                            vals, mask);
+                            vals, mask, alignment);
       };
       if (arraySize > 1)
         val = Builder.CreateExtractValue(val, 0);
@@ -6004,7 +6152,7 @@ Value *TranslateTypedBufLoad(CallInst *CI, DXIL::ResourceKind RK,
                              DXIL::ResourceClass RC, Value *handle,
                              LoadInst *ldInst, IRBuilder<> &Builder,
                              hlsl::OP *hlslOP, const DataLayout &DL) {
-  ResLoadHelper ldHelper(CI, RK, RC, handle, /*bForSubscript*/ true);
+  ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, /*bForSubscript*/ true);
   // Default sampleIdx for 2DMS textures.
   if (RK == DxilResource::Kind::Texture2DMS ||
       RK == DxilResource::Kind::Texture2DMSArray)
@@ -6072,7 +6220,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper,  HL
     Instruction *I = cast<Instruction>(user);
     IRBuilder<> Builder(I);
     if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
-      TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.legacyDataLayout);
+      TranslateTypedBufLoad(CI, RK, RC, handle, ldInst, Builder, hlslOP, helper.dataLayout);
     } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
       Value *val = stInst->getValueOperand();
       TranslateStore(RK, handle, val,
@@ -6095,7 +6243,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper,  HL
           LoadInst *tmpLd = StBuilder.CreateLoad(CI);
 
           Value *ldVal = TranslateTypedBufLoad(CI, RK, RC, handle, tmpLd, StBuilder,
-                                          hlslOP, helper.legacyDataLayout);
+                                          hlslOP, helper.dataLayout);
           // Update vector.
           ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx,
                                   vectorSize, SI);
@@ -6189,63 +6337,63 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper,  HL
         }
         switch (IOP) {
         case IntrinsicOp::IOP_InterlockedAdd: {
-          ResLoadHelper helper(CI, RK, RC, handle);
+          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAdd);
           AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                   helper.addr, /*offset*/ nullptr);
           TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add,
                                          Builder, hlslOP);
         } break;
         case IntrinsicOp::IOP_InterlockedAnd: {
-          ResLoadHelper helper(CI, RK, RC, handle);
+          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedAnd);
           AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                   helper.addr, /*offset*/ nullptr);
           TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And,
                                          Builder, hlslOP);
         } break;
         case IntrinsicOp::IOP_InterlockedExchange: {
-          ResLoadHelper helper(CI, RK, RC, handle);
+          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedExchange);
           AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                   helper.addr, /*offset*/ nullptr);
           TranslateAtomicBinaryOperation(
               atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP);
         } break;
         case IntrinsicOp::IOP_InterlockedMax: {
-          ResLoadHelper helper(CI, RK, RC, handle);
+          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMax);
           AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                   helper.addr, /*offset*/ nullptr);
           TranslateAtomicBinaryOperation(
               atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP);
         } break;
         case IntrinsicOp::IOP_InterlockedMin: {
-          ResLoadHelper helper(CI, RK, RC, handle);
+          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedMin);
           AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                   helper.addr, /*offset*/ nullptr);
           TranslateAtomicBinaryOperation(
               atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP);
         } break;
         case IntrinsicOp::IOP_InterlockedUMax: {
-          ResLoadHelper helper(CI, RK, RC, handle);
+          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMax);
           AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                   helper.addr, /*offset*/ nullptr);
           TranslateAtomicBinaryOperation(
               atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP);
         } break;
         case IntrinsicOp::IOP_InterlockedUMin: {
-          ResLoadHelper helper(CI, RK, RC, handle);
+          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMin);
           AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                   helper.addr, /*offset*/ nullptr);
           TranslateAtomicBinaryOperation(
               atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP);
         } break;
         case IntrinsicOp::IOP_InterlockedOr: {
-          ResLoadHelper helper(CI, RK, RC, handle);
+          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedOr);
           AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                   helper.addr, /*offset*/ nullptr);
           TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or,
                                          Builder, hlslOP);
         } break;
         case IntrinsicOp::IOP_InterlockedXor: {
-          ResLoadHelper helper(CI, RK, RC, handle);
+          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedXor);
           AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
                                   helper.addr, /*offset*/ nullptr);
           TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor,
@@ -6253,7 +6401,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper,  HL
         } break;
         case IntrinsicOp::IOP_InterlockedCompareStore:
         case IntrinsicOp::IOP_InterlockedCompareExchange: {
-          ResLoadHelper helper(CI, RK, RC, handle);
+          ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedCompareExchange);
           AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
                                   handle, helper.addr, /*offset*/ nullptr);
           TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
@@ -6285,7 +6433,7 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
     Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
     if (helper.bLegacyCBufferLoad)
       TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys,
-                                  helper.legacyDataLayout, pObjHelper);
+                                  helper.dataLayout, pObjHelper);
     else {
       TranslateCBOperations(handle, CI, /*offset*/ hlslOP->GetU32Const(0),
                             hlslOP, helper.dxilTypeSys,
@@ -6307,7 +6455,7 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
     Instruction *ldInst = cast<Instruction>(*U);
     ResLoadHelper ldHelper(ldInst, handle, coord, mipLevel);
     IRBuilder<> Builder(CI);
-    TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.legacyDataLayout);
+    TranslateLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout);
     ldInst->eraseFromParent();
     Translated = true;
     return;
@@ -6326,11 +6474,11 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
       Type *RetTy = ObjTy->getStructElementType(0);
       if (RK == DxilResource::Kind::StructuredBuffer) {
         TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP,
-                                    helper.bNewDataLayout ? helper.newDataLayout : helper.legacyDataLayout);
+                                    helper.dataLayout);
       } else if (RetTy->isAggregateType() &&
                  RK == DxilResource::Kind::TypedBuffer) {
         TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP,
-                                    helper.bNewDataLayout ? helper.newDataLayout : helper.legacyDataLayout);
+                                    helper.dataLayout);
         // Clear offset for typed buf.
         for (auto User : handle->users()) {
           CallInst *CI = cast<CallInst>(User);
@@ -6354,6 +6502,22 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
             CI->setArgOperand(DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx,
                               UndefValue::get(helper.i32Ty));
           } break;
+          case DXIL::OpCode::RawBufferLoad: {
+            // Structured buffer inside a typed buffer must be converted to typed buffer load.
+            // Typed buffer load is equivalent to raw buffer load, except there is no mask.
+            StructType *STy = cast<StructType>(CI->getFunctionType()->getReturnType());
+            Type *ETy = STy->getElementType(0);
+            SmallVector<Value *, 4> Args;
+            Args.emplace_back(hlslOP->GetI32Const((unsigned)DXIL::OpCode::BufferLoad));
+            Args.emplace_back(CI->getArgOperand(1)); // handle
+            Args.emplace_back(CI->getArgOperand(2)); // index
+            Args.emplace_back(UndefValue::get(helper.i32Ty)); // offset
+            IRBuilder<> builder(CI);
+            Function *newFunction = hlslOP->GetOpFunc(DXIL::OpCode::BufferLoad, ETy);
+            CallInst *newCall = builder.CreateCall(newFunction, Args);
+            CI->replaceAllUsesWith(newCall);
+            CI->eraseFromParent();
+          } break;
           default:
             DXASSERT(0, "Invalid operation on resource handle");
             break;

+ 1 - 0
lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -263,6 +263,7 @@ static void addHLSLPasses(bool HLSLHighLevel, bool NoOpt, hlsl::HLSLExtensionsCo
   MPM.add(createCFGSimplificationPass());
 
   MPM.add(createDeadCodeEliminationPass());
+  MPM.add(createDxilTranslateRawBuffer());
 }
 // HLSL Change Ends
 

+ 4 - 2
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -7444,7 +7444,9 @@ def err_hlsl_attribute_valid_on_function_only: Error<
   "attribute is valid only on functions">;
 def err_hlsl_cannot_convert: Error<
   "cannot %select{implicitly |}0convert %select{|output parameter }1from %2 to %3">;
-def err_hlsl_interfaces_cannot_inherit : Error<
+def err_hlsl_half_load_store: Error<
+  "LoadHalf and StoreHalf are not supported for min precision mode">;
+def err_hlsl_interfaces_cannot_inherit: Error<
   "interfaces cannot inherit from other types">;
 def err_hlsl_invalid_range_1_4: Error<
   "invalid value, valid range is between 1 and 4 inclusive">;
@@ -7642,9 +7644,9 @@ def warn_hlsl_effect_object : Warning <
 def warn_hlsl_unused_call : Warning<
   "ignoring return value of function that only reads data">,
   InGroup<UnusedValue>;
-}
 def err_hlsl_func_in_func_decl : Error<
    "function declaration is not allowed in function parameters">;
+}
 // HLSL Change Ends
 
 let CategoryName = "OpenMP Issue" in {

+ 49 - 14
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -1068,6 +1068,13 @@ static const ArBasicKind g_UInt3264CT[] =
   AR_BASIC_UNKNOWN
 };
 
+static const ArBasicKind g_HalfCT[] =
+{
+  AR_BASIC_FLOAT16,
+  AR_BASIC_LITERAL_FLOAT,
+  AR_BASIC_UNKNOWN
+};
+
 // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value.
 const ArBasicKind* g_LegalIntrinsicCompTypes[] =
 {
@@ -1097,7 +1104,8 @@ const ArBasicKind* g_LegalIntrinsicCompTypes[] =
   g_StringCT,           // LICOMPTYPE_STRING
   g_WaveCT,             // LICOMPTYPE_WAVE
   g_UInt64CT,           // LICOMPTYPE_UINT64
-  g_UInt3264CT          // LICOMPTYPE_UINT32_64
+  g_UInt3264CT,         // LICOMPTYPE_UINT32_64
+  g_HalfCT              // LICOMPTYPE_HALF
 };
 C_ASSERT(ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT);
 
@@ -3050,15 +3058,14 @@ public:
       if (type == HLSLScalarType_float_min16) {
         m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min16float" << "half";
       }
-// TODO: Enable this once we support true int16/uint16 support.
-#if 0
       else if (type == HLSLScalarType_int_min16) {
-        m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min16int" << "int16";
+        // TODO: change promotion to short once we support int16
+        m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min16int" << "int";
       }
       else if (type == HLSLScalarType_uint_min16) {
-        m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min16uint" << "uint16";
+        // TODO: change promotion to unsigned short once we support int16
+        m_sema->Diag(loc, diag::warn_hlsl_sema_minprecision_promotion) << "min16uint" << "uint";
       }
-#endif
     }
   }
 
@@ -3394,7 +3401,7 @@ public:
     case AR_OBJECT_NULL:          return m_context->VoidTy;
     case AR_BASIC_BOOL:           return m_context->BoolTy;
     case AR_BASIC_LITERAL_FLOAT:  return m_context->LitFloatTy;
-    case AR_BASIC_FLOAT16:        return m_context->getLangOpts().UseMinPrecision ? m_context->FloatTy : m_context->HalfTy;
+    case AR_BASIC_FLOAT16:        return m_context->HalfTy;
     case AR_BASIC_FLOAT32_PARTIAL_PRECISION: return m_context->FloatTy;
     case AR_BASIC_FLOAT32:        return m_context->FloatTy;
     case AR_BASIC_FLOAT64:        return m_context->DoubleTy;
@@ -3555,7 +3562,7 @@ public:
   /// <param name="argCount">After execution, number of arguments in argTypes.</param>
   /// <remarks>On success, argTypes includes the clang Types to use for the signature, with the first being the return type.</remarks>
   bool MatchArguments(
-    _In_ const HLSL_INTRINSIC *pIntrinsic,
+    const _In_ HLSL_INTRINSIC *pIntrinsic,
     _In_ QualType objectElement,
     _In_ ArrayRef<Expr *> Args, 
     _Out_writes_(g_MaxIntrinsicParamCount + 1) QualType(&argTypes)[g_MaxIntrinsicParamCount + 1],
@@ -4177,6 +4184,24 @@ public:
     }
 
     IntrinsicOp intrinOp = static_cast<IntrinsicOp>(intrinsic->Op);
+
+    if (intrinOp == IntrinsicOp::MOP_LoadHalf ||
+      intrinOp == IntrinsicOp::MOP_LoadHalf2 ||
+      intrinOp == IntrinsicOp::MOP_LoadHalf3 ||
+      intrinOp == IntrinsicOp::MOP_LoadHalf4 ||
+      intrinOp == IntrinsicOp::MOP_StoreHalf ||
+      intrinOp == IntrinsicOp::MOP_StoreHalf2 ||
+      intrinOp == IntrinsicOp::MOP_StoreHalf3 ||
+      intrinOp == IntrinsicOp::MOP_StoreHalf4
+      ) {
+      if (getSema()->getLangOpts().UseMinPrecision) {
+        DXASSERT(Args.size() >= 1, "Otherwise wrong load store call.");
+        getSema()->Diag(
+            Args.front()->getExprLoc(),
+            diag::err_hlsl_half_load_store);
+      }
+    }
+
     if (intrinOp == IntrinsicOp::MOP_SampleBias) {
       // Remove this when update intrinsic table not affect other things.
       // Change vector<float,1> into float for bias.
@@ -4429,9 +4454,10 @@ void HLSLExternalSource::AddBaseTypes()
   m_baseTypes[HLSLScalarType_double] = m_context->DoubleTy;
   m_baseTypes[HLSLScalarType_float_min10] = m_context->HalfTy;
   m_baseTypes[HLSLScalarType_float_min16] = m_context->HalfTy;
-  m_baseTypes[HLSLScalarType_int_min12] = m_context->ShortTy;
-  m_baseTypes[HLSLScalarType_int_min16] = m_context->ShortTy;
-  m_baseTypes[HLSLScalarType_uint_min16] = m_context->UnsignedShortTy;
+   // TODO: Change promotion to other type once we introduce int16
+  m_baseTypes[HLSLScalarType_int_min12] = m_context->getLangOpts().UseMinPrecision ? m_context->ShortTy : m_context->IntTy;
+  m_baseTypes[HLSLScalarType_int_min16] = m_context->getLangOpts().UseMinPrecision ? m_context->ShortTy : m_context->IntTy;
+  m_baseTypes[HLSLScalarType_uint_min16] = m_context->getLangOpts().UseMinPrecision ? m_context->UnsignedShortTy : m_context->UnsignedIntTy;
   m_baseTypes[HLSLScalarType_float_lit] = m_context->LitFloatTy;
   m_baseTypes[HLSLScalarType_int_lit] = m_context->LitIntTy;
   m_baseTypes[HLSLScalarType_int64] = m_context->LongLongTy;
@@ -4903,8 +4929,17 @@ bool HLSLExternalSource::MatchArguments(
       if (pIntrinsic->pArgs[0].uComponentTypeId != INTRIN_COMPTYPE_FROM_TYPE_ELT0) {
         DXASSERT_NOMSG(pIntrinsic->pArgs[0].uComponentTypeId < MaxIntrinsicArgs);
         if (AR_BASIC_UNKNOWN == ComponentType[pIntrinsic->pArgs[0].uComponentTypeId]) {
-          ComponentType[pIntrinsic->pArgs[0].uComponentTypeId] =
-            g_LegalIntrinsicCompTypes[pIntrinsic->pArgs[0].uLegalComponentTypes][0];
+          // half return type should map to float for min precision
+          if (pIntrinsic->pArgs[0].uLegalComponentTypes ==
+                  LEGAL_INTRINSIC_COMPTYPES::LICOMPTYPE_HALF &&
+              getSema()->getLangOpts().UseMinPrecision) {
+            ComponentType[pIntrinsic->pArgs[0].uComponentTypeId] =
+              ArBasicKind::AR_BASIC_FLOAT32;
+          }
+          else {
+            ComponentType[pIntrinsic->pArgs[0].uComponentTypeId] =
+              g_LegalIntrinsicCompTypes[pIntrinsic->pArgs[0].uLegalComponentTypes][0];
+          }
         }
       }
     }
@@ -4934,7 +4969,7 @@ bool HLSLExternalSource::MatchArguments(
 
       if (AR_TOBJ_UNKNOWN == *pTT)
         return false;
-    }
+      }
     else if (pTT) {
       Template[i] = *pTT;
     }

+ 389 - 9
tools/clang/lib/Sema/gen_intrin_main_tables_15.h

@@ -5158,6 +5158,136 @@ static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args8[] =
     {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
 };
 
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args9[] =
+{
+    {"LoadDouble", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_DOUBLE, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args10[] =
+{
+    {"LoadDouble", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_DOUBLE, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args11[] =
+{
+    {"LoadDouble2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_DOUBLE, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args12[] =
+{
+    {"LoadDouble2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_DOUBLE, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args13[] =
+{
+    {"LoadFloat", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_FLOAT, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args14[] =
+{
+    {"LoadFloat", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_FLOAT, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args15[] =
+{
+    {"LoadFloat2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args16[] =
+{
+    {"LoadFloat2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args17[] =
+{
+    {"LoadFloat3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 3},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args18[] =
+{
+    {"LoadFloat3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 3},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args19[] =
+{
+    {"LoadFloat4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 4},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args20[] =
+{
+    {"LoadFloat4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 4},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args21[] =
+{
+    {"LoadHalf", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_HALF, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args22[] =
+{
+    {"LoadHalf", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_HALF, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args23[] =
+{
+    {"LoadHalf2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args24[] =
+{
+    {"LoadHalf2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args25[] =
+{
+    {"LoadHalf3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 3},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args26[] =
+{
+    {"LoadHalf3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 3},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args27[] =
+{
+    {"LoadHalf4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 4},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args28[] =
+{
+    {"LoadHalf4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 4},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
 static const HLSL_INTRINSIC g_ByteAddressBufferMethods[] =
 {
     {(UINT)hlsl::IntrinsicOp::MOP_GetDimensions, false, false, -1, 2, g_ByteAddressBufferMethods_Args0},
@@ -5169,6 +5299,26 @@ static const HLSL_INTRINSIC g_ByteAddressBufferMethods[] =
     {(UINT)hlsl::IntrinsicOp::MOP_Load3, false, false, -1, 3, g_ByteAddressBufferMethods_Args6},
     {(UINT)hlsl::IntrinsicOp::MOP_Load4, true, false, -1, 2, g_ByteAddressBufferMethods_Args7},
     {(UINT)hlsl::IntrinsicOp::MOP_Load4, false, false, -1, 3, g_ByteAddressBufferMethods_Args8},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble, true, false, -1, 2, g_ByteAddressBufferMethods_Args9},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble, false, false, -1, 3, g_ByteAddressBufferMethods_Args10},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble2, true, false, -1, 2, g_ByteAddressBufferMethods_Args11},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble2, false, false, -1, 3, g_ByteAddressBufferMethods_Args12},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat, true, false, -1, 2, g_ByteAddressBufferMethods_Args13},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat, false, false, -1, 3, g_ByteAddressBufferMethods_Args14},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat2, true, false, -1, 2, g_ByteAddressBufferMethods_Args15},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat2, false, false, -1, 3, g_ByteAddressBufferMethods_Args16},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat3, true, false, -1, 2, g_ByteAddressBufferMethods_Args17},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat3, false, false, -1, 3, g_ByteAddressBufferMethods_Args18},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat4, true, false, -1, 2, g_ByteAddressBufferMethods_Args19},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat4, false, false, -1, 3, g_ByteAddressBufferMethods_Args20},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf, true, false, -1, 2, g_ByteAddressBufferMethods_Args21},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf, false, false, -1, 3, g_ByteAddressBufferMethods_Args22},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf2, true, false, -1, 2, g_ByteAddressBufferMethods_Args23},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf2, false, false, -1, 3, g_ByteAddressBufferMethods_Args24},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf3, true, false, -1, 2, g_ByteAddressBufferMethods_Args25},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf3, false, false, -1, 3, g_ByteAddressBufferMethods_Args26},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf4, true, false, -1, 2, g_ByteAddressBufferMethods_Args27},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf4, false, false, -1, 3, g_ByteAddressBufferMethods_Args28},
 };
 
 //
@@ -5349,33 +5499,233 @@ static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args23[] =
 };
 
 static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args24[] =
+{
+    {"LoadDouble", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_DOUBLE, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args25[] =
+{
+    {"LoadDouble", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_DOUBLE, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args26[] =
+{
+    {"LoadDouble2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_DOUBLE, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args27[] =
+{
+    {"LoadDouble2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_DOUBLE, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args28[] =
+{
+    {"LoadFloat", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_FLOAT, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args29[] =
+{
+    {"LoadFloat", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_FLOAT, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args30[] =
+{
+    {"LoadFloat2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args31[] =
+{
+    {"LoadFloat2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args32[] =
+{
+    {"LoadFloat3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 3},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args33[] =
+{
+    {"LoadFloat3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 3},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args34[] =
+{
+    {"LoadFloat4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 4},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args35[] =
+{
+    {"LoadFloat4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 4},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args36[] =
+{
+    {"LoadHalf", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_HALF, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args37[] =
+{
+    {"LoadHalf", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_HALF, 1, 1},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args38[] =
+{
+    {"LoadHalf2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args39[] =
+{
+    {"LoadHalf2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 2},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args40[] =
+{
+    {"LoadHalf3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 3},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args41[] =
+{
+    {"LoadHalf3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 3},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args42[] =
+{
+    {"LoadHalf4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 4},
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args43[] = // Argument rows for: half4 LoadHalf4(uint byteOffset, out uint status)
+{
+    {"LoadHalf4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 4}, // return slot: 4-component half vector
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1}, // extra out-parameter of the three-row overload
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args44[] = // Argument rows for: void Store(uint byteOffset, uint value); the MOP_Store table entry moves from Args24 to this array
 {
     {"Store", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
     {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
     {"value", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT, 1, 1},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args25[] =
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args45[] = // Argument rows for: void Store2(uint byteOffset, uint2 value); renumbered from Args25
 {
     {"Store2", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
     {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
     {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_UINT, 1, 2},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args26[] =
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args46[] = // Argument rows for: void Store3(uint byteOffset, uint3 value); renumbered from Args26
 {
     {"Store3", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
     {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
     {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_UINT, 1, 3},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args27[] =
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args47[] = // Argument rows for: void Store4(uint byteOffset, uint4 value); renumbered from Args27
 {
     {"Store4", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
     {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
     {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_UINT, 1, 4},
 };
 
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args48[] = // Argument rows for: void StoreDouble(uint byteOffset, double value)
+{
+    {"StoreDouble", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_DOUBLE, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args49[] = // Argument rows for: void StoreDouble2(uint byteOffset, double2 value)
+{
+    {"StoreDouble2", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_DOUBLE, 1, 2},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args50[] = // Argument rows for: void StoreFloat(uint byteOffset, float value)
+{
+    {"StoreFloat", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_FLOAT, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args51[] = // Argument rows for: void StoreFloat2(uint byteOffset, float2 value)
+{
+    {"StoreFloat2", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT, 1, 2},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args52[] = // Argument rows for: void StoreFloat3(uint byteOffset, float3 value)
+{
+    {"StoreFloat3", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT, 1, 3},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args53[] = // Argument rows for: void StoreFloat4(uint byteOffset, float4 value)
+{
+    {"StoreFloat4", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT, 1, 4},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args54[] = // Argument rows for: void StoreHalf(uint byteOffset, half value)
+{
+    {"StoreHalf", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_HALF, 1, 1},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args55[] = // Argument rows for: void StoreHalf2(uint byteOffset, half2 value)
+{
+    {"StoreHalf2", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_HALF, 1, 2},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args56[] = // Argument rows for: void StoreHalf3(uint byteOffset, half3 value)
+{
+    {"StoreHalf3", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_HALF, 1, 3},
+};
+
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args57[] = // Argument rows for: void StoreHalf4(uint byteOffset, half4 value)
+{
+    {"StoreHalf4", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0}, // return slot: void
+    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_HALF, 1, 4},
+};
+
 static const HLSL_INTRINSIC g_RWByteAddressBufferMethods[] = // Method table: each entry pairs an IntrinsicOp with its argument-row array; the integer before the array name equals that array's row count (return slot + parameters)
 { // NOTE(review): in the visible rows the first bool is true only for Load* overloads without a status out-parameter; presumably a read-only flag, confirm against the HLSL_INTRINSIC declaration
     {(UINT)hlsl::IntrinsicOp::MOP_GetDimensions, false, false, -1, 2, g_RWByteAddressBufferMethods_Args0},
@@ -5402,10 +5752,40 @@ static const HLSL_INTRINSIC g_RWByteAddressBufferMethods[] =
     {(UINT)hlsl::IntrinsicOp::MOP_Load3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args21},
     {(UINT)hlsl::IntrinsicOp::MOP_Load4, true, false, -1, 2, g_RWByteAddressBufferMethods_Args22},
     {(UINT)hlsl::IntrinsicOp::MOP_Load4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args23},
-    {(UINT)hlsl::IntrinsicOp::MOP_Store, false, false, -1, 3, g_RWByteAddressBufferMethods_Args24},
-    {(UINT)hlsl::IntrinsicOp::MOP_Store2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args25},
-    {(UINT)hlsl::IntrinsicOp::MOP_Store3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args26},
-    {(UINT)hlsl::IntrinsicOp::MOP_Store4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args27},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble, true, false, -1, 2, g_RWByteAddressBufferMethods_Args24}, // new typed loads take over indices 24-43, each in a 2-row and a 3-row (status) form
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble, false, false, -1, 3, g_RWByteAddressBufferMethods_Args25},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble2, true, false, -1, 2, g_RWByteAddressBufferMethods_Args26},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args27},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat, true, false, -1, 2, g_RWByteAddressBufferMethods_Args28},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat, false, false, -1, 3, g_RWByteAddressBufferMethods_Args29},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat2, true, false, -1, 2, g_RWByteAddressBufferMethods_Args30},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args31},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat3, true, false, -1, 2, g_RWByteAddressBufferMethods_Args32},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args33},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat4, true, false, -1, 2, g_RWByteAddressBufferMethods_Args34},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args35},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf, true, false, -1, 2, g_RWByteAddressBufferMethods_Args36},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf, false, false, -1, 3, g_RWByteAddressBufferMethods_Args37},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf2, true, false, -1, 2, g_RWByteAddressBufferMethods_Args38},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args39},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf3, true, false, -1, 2, g_RWByteAddressBufferMethods_Args40},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args41},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf4, true, false, -1, 2, g_RWByteAddressBufferMethods_Args42},
+    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args43},
+    {(UINT)hlsl::IntrinsicOp::MOP_Store, false, false, -1, 3, g_RWByteAddressBufferMethods_Args44}, // existing uint stores, shifted from Args24-27 to Args44-47
+    {(UINT)hlsl::IntrinsicOp::MOP_Store2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args45},
+    {(UINT)hlsl::IntrinsicOp::MOP_Store3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args46},
+    {(UINT)hlsl::IntrinsicOp::MOP_Store4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args47},
+    {(UINT)hlsl::IntrinsicOp::MOP_StoreDouble, false, false, -1, 3, g_RWByteAddressBufferMethods_Args48}, // new typed stores: 3 rows each (void return slot, byteOffset, value)
+    {(UINT)hlsl::IntrinsicOp::MOP_StoreDouble2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args49},
+    {(UINT)hlsl::IntrinsicOp::MOP_StoreFloat, false, false, -1, 3, g_RWByteAddressBufferMethods_Args50},
+    {(UINT)hlsl::IntrinsicOp::MOP_StoreFloat2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args51},
+    {(UINT)hlsl::IntrinsicOp::MOP_StoreFloat3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args52},
+    {(UINT)hlsl::IntrinsicOp::MOP_StoreFloat4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args53},
+    {(UINT)hlsl::IntrinsicOp::MOP_StoreHalf, false, false, -1, 3, g_RWByteAddressBufferMethods_Args54},
+    {(UINT)hlsl::IntrinsicOp::MOP_StoreHalf2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args55},
+    {(UINT)hlsl::IntrinsicOp::MOP_StoreHalf3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args56},
+    {(UINT)hlsl::IntrinsicOp::MOP_StoreHalf4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args57},
 };
 
 //
@@ -5532,11 +5912,11 @@ static const HLSL_INTRINSIC g_ConsumeStructuredBufferMethods[] =
 // HLSL-INTRINSIC-STATS:BEGIN
 static const UINT g_uAppendStructuredBufferMethodsCount = 2;
 static const UINT g_uBufferMethodsCount = 3;
-static const UINT g_uByteAddressBufferMethodsCount = 9;
+static const UINT g_uByteAddressBufferMethodsCount = 29; // 9 + 20; presumably the same 20 typed Load* overloads added to the RW table (ByteAddressBuffer table not visible here, confirm)
 static const UINT g_uConsumeStructuredBufferMethodsCount = 2;
 static const UINT g_uIntrinsicsCount = 179;
 static const UINT g_uRWBufferMethodsCount = 3;
-static const UINT g_uRWByteAddressBufferMethodsCount = 28;
+static const UINT g_uRWByteAddressBufferMethodsCount = 58; // 28 + 20 typed Load* overloads + 10 typed Store* methods; agrees with argument arrays Args0..Args57
 static const UINT g_uRWStructuredBufferMethodsCount = 5;
 static const UINT g_uRWTexture1DArrayMethodsCount = 4;
 static const UINT g_uRWTexture1DMethodsCount = 4;

+ 31 - 0
tools/clang/test/CodeGenHLSL/raw_buf2.hlsl

@@ -0,0 +1,31 @@
+// RUN: %dxc -E main -T ps_6_2 %s | FileCheck %s
+
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 4)
+// CHECK: uitofp
+
+ByteAddressBuffer buf1; // handle shows up as %buf1_texture_rawbuf in the expected IR
+RWByteAddressBuffer buf2; // handle shows up as %buf2_UAV_rawbuf in the expected IR
+
+float4 main(uint idx1 : IDX1, uint idx2 : IDX2) : SV_Target { // ps_6_2: every uint Load below is expected to become rawBufferLoad.i32 with alignment 4
+  uint status; // receives the status out-parameter of the two-argument Load overloads
+  float4 r = float4(0,0,0,0);
+
+  r.x += buf1.Load(idx1); // expected component mask 1
+  r.xy += buf1.Load2(idx1, status); // expected component mask 3
+  r.xyz += buf1.Load3(idx1); // expected component mask 7
+  r.xyzw += buf1.Load4(idx1, status); // expected component mask 15
+
+  r.x += buf2.Load(idx2, status); // same four masks again, now against the UAV handle
+  r.xy += buf2.Load2(idx2);
+  r.xyz += buf2.Load3(idx2, status);
+  r.xyzw += buf2.Load4(idx2);
+ 
+  return r;
+}

+ 54 - 0
tools/clang/test/CodeGenHLSL/raw_buf3.hlsl

@@ -0,0 +1,54 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK-NOT: @dx.op.rawBufferLoad
+// CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32
+// CHECK: call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32
+// CHECK: call double @dx.op.makeDouble.f64
+// CHECK: call double @dx.op.makeDouble.f64
+// CHECK: call double @dx.op.makeDouble.f64
+// CHECK: call double @dx.op.makeDouble.f64
+// CHECK: call void @dx.op.bufferStore.i32
+// CHECK: call void @dx.op.bufferStore.i32
+// CHECK: call void @dx.op.bufferStore.i32
+// CHECK: call void @dx.op.bufferStore.i32
+
+ByteAddressBuffer buf1;
+RWByteAddressBuffer buf2;
+
+float4 main(uint idx1 : IDX1, uint idx2 : IDX2) : SV_Target { // ps_6_0: the expectations look for bufferLoad/bufferStore rather than the rawBuffer forms
+  uint status; // receives the status out-parameter of the overloads that take it
+  float4 r = float4(0,0,0,0);
+
+  r.x += buf1.Load(idx1); // uint loads: expected as bufferLoad.i32
+  r.xy += buf1.Load2(idx1, status);
+  r.xyz += buf1.Load3(idx1);
+  r.xyzw += buf1.Load4(idx1, status);
+
+  r.x += buf2.Load(idx2, status);
+  r.xy += buf2.Load2(idx2);
+  r.xyz += buf2.Load3(idx2, status);
+  r.xyzw += buf2.Load4(idx2);
+
+  r.x += buf1.LoadFloat(idx1, status); // float loads: expected as bufferLoad.f32
+  r.xy += buf1.LoadFloat2(idx1);
+  r.xyz += buf1.LoadFloat3(idx1, status);
+  r.xyzw += buf1.LoadFloat4(idx1);
+
+  r.x += buf2.LoadFloat(idx2);
+  r.xy += buf2.LoadFloat2(idx2, status);
+  r.xyz += buf2.LoadFloat3(idx2);
+  r.xyzw += buf2.LoadFloat4(idx2, status);
+
+  r.x += buf1.LoadDouble(idx1); // double loads: the expectations look for makeDouble.f64 calls
+  r.xy += buf1.LoadDouble2(idx1, status);
+
+  r.x += buf2.LoadDouble(idx2, status);
+  r.xy += buf2.LoadDouble2(idx2);
+
+  buf2.Store(1, r.x); // four uint stores: expected as four bufferStore.i32 calls
+  buf2.Store2(1, r.xy);
+  buf2.Store3(1, r.xyz);
+  buf2.Store4(1, r);
+
+  return r;
+}

+ 71 - 0
tools/clang/test/CodeGenHLSL/raw_buf4.hlsl

@@ -0,0 +1,71 @@
+// RUN: %dxc -E main -T ps_6_2 %s | FileCheck %s
+
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 4)
+
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 4)
+
+// CHECK-NOT: call %dx.types.ResRet.f16
+
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 8)
+// CHECK: call double @dx.op.makeDouble.f64
+
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 8)
+// CHECK: call double @dx.op.makeDouble.f64
+
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 undef, i32 undef, i32 undef, i8 1, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i8 7, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i8 15, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, float %{{.*}}, float undef, float undef, float undef, i8 1, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, float %{{.*}}, float %{{.*}}, float undef, float undef, i8 3, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, float %{{.*}}, float %{{.*}}, float %{{.*}}, float undef, i8 7, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, i8 15, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i8 3, i32 8)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i8 15, i32 8)
+
+ByteAddressBuffer buf1; // handle shows up as %buf1_texture_rawbuf in the expected IR
+RWByteAddressBuffer buf2; // handle shows up as %buf2_UAV_rawbuf in the expected IR
+
+float4 main(uint idx1 : IDX1, uint idx2 : IDX2) : SV_Target { // ps_6_2: typed loads/stores are expected in their rawBufferLoad/rawBufferStore forms
+  uint status; // receives the status out-parameter of the overloads that take it
+  float4 r = float4(0,0,0,0);
+
+  r.x += buf1.LoadFloat(idx1, status); // float loads: expected as rawBufferLoad.f32, masks 1/3/7/15, alignment 4
+  r.xy += buf1.LoadFloat2(idx1);
+  r.xyz += buf1.LoadFloat3(idx1, status);
+  r.xyzw += buf1.LoadFloat4(idx1);
+
+  r.x += buf2.LoadFloat(idx2);
+  r.xy += buf2.LoadFloat2(idx2, status);
+  r.xyz += buf2.LoadFloat3(idx2);
+  r.xyzw += buf2.LoadFloat4(idx2, status);
+
+  r.x += buf1.LoadDouble(idx1); // double loads: expected as rawBufferLoad.i32 with alignment 8, followed by makeDouble.f64
+  r.xy += buf1.LoadDouble2(idx1, status);
+
+  r.x += buf2.LoadDouble(idx2, status);
+  r.xy += buf2.LoadDouble2(idx2);
+
+  buf2.Store(1, r.x); // uint stores: expected as rawBufferStore.i32, alignment 4
+  buf2.Store2(1, r.xy);
+  buf2.Store3(1, r.xyz);
+  buf2.Store4(1, r);
+
+  buf2.StoreFloat(1, r.x); // float stores: expected as rawBufferStore.f32, alignment 4
+  buf2.StoreFloat2(1, r.xy);
+  buf2.StoreFloat3(1, r.xyz);
+  buf2.StoreFloat4(1, r);
+
+  buf2.StoreDouble(1, r.x); // double stores: expected as rawBufferStore.i32 with alignment 8
+  buf2.StoreDouble2(1, r.xy); 
+
+  return r;
+}

+ 98 - 0
tools/clang/test/CodeGenHLSL/raw_buf5.hlsl

@@ -0,0 +1,98 @@
+// RUN: %dxc -E main -T ps_6_2 -no-min-precision %s | FileCheck %s
+
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 4)
+
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 4)
+
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 1, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 7, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 2)
+
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 1, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 7, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 2)
+
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 8)
+// CHECK: call double @dx.op.makeDouble.f64
+
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 8)
+// CHECK: call double @dx.op.makeDouble.f64
+
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 undef, i32 undef, i32 undef, i8 1, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i8 7, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i8 15, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, half %{{.*}}, half undef, half undef, half undef, i8 1, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, half %{{.*}}, half %{{.*}}, half undef, half undef, i8 3, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, half %{{.*}}, half %{{.*}}, half %{{.*}}, half undef, i8 7, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, i8 15, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, float %{{.*}}, float undef, float undef, float undef, i8 1, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, float %{{.*}}, float %{{.*}}, float undef, float undef, i8 3, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, float %{{.*}}, float %{{.*}}, float %{{.*}}, float undef, i8 7, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, i8 15, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i8 3, i32 8)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i8 15, i32 8)
+
+ByteAddressBuffer buf1; // handle shows up as %buf1_texture_rawbuf in the expected IR
+RWByteAddressBuffer buf2; // handle shows up as %buf2_UAV_rawbuf in the expected IR
+
+float4 main(uint idx1 : IDX1, uint idx2 : IDX2) : SV_Target { // ps_6_2 with -no-min-precision: half accesses are expected as f16 rawBuffer operations
+  uint status; // receives the status out-parameter of the overloads that take it
+  float4 r = float4(0,0,0,0);
+
+  r.x += buf1.LoadFloat(idx1, status); // float loads: expected as rawBufferLoad.f32, masks 1/3/7/15, alignment 4
+  r.xy += buf1.LoadFloat2(idx1);
+  r.xyz += buf1.LoadFloat3(idx1, status);
+  r.xyzw += buf1.LoadFloat4(idx1);
+
+  r.x += buf2.LoadFloat(idx2);
+  r.xy += buf2.LoadFloat2(idx2, status);
+  r.xyz += buf2.LoadFloat3(idx2);
+  r.xyzw += buf2.LoadFloat4(idx2, status);
+
+  r.x += buf1.LoadHalf(idx1, status); // half loads: expected as rawBufferLoad.f16, masks 1/3/7/15, alignment 2
+  r.xy += buf1.LoadHalf2(idx1);
+  r.xyz += buf1.LoadHalf3(idx1, status);
+  r.xyzw += buf1.LoadHalf4(idx1);
+
+  r.x += buf2.LoadHalf(idx2);
+  r.xy += buf2.LoadHalf2(idx2, status);
+  r.xyz += buf2.LoadHalf3(idx2);
+  r.xyzw += buf2.LoadHalf4(idx2, status);
+
+  r.x += buf1.LoadDouble(idx1); // double loads: expected as rawBufferLoad.i32 with alignment 8, followed by makeDouble.f64
+  r.xy += buf1.LoadDouble2(idx1, status);
+
+  r.x += buf2.LoadDouble(idx2, status);
+  r.xy += buf2.LoadDouble2(idx2);
+
+  buf2.Store(1, r.x); // uint stores: expected as rawBufferStore.i32, alignment 4
+  buf2.Store2(1, r.xy);
+  buf2.Store3(1, r.xyz);
+  buf2.Store4(1, r);
+
+  buf2.StoreHalf(1, r.x); // half stores: expected as rawBufferStore.f16, alignment 2
+  buf2.StoreHalf2(1, r.xy);
+  buf2.StoreHalf3(1, r.xyz);
+  buf2.StoreHalf4(1, r);
+
+  buf2.StoreFloat(1, r.x); // float stores: expected as rawBufferStore.f32, alignment 4
+  buf2.StoreFloat2(1, r.xy);
+  buf2.StoreFloat3(1, r.xyz);
+  buf2.StoreFloat4(1, r);
+
+  buf2.StoreDouble(1, r.x); // double stores: expected as rawBufferStore.i32 with alignment 8
+  buf2.StoreDouble2(1, r.xy);
+
+  return r;
+}

+ 6 - 1
tools/clang/test/CodeGenHLSL/struct_buf1.hlsl

@@ -1,4 +1,9 @@
-// RUN: %dxc -E main -T ps_6_0 %s
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: @dx.op.bufferLoad
+// CHECK: @dx.op.bufferStore
+// CHECK-NOT: @dx.op.rawBufferLoad
+// CHECK-NOT: @dx.op.rawBufferStore
 
 struct Foo
 {

+ 121 - 0
tools/clang/test/CodeGenHLSL/struct_buf2.hlsl

@@ -0,0 +1,121 @@
+// RUN: %dxc -E main -T ps_6_2 %s | FileCheck %s
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 0, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 4, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 12, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 24, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 40, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 44, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 52, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 64, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 80, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 84, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 92, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 104, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 120, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 124, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 132, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 144, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 160, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 168, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 184, i8 15, i32 8)
+// second half of double3 (offset 200), followed by the two halves of double4
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 200, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 208, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 224, i8 15, i32 8)
+
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 0, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 4, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 12, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 24, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 40, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 44, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 52, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 64, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 80, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 84, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 92, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 104, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 120, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 124, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 132, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 144, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 160, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 168, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 184, i8 15, i32 8)
+// second half of double3 (offset 200), followed by the two halves of double4
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 200, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 208, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 224, i8 15, i32 8)
+
+struct MyStruct { // element type of buf1/buf2; offsets noted below are read off the expected rawBufferLoad lines, assuming they follow field order
+  int   i1; // offset 0
+  int2  i2; // offset 4
+  int3  i3; // offset 12
+  int4  i4; // offset 24
+  uint  u1; // offset 40
+  uint2 u2; // offset 44
+  uint3 u3; // offset 52
+  uint4 u4; // offset 64
+  half  h1; // offset 80; expected as an f32 load with alignment 4 (command line has no -no-min-precision)
+  half2 h2; // offset 84
+  half3 h3; // offset 92
+  half4 h4; // offset 104
+  float f1; // offset 120
+  float2 f2; // offset 124
+  float3 f3; // offset 132
+  float4 f4; // offset 144
+  double d1; // offset 160; doubles are expected as i32 loads with alignment 8, two i32 per double
+  double2 d2; // offset 168
+  double3 d3; // expected as two loads: offset 184 (mask 15) then offset 200 (mask 3)
+  double4 d4; // expected as two loads: offset 208 and offset 224 (both mask 15)
+};
+StructuredBuffer<MyStruct> buf1; // read-only structured buffer (the buf1_texture_structbuf handle in the expectations)
+RWStructuredBuffer<MyStruct> buf2; // read-write structured buffer (the buf2_UAV_structbuf handle in the expectations)
+int4 main(float idx1 : IDX1, float idx2 : IDX2) : SV_Target { // float inputs are passed straight to Load as element indices
+  uint status; // out-argument of every Load below; never read
+  float4 r = 0; // accumulator; each load adds one field, in declaration order
+  r.x += buf1.Load(idx1, status).i1; // one Load per field of MyStruct on the read-only buffer
+  r.xy += buf1.Load(idx1, status).i2;
+  r.xyz += buf1.Load(idx1, status).i3;
+  r.xyzw += buf1.Load(idx1, status).i4;
+  r.x += buf1.Load(idx1, status).u1;
+  r.xy += buf1.Load(idx1, status).u2;
+  r.xyz += buf1.Load(idx1, status).u3;
+  r.xyzw += buf1.Load(idx1, status).u4;
+  r.x += buf1.Load(idx1, status).h1;
+  r.xy += buf1.Load(idx1, status).h2;
+  r.xyz += buf1.Load(idx1, status).h3;
+  r.xyzw += buf1.Load(idx1, status).h4;
+  r.x += buf1.Load(idx1, status).f1;
+  r.xy += buf1.Load(idx1, status).f2;
+  r.xyz += buf1.Load(idx1, status).f3;
+  r.xyzw += buf1.Load(idx1, status).f4;
+  r.x += buf1.Load(idx1, status).d1;
+  r.xy += buf1.Load(idx1, status).d2;
+  r.xyz += buf1.Load(idx1, status).d3;
+  r.xyzw += buf1.Load(idx1, status).d4;
+
+  r.x += buf2.Load(idx2, status).i1; // the same field sequence repeated on the read-write buffer
+  r.xy += buf2.Load(idx2, status).i2;
+  r.xyz += buf2.Load(idx2, status).i3;
+  r.xyzw += buf2.Load(idx2, status).i4;
+  r.x += buf2.Load(idx2, status).u1;
+  r.xy += buf2.Load(idx2, status).u2;
+  r.xyz += buf2.Load(idx2, status).u3;
+  r.xyzw += buf2.Load(idx2, status).u4;
+  r.x += buf2.Load(idx2, status).h1;
+  r.xy += buf2.Load(idx2, status).h2;
+  r.xyz += buf2.Load(idx2, status).h3;
+  r.xyzw += buf2.Load(idx2, status).h4;
+  r.x += buf2.Load(idx2, status).f1;
+  r.xy += buf2.Load(idx2, status).f2;
+  r.xyz += buf2.Load(idx2, status).f3;
+  r.xyzw += buf2.Load(idx2, status).f4;
+  r.x += buf2.Load(idx2, status).d1;
+  r.xy += buf2.Load(idx2, status).d2;
+  r.xyz += buf2.Load(idx2, status).d3;
+  r.xyzw += buf2.Load(idx2, status).d4;
+
+  buf2[0].f4 = r; // the only store in this shader: subscript write to element 0
+  return r; // float4 accumulator returned through the int4 return type (implicit conversion)
+}

+ 88 - 0
tools/clang/test/CodeGenHLSL/struct_buf3.hlsl

@@ -0,0 +1,88 @@
+// RUN: %dxc -E main -T ps_6_2 %s | FileCheck %s
+
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+// CHECK: trunc i32 %{{[a-zA-Z0-9]+}} to i16
+
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32
+// CHECK: fptrunc float %{{[a-zA-Z0-9]+}} to half
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32
+// CHECK: fptrunc float %{{[a-zA-Z0-9]+}} to half
+// CHECK: fptrunc float %{{[a-zA-Z0-9]+}} to half
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32
+// CHECK: fptrunc float %{{[a-zA-Z0-9]+}} to half
+// CHECK: fptrunc float %{{[a-zA-Z0-9]+}} to half
+// CHECK: fptrunc float %{{[a-zA-Z0-9]+}} to half
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32
+// CHECK: fptrunc float %{{[a-zA-Z0-9]+}} to half
+// CHECK: fptrunc float %{{[a-zA-Z0-9]+}} to half
+// CHECK: fptrunc float %{{[a-zA-Z0-9]+}} to half
+// CHECK: fptrunc float %{{[a-zA-Z0-9]+}} to half
+
+// CHECK: sext i16 %{{.*}} to i32
+// CHECK: call void @dx.op.rawBufferStore.i32
+// CHECK: zext i16 %{{.*}} to i32
+// CHECK: call void @dx.op.rawBufferStore.i32
+
+struct MyStruct { // min-precision-only element type; the expectations load these as i32/f32 and then narrow to 16 bits
+  min16int mi1; // signed group: expected rawBufferLoad.i32 followed by trunc to i16
+  min16int2 mi2;
+  min16int3 mi3;
+  min16int4 mi4;
+  min16uint mu1; // unsigned group: same expected i32 load + trunc pattern as the signed group
+  min16uint2 mu2;
+  min16uint3 mu3;
+  min16uint4 mu4;
+  min16float mf1; // float group: expected rawBufferLoad.f32 followed by fptrunc to half
+  min16float2 mf2;
+  min16float3 mf3;
+  min16float4 mf4;
+};
+StructuredBuffer<MyStruct> buf1; // declared but not referenced by main below
+RWStructuredBuffer<MyStruct> buf2; // every load and store in main goes through this buffer
+int4 main(float idx1 : IDX1, float idx2 : IDX2) : SV_Target { // idx1 is unused; idx2 is passed to Load as the element index
+  uint status; // out-argument of every Load below; never read
+  min16int4 r = 0; // min-precision signed accumulator
+  r.x += buf2.Load(idx2, status).mi1; // one Load per field, 1 to 4 components per group
+  r.xy += buf2.Load(idx2, status).mi2;
+  r.xyz += buf2.Load(idx2, status).mi3;
+  r.xyzw += buf2.Load(idx2, status).mi4;
+  r.x += buf2.Load(idx2, status).mu1;
+  r.xy += buf2.Load(idx2, status).mu2;
+  r.xyz += buf2.Load(idx2, status).mu3;
+  r.xyzw += buf2.Load(idx2, status).mu4;
+  r.x += buf2.Load(idx2, status).mf1; // min16float values are accumulated into the integer vector
+  r.xy += buf2.Load(idx2, status).mf2;
+  r.xyz += buf2.Load(idx2, status).mf3;
+  r.xyzw += buf2.Load(idx2, status).mf4;
+
+  buf2[0].mi4 = r; // NOTE(review): presumably the store preceded by the expected sext i16 -> i32 - confirm
+  buf2[0].mu4 = (min16uint4)r; // NOTE(review): presumably the store preceded by the expected zext i16 -> i32 - confirm
+  return r; // min16int4 returned through the int4 return type
+}

+ 127 - 0
tools/clang/test/CodeGenHLSL/struct_buf4.hlsl

@@ -0,0 +1,127 @@
+// RUN: %dxc -E main -T ps_6_2 -no-min-precision %s | FileCheck %s
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 0, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 4, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 12, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 24, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 40, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 44, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 52, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 64, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 80, i8 1, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 82, i8 3, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 86, i8 7, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 92, i8 15, i32 2)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 100, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 104, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 112, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 124, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 144, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 152, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 168, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 184, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 192, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 208, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 224, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 236, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf1_texture_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 248, i8 1, i32 4)
+
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 0, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 4, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 12, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 24, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 40, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 44, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 52, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 64, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 80, i8 1, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 82, i8 3, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 86, i8 7, i32 2)
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 92, i8 15, i32 2)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 100, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 104, i8 3, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 112, i8 7, i32 4)
+// CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 124, i8 15, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 144, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 152, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 168, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 184, i8 3, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 192, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 208, i8 15, i32 8)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 224, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 236, i8 1, i32 4)
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_structbuf, i32 %{{[a-zA-Z0-9]+}}, i32 248, i8 1, i32 4)
+
+struct MyStruct { // element type of buf1/buf2; offsets noted below are read off the expected rawBufferLoad lines, assuming they follow field order
+  int   i1; // offset 0
+  int2  i2; // offset 4
+  int3  i3; // offset 12
+  int4  i4; // offset 24
+  uint  u1; // offset 40
+  uint2 u2; // offset 44
+  uint3 u3; // offset 52
+  uint4 u4; // offset 64
+  half  h1; // offset 80; expected as an f16 load with alignment 2 (command line passes -no-min-precision)
+  half2 h2; // offset 82
+  half3 h3; // offset 86
+  half4 h4; // offset 92
+  float f1; // offset 100
+  float2 f2; // offset 104
+  float3 f3; // offset 112
+  float4 f4; // offset 124
+  double d1; // offset 144; doubles are expected as i32 loads with alignment 8, two i32 per double
+  double2 d2; // offset 152
+  double3 d3; // expected as two loads: offset 168 (mask 15) then offset 184 (mask 3)
+  double4 d4; // expected as two loads: offset 192 and offset 208 (both mask 15)
+  int3x3 i3x3; // only row 0 is read; expected as three single-component loads at offsets 224, 236 and 248
+};
+StructuredBuffer<MyStruct> buf1; // read-only structured buffer (the buf1_texture_structbuf handle in the expectations)
+RWStructuredBuffer<MyStruct> buf2; // read-write structured buffer (the buf2_UAV_structbuf handle in the expectations)
+int4 main(float idx1 : IDX1, float idx2 : IDX2) : SV_Target { // float inputs are passed straight to Load as element indices
+  uint status; // out-argument of every Load below; never read
+  float4 r = 0; // accumulator; each load adds one field, in declaration order
+  r.x += buf1.Load(idx1, status).i1; // one Load per field of MyStruct on the read-only buffer
+  r.xy += buf1.Load(idx1, status).i2;
+  r.xyz += buf1.Load(idx1, status).i3;
+  r.xyzw += buf1.Load(idx1, status).i4;
+  r.x += buf1.Load(idx1, status).u1;
+  r.xy += buf1.Load(idx1, status).u2;
+  r.xyz += buf1.Load(idx1, status).u3;
+  r.xyzw += buf1.Load(idx1, status).u4;
+  r.x += buf1.Load(idx1, status).h1;
+  r.xy += buf1.Load(idx1, status).h2;
+  r.xyz += buf1.Load(idx1, status).h3;
+  r.xyzw += buf1.Load(idx1, status).h4;
+  r.x += buf1.Load(idx1, status).f1;
+  r.xy += buf1.Load(idx1, status).f2;
+  r.xyz += buf1.Load(idx1, status).f3;
+  r.xyzw += buf1.Load(idx1, status).f4;
+  r.x += buf1.Load(idx1, status).d1;
+  r.xy += buf1.Load(idx1, status).d2;
+  r.xyz += buf1.Load(idx1, status).d3;
+  r.xyzw += buf1.Load(idx1, status).d4;
+  r.xyz += buf1.Load(idx1, status).i3x3[0]; // row 0 of the matrix field
+
+  r.x += buf2.Load(idx2, status).i1; // the same field sequence repeated on the read-write buffer
+  r.xy += buf2.Load(idx2, status).i2;
+  r.xyz += buf2.Load(idx2, status).i3;
+  r.xyzw += buf2.Load(idx2, status).i4;
+  r.x += buf2.Load(idx2, status).u1;
+  r.xy += buf2.Load(idx2, status).u2;
+  r.xyz += buf2.Load(idx2, status).u3;
+  r.xyzw += buf2.Load(idx2, status).u4;
+  r.x += buf2.Load(idx2, status).h1;
+  r.xy += buf2.Load(idx2, status).h2;
+  r.xyz += buf2.Load(idx2, status).h3;
+  r.xyzw += buf2.Load(idx2, status).h4;
+  r.x += buf2.Load(idx2, status).f1;
+  r.xy += buf2.Load(idx2, status).f2;
+  r.xyz += buf2.Load(idx2, status).f3;
+  r.xyzw += buf2.Load(idx2, status).f4;
+  r.x += buf2.Load(idx2, status).d1;
+  r.xy += buf2.Load(idx2, status).d2;
+  r.xyz += buf2.Load(idx2, status).d3;
+  r.xyzw += buf2.Load(idx2, status).d4;
+  r.xyz += buf2.Load(idx2, status).i3x3[0]; // row 0 of the matrix field
+
+  return r; // no stores in this shader; float4 accumulator returned through the int4 return type
+}

+ 55 - 0
tools/clang/test/CodeGenHLSL/struct_buf5.hlsl

@@ -0,0 +1,55 @@
+// RUN: %dxc -E main -T ps_6_2 -no-min-precision %s | FileCheck %s
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16
+// CHECK: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16
+
+// CHECK: call void @dx.op.rawBufferStore.i32
+// CHECK: call void @dx.op.rawBufferStore.i32
+
+struct MyStruct { // min-precision-only element type, compiled here with -no-min-precision
+  min16int mi1; // signed group: expected as rawBufferLoad.i32
+  min16int2 mi2;
+  min16int3 mi3;
+  min16int4 mi4;
+  min16uint mu1; // unsigned group: expected as rawBufferLoad.i32
+  min16uint2 mu2;
+  min16uint3 mu3;
+  min16uint4 mu4;
+  min16float mf1; // float group: expected as rawBufferLoad.f16
+  min16float2 mf2;
+  min16float3 mf3;
+  min16float4 mf4;
+};
+StructuredBuffer<MyStruct> buf1; // declared but not referenced by main below
+RWStructuredBuffer<MyStruct> buf2; // every load and store in main goes through this buffer
+int4 main(float idx1 : IDX1, float idx2 : IDX2) : SV_Target { // idx1 is unused; idx2 is passed to Load as the element index
+  uint status; // out-argument of every Load below; never read
+  min16uint4 r = 0; // unsigned accumulator
+  r.x += buf2.Load(idx2, status).mi1; // one Load per field, 1 to 4 components per group
+  r.xy += buf2.Load(idx2, status).mi2;
+  r.xyz += buf2.Load(idx2, status).mi3;
+  r.xyzw += buf2.Load(idx2, status).mi4;
+  r.x += buf2.Load(idx2, status).mu1;
+  r.xy += buf2.Load(idx2, status).mu2;
+  r.xyz += buf2.Load(idx2, status).mu3;
+  r.xyzw += buf2.Load(idx2, status).mu4;
+  r.x += buf2.Load(idx2, status).mf1; // min16float values are accumulated into the integer vector
+  r.xy += buf2.Load(idx2, status).mf2;
+  r.xyz += buf2.Load(idx2, status).mf3;
+  r.xyzw += buf2.Load(idx2, status).mf4;
+
+  buf2[0].mi4 = r; // unsigned accumulator assigned to the signed field; first of the two expected rawBufferStore.i32 calls, presumably
+  buf2[0].mu4 = (min16uint4)r; // r is already min16uint4, so the cast does not change its type
+  return r; // min16uint4 returned through the int4 return type
+}

+ 41 - 0
tools/clang/test/CodeGenHLSL/struct_buf6.hlsl

@@ -0,0 +1,41 @@
+// RUN: %dxc -E main -T ps_6_2 %s | FileCheck %s
+
+// CHECK-NOT: @dx.op.bufferLoad
+// CHECK-NOT: @dx.op.bufferStore
+// CHECK: @dx.op.rawBufferLoad
+// CHECK: @dx.op.rawBufferStore
+
+struct Foo // element type of the two structured buffers in this test
+{
+  float2 a;
+  float3 b;
+  int2 c[4]; // array member; main indexes it with a dynamic index
+};
+
+StructuredBuffer<Foo> buf1; // read-only source buffer
+RWStructuredBuffer<Foo> buf2; // read-write buffer: loaded from and stored to below
+
+float4 main(float idx1 : Idx1, float idx2 : Idx2) : SV_Target // ps_6_2 test: expectations require rawBufferLoad/rawBufferStore and forbid bufferLoad/bufferStore
+{
+  uint status; // written by the status-taking Load overload and added into r afterwards
+  float4 r = 0; // accumulator for loaded fields and status values
+  r.xy += buf1.Load(idx1).a; // Load without status, one field per call
+  r.xyz += buf1.Load(idx1).b;
+  r.wy += buf1.Load(idx1).c[idx2]; // dynamic index into the array member
+  r.xy += buf1.Load(idx2, status).a; r += status; // Load with status; status is consumed right away
+  r.xyz += buf1.Load(idx2, status).b; r += status;
+  r.wy += buf1.Load(idx2, status).c[idx2]; r += status;
+
+  r.xy += buf2.Load(idx1+200).a; // same six loads on the read-write buffer, element index offset by 200
+  r.xyz += buf2.Load(idx1+200).b;
+  r.wy += buf2.Load(idx1+200).c[idx2];
+  r.xy += buf2.Load(idx2+200, status).a; r += status;
+  r.xyz += buf2.Load(idx2+200, status).b; r += status;
+  r.wy += buf2.Load(idx2+200, status).c[idx2]; r += status;
+
+  buf2[idx1*3].a = r.xy; // stores through the subscript operator at a computed element index
+  buf2[idx1*3].b = r.xyz;
+  buf2[idx1*3].c[idx2] = r.yw;
+  buf2[0].a = buf1.Load(1).b.xy; // buffer-to-buffer copy with constant indices
+  return r;
+}

+ 12 - 12
tools/clang/test/CodeGenHLSL/struct_buf_new_layout.hlsl

@@ -1,17 +1,17 @@
 // RUN: %dxc -E main -T ps_6_2 -no-min-precision %s  | FileCheck %s
 
-// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 0, half 0xH3C00, half 0xH3C00, half 0xH3C00, half undef, i8 7)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 8, i32 2, i32 2, i32 2, i32 2, i8 15)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 24, half 0xH4200, half 0xH4200, half 0xH4200, half undef, i8 7)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 30, half 0xH4400, half 0xH4400, half 0xH4400, half 0xH4400, i8 15)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 40, i32 %1, i32 %2, i32 undef, i32 undef, i8 3)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 48, half 0xH4600, half undef, half undef, half undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 50, half 0xH4700, half undef, half undef, half undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 52, half 0xH4800, half undef, half undef, half undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 56, i32 9, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %g_sb2_UAV_structbuf, i32 0, i32 0, i32 %4, i32 %5, i32 undef, i32 undef, i8 3)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle %g_sb2_UAV_structbuf, i32 0, i32 8, half 0xH4000, half 0xH4000, half 0xH4000, half undef, i8 7)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
-// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %g_sb2_UAV_structbuf, i32 0, i32 16, i32 3, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 0, half 0xH3C00, half 0xH3C00, half 0xH3C00, half undef, i8 7, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 8, i32 2, i32 2, i32 2, i32 2, i8 15, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 24, half 0xH4200, half 0xH4200, half 0xH4200, half undef, i8 7, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 30, half 0xH4400, half 0xH4400, half 0xH4400, half 0xH4400, i8 15, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 40, i32 %1, i32 %2, i32 undef, i32 undef, i8 3, i32 8)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 48, half 0xH4600, half undef, half undef, half undef, i8 1, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 50, half 0xH4700, half undef, half undef, half undef, i8 1, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 52, half 0xH4800, half undef, half undef, half undef, i8 1, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %g_sb1_UAV_structbuf, i32 0, i32 56, i32 9, i32 undef, i32 undef, i32 undef, i8 1, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %g_sb2_UAV_structbuf, i32 0, i32 0, i32 %4, i32 %5, i32 undef, i32 undef, i8 3, i32 8)
+// CHECK: call void @dx.op.rawBufferStore.f16(i32 140, %dx.types.Handle %g_sb2_UAV_structbuf, i32 0, i32 8, half 0xH4000, half 0xH4000, half 0xH4000, half undef, i8 7, i32 2)
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %g_sb2_UAV_structbuf, i32 0, i32 16, i32 3, i32 undef, i32 undef, i32 undef, i8 1, i32 4)
 
 struct MyStruct1
 {

+ 8 - 2
tools/clang/test/CodeGenHLSL/uav_typed_load_store1.hlsl

@@ -8,8 +8,14 @@ float4 main(uint2 a : A, uint2 b : B) : SV_Target
   uint status;
   r += uav1[b];
   r += uav1.Load(a);
-  uav1.Load(a, status); r += status;
-  uav1.Load(a, status); r += status;
+  uav1.Load(a, status);
+  if (CheckAccessFullyMapped(status)) {
+    r += 3;
+  }
+  uav1.Load(a, status);
+  if (CheckAccessFullyMapped(status)) {
+    r += 3;
+  }
   uav1[b] = r;
   return r;
 }

+ 32 - 0
tools/clang/test/CodeGenHLSL/uav_typed_load_store3.hlsl

@@ -0,0 +1,32 @@
+// RUN: %dxc -E main -T ps_6_2 -no-min-precision %s | FileCheck %s
+
+// CHECK: call %dx.types.ResRet.f32 @dx.op.textureLoad.f32
+// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32
+// CHECK: call %dx.types.ResRet.f16 @dx.op.textureLoad.f16
+// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32
+
+RWTexture2D<float4> uav1 : register(u3); // float texture: expected to load via textureLoad.f32
+RWTexture1D<half4> uav2 : register(u5); // half texture: expected to load via textureLoad.f16 (command line passes -no-min-precision)
+
+float4 main(uint2 a : A, uint2 b : B) : SV_Target // typed UAV loads with status; raw-buffer ops are not involved in this test
+{
+  float4 r = 0; // accumulator; also the value written back to both textures
+  uint status; // written by Load(a, status) and consumed only via CheckAccessFullyMapped
+  r += uav1[b]; // subscript read
+  r += uav1.Load(a); // Load without status
+  uav1.Load(a, status); // loaded value is discarded; only status is used
+  if (CheckAccessFullyMapped(status)) {
+    r += 3;
+  }
+
+  r += uav2[b.x];
+  r += uav2.Load(a); // NOTE(review): a is a uint2 while uav2 is one-dimensional; presumably implicitly truncated - confirm
+  uav2.Load(a, status); // loaded value is discarded; only status is used
+  if (CheckAccessFullyMapped(status)) {
+    r += 3;
+  }
+
+  uav1[b] = r; // subscript writes to both textures
+  uav2[b.x] = r;
+  return r;
+}

+ 5 - 0
tools/clang/test/HLSL/intrinsic-examples.hlsl

@@ -22,6 +22,11 @@ float4 RWByteAddressBufferMain(uint2 a : A, uint2 b : B) : SV_Target
   r += status;
   uav1[b] = r; // expected-error {{type 'RWByteAddressBuffer' does not provide a subscript operator}} fxc-error {{X3121: array, matrix, vector, or indexable object type expected in index expression}}
   uav1.Load(a.x, status);
+  min16float4 h = min16float4(1,2,3,4);
+  uav1.LoadHalf(h.x, status);                               /* expected-error {{LoadHalf and StoreHalf are not supported for min precision mode}} */
+  uav1.LoadHalf2(h.x);                                      /* expected-error {{LoadHalf and StoreHalf are not supported for min precision mode}} expected-warning {{ignoring return value of function that only reads data}} */
+  uav1.StoreHalf3(4, h.xyz);                                /* expected-error {{LoadHalf and StoreHalf are not supported for min precision mode}} */
+  uav1.StoreHalf4(8, h);                                    /* expected-error {{LoadHalf and StoreHalf are not supported for min precision mode}} */
   return r;
 }
 

+ 2 - 2
tools/clang/test/HLSL/matrix-syntax-exact-precision.hlsl

@@ -61,10 +61,10 @@ void matrix_out_of_bounds() {
 
 void matrix_unsigned() {
    unsigned int4x2 intMatrix;
-   unsigned min16int4x3 min16Matrix;
+   unsigned min16int4x3 min16Matrix; /* expected-warning {{min16int is promoted to int}} */
    unsigned int64_t3x3 int64Matrix;
    unsigned uint3x4 uintMatrix;
-   unsigned min16uint4x1 min16uintMatrix;
+   unsigned min16uint4x1 min16uintMatrix;                   /* expected-warning {{min16uint is promoted to uint}} */
    unsigned uint64_t2x2 int64uintMatrix;
    unsigned dword3x2 dwordvector; /* fxc-error {{X3000: unrecognized identifier 'dword3x1'}} */
 

+ 51 - 51
tools/clang/test/HLSL/scalar-assignments-exact-precision.hlsl

@@ -39,9 +39,9 @@ bool left5; float right5; left5 = right5;
 bool left6; double right6; left6 = right6;
 bool left7; min16float right7; left7 = right7;              /* expected-warning {{min16float is promoted to half}} */
 bool left8; min10float right8; left8 = right8;  // expected-warning {{min10float is promoted to half}}
-bool left9; min16int right9; left9 = right9;
+bool left9; min16int right9; left9 = right9;    /* expected-warning {{min16int is promoted to int}} */
 bool left10; min12int right10; left10 = right10;  // expected-warning {{min12int is promoted to min16int}}
-bool left11; min16uint right11; left11 = right11;
+bool left11; min16uint right11; left11 = right11; /* expected-warning {{min16uint is promoted to uint}} */
 int left12; bool right12; left12 = right12;
 int left13; int right13; left13 = right13;
 int left14; uint right14; left14 = right14;
@@ -51,9 +51,9 @@ int left17; float right17; left17 = right17;
 int left18; double right18; left18 = right18; // expected-warning {{conversion from larger type 'double' to smaller type 'int', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
 int left19; min16float right19; left19 = right19;           /* expected-warning {{min16float is promoted to half}} */
 int left20; min10float right20; left20 = right20;  // expected-warning {{min10float is promoted to half}}
-int left21; min16int right21; left21 = right21;
+int left21; min16int right21; left21 = right21;    /* expected-warning {{min16int is promoted to int}} */
 int left22; min12int right22; left22 = right22;  // expected-warning {{min12int is promoted to min16int}}
-int left23; min16uint right23; left23 = right23;
+int left23; min16uint right23; left23 = right23; /* expected-warning {{min16uint is promoted to uint}} */
 uint left24; bool right24; left24 = right24;
 uint left25; int right25; left25 = right25;
 uint left26; uint right26; left26 = right26;
@@ -63,9 +63,9 @@ uint left29; float right29; left29 = right29;
 uint left30; double right30; left30 = right30; // expected-warning {{conversion from larger type 'double' to smaller type 'uint', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
 uint left31; min16float right31; left31 = right31;          /* expected-warning {{min16float is promoted to half}} */
 uint left32; min10float right32; left32 = right32;  // expected-warning {{min10float is promoted to half}}
-uint left33; min16int right33; left33 = right33;
+uint left33; min16int right33; left33 = right33;    /* expected-warning {{min16int is promoted to int}} */
 uint left34; min12int right34; left34 = right34;  // expected-warning {{min12int is promoted to min16int}}
-uint left35; min16uint right35; left35 = right35;
+uint left35; min16uint right35; left35 = right35; /* expected-warning {{min16uint is promoted to uint}} */
 dword left36; bool right36; left36 = right36;
 dword left37; int right37; left37 = right37;
 dword left38; uint right38; left38 = right38;
@@ -75,9 +75,9 @@ dword left41; float right41; left41 = right41;
 dword left42; double right42; left42 = right42; // expected-warning {{conversion from larger type 'double' to smaller type 'dword', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
 dword left43; min16float right43; left43 = right43;         /* expected-warning {{min16float is promoted to half}} */
 dword left44; min10float right44; left44 = right44;  // expected-warning {{min10float is promoted to half}}
-dword left45; min16int right45; left45 = right45;
+dword left45; min16int right45; left45 = right45;    /* expected-warning {{min16int is promoted to int}} */
 dword left46; min12int right46; left46 = right46;  // expected-warning {{min12int is promoted to min16int}}
-dword left47; min16uint right47; left47 = right47;
+dword left47; min16uint right47; left47 = right47; /* expected-warning {{min16uint is promoted to uint}} */
 half left48; bool right48; left48 = right48;
 half left49; int right49; left49 = right49;        /* expected-warning {{conversion from larger type 'int' to smaller type 'half', possible loss of data}} */
 half left50; uint right50; left50 = right50;       /* expected-warning {{conversion from larger type 'uint' to smaller type 'half', possible loss of data}} */
@@ -87,9 +87,9 @@ half left53; float right53; left53 = right53;      /* expected-warning {{convers
 half left54; double right54; left54 = right54; // expected-warning {{conversion from larger type 'double' to smaller type 'half', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
 half left55; min16float right55; left55 = right55;          /* expected-warning {{min16float is promoted to half}} */
 half left56; min10float right56; left56 = right56;  // expected-warning {{min10float is promoted to half}}
-half left57; min16int right57; left57 = right57;
-half left58; min12int right58; left58 = right58;  // expected-warning {{min12int is promoted to min16int}}
-half left59; min16uint right59; left59 = right59;
+half left57; min16int right57; left57 = right57;    /* expected-warning {{conversion from larger type 'min16int' to smaller type 'half', possible loss of data}} expected-warning {{min16int is promoted to int}} */
+half left58; min12int right58; left58 = right58;  // expected-warning {{conversion from larger type 'min12int' to smaller type 'half', possible loss of data}} expected-warning {{min12int is promoted to min16int}}
+half left59; min16uint right59; left59 = right59; /* expected-warning {{conversion from larger type 'min16uint' to smaller type 'half', possible loss of data}} expected-warning {{min16uint is promoted to uint}} */
 float left60; bool right60; left60 = right60;
 float left61; int right61; left61 = right61;
 float left62; uint right62; left62 = right62;
@@ -99,9 +99,9 @@ float left65; float right65; left65 = right65;
 float left66; double right66; left66 = right66; // expected-warning {{conversion from larger type 'double' to smaller type 'float', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
 float left67; min16float right67; left67 = right67;         /* expected-warning {{min16float is promoted to half}} */
 float left68; min10float right68; left68 = right68;  // expected-warning {{min10float is promoted to half}}
-float left69; min16int right69; left69 = right69;
+float left69; min16int right69; left69 = right69;    /* expected-warning {{min16int is promoted to int}} */
 float left70; min12int right70; left70 = right70;  // expected-warning {{min12int is promoted to min16int}}
-float left71; min16uint right71; left71 = right71;
+float left71; min16uint right71; left71 = right71; /* expected-warning {{min16uint is promoted to uint}} */
 double left72; bool right72; left72 = right72;
 double left73; int right73; left73 = right73;
 double left74; uint right74; left74 = right74;
@@ -111,9 +111,9 @@ double left77; float right77; left77 = right77;
 double left78; double right78; left78 = right78;
 double left79; min16float right79; left79 = right79;        /* expected-warning {{min16float is promoted to half}} */
 double left80; min10float right80; left80 = right80;  // expected-warning {{min10float is promoted to half}}
-double left81; min16int right81; left81 = right81;
+double left81; min16int right81; left81 = right81;    /* expected-warning {{min16int is promoted to int}} */
 double left82; min12int right82; left82 = right82;  // expected-warning {{min12int is promoted to min16int}}
-double left83; min16uint right83; left83 = right83;
+double left83; min16uint right83; left83 = right83; /* expected-warning {{min16uint is promoted to uint}} */
 min16float left84; bool right84; left84 = right84;  /* expected-warning {{min16float is promoted to half}} */
 min16float left85; int right85; left85 = right85; // expected-warning {{conversion from larger type 'int' to smaller type 'min16float', possible loss of data}} expected-warning {{min16float is promoted to half}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
 min16float left86; uint right86; left86 = right86; // expected-warning {{conversion from larger type 'uint' to smaller type 'min16float', possible loss of data}} expected-warning {{min16float is promoted to half}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
@@ -123,9 +123,9 @@ min16float left89; float right89; left89 = right89; // expected-warning {{conver
 min16float left90; double right90; left90 = right90; // expected-warning {{conversion from larger type 'double' to smaller type 'min16float', possible loss of data}} expected-warning {{min16float is promoted to half}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
 min16float left91; min16float right91; left91 = right91;    /* expected-warning {{min16float is promoted to half}} expected-warning {{min16float is promoted to half}} */
 min16float left92; min10float right92; left92 = right92;  // expected-warning {{min10float is promoted to half}} expected-warning {{min16float is promoted to half}}
-min16float left93; min16int right93; left93 = right93;    /* expected-warning {{min16float is promoted to half}} */
-min16float left94; min12int right94; left94 = right94;  // expected-warning {{min12int is promoted to min16int}} expected-warning {{min16float is promoted to half}}
-min16float left95; min16uint right95; left95 = right95; /* expected-warning {{min16float is promoted to half}} */
+min16float left93; min16int right93; left93 = right93;    /* expected-warning {{conversion from larger type 'min16int' to smaller type 'min16float', possible loss of data}} expected-warning {{min16float is promoted to half}} expected-warning {{min16int is promoted to int}} */
+min16float left94; min12int right94; left94 = right94;  // expected-warning {{conversion from larger type 'min12int' to smaller type 'min16float', possible loss of data}} expected-warning {{min12int is promoted to min16int}} expected-warning {{min16float is promoted to half}}
+min16float left95; min16uint right95; left95 = right95; /* expected-warning {{conversion from larger type 'min16uint' to smaller type 'min16float', possible loss of data}} expected-warning {{min16float is promoted to half}} expected-warning {{min16uint is promoted to uint}} */
 min10float left96; bool right96; left96 = right96;  // expected-warning {{min10float is promoted to half}}
 min10float left97; int right97; left97 = right97; // expected-warning {{conversion from larger type 'int' to smaller type 'min10float', possible loss of data}} expected-warning {{min10float is promoted to half}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
 min10float left98; uint right98; left98 = right98; // expected-warning {{conversion from larger type 'uint' to smaller type 'min10float', possible loss of data}} expected-warning {{min10float is promoted to half}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
@@ -135,45 +135,45 @@ min10float left101; float right101; left101 = right101; // expected-warning {{co
 min10float left102; double right102; left102 = right102; // expected-warning {{conversion from larger type 'double' to smaller type 'min10float', possible loss of data}} expected-warning {{min10float is promoted to half}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
 min10float left103; min16float right103; left103 = right103; // expected-warning {{min10float is promoted to half}} expected-warning {{min16float is promoted to half}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
 min10float left104; min10float right104; left104 = right104;  // expected-warning {{min10float is promoted to half}} expected-warning {{min10float is promoted to half}} //
-min10float left105; min16int right105; left105 = right105; // expected-warning {{min10float is promoted to half}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
-min10float left106; min12int right106; left106 = right106; // expected-warning {{min10float is promoted to half}} expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
-min10float left107; min16uint right107; left107 = right107; // expected-warning {{min10float is promoted to half}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
-min16int left108; bool right108; left108 = right108;
-min16int left109; int right109; left109 = right109; // expected-warning {{conversion from larger type 'int' to smaller type 'min16int', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16int left110; uint right110; left110 = right110; // expected-warning {{conversion from larger type 'uint' to smaller type 'min16int', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16int left111; dword right111; left111 = right111; // expected-warning {{conversion from larger type 'dword' to smaller type 'min16int', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16int left112; half right112; left112 = right112; // fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16int left113; float right113; left113 = right113; // expected-warning {{conversion from larger type 'float' to smaller type 'min16int', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16int left114; double right114; left114 = right114; // expected-warning {{conversion from larger type 'double' to smaller type 'min16int', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16int left115; min16float right115; left115 = right115;    /* expected-warning {{min16float is promoted to half}} */
-min16int left116; min10float right116; left116 = right116;  // expected-warning {{min10float is promoted to half}}
-min16int left117; min16int right117; left117 = right117;
-min16int left118; min12int right118; left118 = right118;  // expected-warning {{min12int is promoted to min16int}}
-min16int left119; min16uint right119; left119 = right119;
+min10float left105; min16int right105; left105 = right105; // expected-warning {{conversion from larger type 'min16int' to smaller type 'min10float', possible loss of data}} expected-warning {{min10float is promoted to half}} expected-warning {{min16int is promoted to int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
+min10float left106; min12int right106; left106 = right106; // expected-warning {{conversion from larger type 'min12int' to smaller type 'min10float', possible loss of data}} expected-warning {{min10float is promoted to half}} expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
+min10float left107; min16uint right107; left107 = right107; // expected-warning {{conversion from larger type 'min16uint' to smaller type 'min10float', possible loss of data}} expected-warning {{min10float is promoted to half}} expected-warning {{min16uint is promoted to uint}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
+min16int left108; bool right108; left108 = right108;        /* expected-warning {{min16int is promoted to int}} */
+min16int left109; int right109; left109 = right109; // expected-warning {{min16int is promoted to int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16int left110; uint right110; left110 = right110; // expected-warning {{min16int is promoted to int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16int left111; dword right111; left111 = right111; // expected-warning {{min16int is promoted to int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16int left112; half right112; left112 = right112; // expected-warning {{min16int is promoted to int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16int left113; float right113; left113 = right113; // expected-warning {{min16int is promoted to int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16int left114; double right114; left114 = right114; // expected-warning {{conversion from larger type 'double' to smaller type 'min16int', possible loss of data}} expected-warning {{min16int is promoted to int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16int left115; min16float right115; left115 = right115;    /* expected-warning {{min16float is promoted to half}} expected-warning {{min16int is promoted to int}} */
+min16int left116; min10float right116; left116 = right116;  // expected-warning {{min10float is promoted to half}} expected-warning {{min16int is promoted to int}}
+min16int left117; min16int right117; left117 = right117;    /* expected-warning {{min16int is promoted to int}} expected-warning {{min16int is promoted to int}} */
+min16int left118; min12int right118; left118 = right118;  // expected-warning {{min12int is promoted to min16int}} expected-warning {{min16int is promoted to int}}
+min16int left119; min16uint right119; left119 = right119; /* expected-warning {{min16int is promoted to int}} expected-warning {{min16uint is promoted to uint}} */
 min12int left120; bool right120; left120 = right120;  // expected-warning {{min12int is promoted to min16int}}
-min12int left121; int right121; left121 = right121; // expected-warning {{conversion from larger type 'int' to smaller type 'min12int', possible loss of data}} expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
-min12int left122; uint right122; left122 = right122; // expected-warning {{conversion from larger type 'uint' to smaller type 'min12int', possible loss of data}} expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
-min12int left123; dword right123; left123 = right123; // expected-warning {{conversion from larger type 'dword' to smaller type 'min12int', possible loss of data}} expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
+min12int left121; int right121; left121 = right121; // expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
+min12int left122; uint right122; left122 = right122; // expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
+min12int left123; dword right123; left123 = right123; // expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
 min12int left124; half right124; left124 = right124; // expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
-min12int left125; float right125; left125 = right125; // expected-warning {{conversion from larger type 'float' to smaller type 'min12int', possible loss of data}} expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
+min12int left125; float right125; left125 = right125; // expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
 min12int left126; double right126; left126 = right126; // expected-warning {{conversion from larger type 'double' to smaller type 'min12int', possible loss of data}} expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
 min12int left127; min16float right127; left127 = right127; // expected-warning {{min12int is promoted to min16int}} expected-warning {{min16float is promoted to half}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
 min12int left128; min10float right128; left128 = right128;  // expected-warning {{min10float is promoted to half}} expected-warning {{min12int is promoted to min16int}} //
-min12int left129; min16int right129; left129 = right129; // expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
+min12int left129; min16int right129; left129 = right129; // expected-warning {{min12int is promoted to min16int}} expected-warning {{min16int is promoted to int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
 min12int left130; min12int right130; left130 = right130;  // expected-warning {{min12int is promoted to min16int}} expected-warning {{min12int is promoted to min16int}} //
-min12int left131; min16uint right131; left131 = right131; // expected-warning {{min12int is promoted to min16int}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
-min16uint left132; bool right132; left132 = right132;
-min16uint left133; int right133; left133 = right133; // expected-warning {{conversion from larger type 'int' to smaller type 'min16uint', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16uint left134; uint right134; left134 = right134; // expected-warning {{conversion from larger type 'uint' to smaller type 'min16uint', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16uint left135; dword right135; left135 = right135; // expected-warning {{conversion from larger type 'dword' to smaller type 'min16uint', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16uint left136; half right136; left136 = right136; // fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16uint left137; float right137; left137 = right137; // expected-warning {{conversion from larger type 'float' to smaller type 'min16uint', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16uint left138; double right138; left138 = right138; // expected-warning {{conversion from larger type 'double' to smaller type 'min16uint', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
-min16uint left139; min16float right139; left139 = right139;    /* expected-warning {{min16float is promoted to half}} */
-min16uint left140; min10float right140; left140 = right140;  // expected-warning {{min10float is promoted to half}}
-min16uint left141; min16int right141; left141 = right141;
-min16uint left142; min12int right142; left142 = right142;  // expected-warning {{min12int is promoted to min16int}}
-min16uint left143; min16uint right143; left143 = right143;
+min12int left131; min16uint right131; left131 = right131; // expected-warning {{min12int is promoted to min16int}} expected-warning {{min16uint is promoted to uint}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} //
+min16uint left132; bool right132; left132 = right132;     /* expected-warning {{min16uint is promoted to uint}} */
+min16uint left133; int right133; left133 = right133; // expected-warning {{min16uint is promoted to uint}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16uint left134; uint right134; left134 = right134; // expected-warning {{min16uint is promoted to uint}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16uint left135; dword right135; left135 = right135; // expected-warning {{min16uint is promoted to uint}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16uint left136; half right136; left136 = right136; // expected-warning {{min16uint is promoted to uint}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16uint left137; float right137; left137 = right137; // expected-warning {{min16uint is promoted to uint}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16uint left138; double right138; left138 = right138; // expected-warning {{conversion from larger type 'double' to smaller type 'min16uint', possible loss of data}} expected-warning {{min16uint is promoted to uint}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+min16uint left139; min16float right139; left139 = right139;    /* expected-warning {{min16float is promoted to half}} expected-warning {{min16uint is promoted to uint}} */
+min16uint left140; min10float right140; left140 = right140;  // expected-warning {{min10float is promoted to half}} expected-warning {{min16uint is promoted to uint}}
+min16uint left141; min16int right141; left141 = right141;    /* expected-warning {{min16int is promoted to int}} expected-warning {{min16uint is promoted to uint}} */
+min16uint left142; min12int right142; left142 = right142;  // expected-warning {{min12int is promoted to min16int}} expected-warning {{min16uint is promoted to uint}}
+min16uint left143; min16uint right143; left143 = right143; /* expected-warning {{min16uint is promoted to uint}} expected-warning {{min16uint is promoted to uint}} */
 
 // Now with unorm and snorm modifiers.
 /*

+ 103 - 103
tools/clang/test/HLSL/scalar-operators-exact-precision.hlsl

@@ -25,9 +25,9 @@ float4 plain(float4 param4 : FOO) : FOO {
     double      doubles     = 0;
     min16float  min16floats = 0; /* expected-warning {{min16float is promoted to half}} */
     min10float  min10floats = 0; // expected-warning {{min10float is promoted to half}}
-    min16int    min16ints   = 0;
+    min16int    min16ints   = 0; /* expected-warning {{min16int is promoted to int}} */
     min12int    min12ints   = 0; // expected-warning {{min12int is promoted to min16int}}
-    min16uint   min16uints  = 0;
+    min16uint   min16uints  = 0; /* expected-warning {{min16uint is promoted to uint}} */
 
     // _Static_assert(std::is_same<bool, bool>::value, "bool, bool failed");
     _Static_assert(std::is_same<bool, __decltype(bools)>::value, "bool, __decltype(bools) failed");
@@ -35,7 +35,7 @@ float4 plain(float4 param4 : FOO) : FOO {
 
 
     // float result = ints + floats;
-    _Static_assert(std::is_same<min16uint, __decltype(min16ints  + min16uints)>::value, "");
+    _Static_assert(std::is_same<min16uint, __decltype(min16ints  + min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
 
     // Promotion cases with addition.
     // Two unsigned types will widen to widest type.
@@ -49,8 +49,8 @@ float4 plain(float4 param4 : FOO) : FOO {
     // Mixed signed-unsigned will widen to largest unsigned.
     _Static_assert(std::is_same<uint, __decltype(ints  + uints)>::value, "");
     _Static_assert(std::is_same<uint, __decltype(uints + ints )>::value, "");
-    _Static_assert(std::is_same<min16uint, __decltype(min16ints  + min16uints)>::value, "");
-    _Static_assert(std::is_same<min16uint, __decltype(min16uints + min16ints )>::value, "");
+    _Static_assert(std::is_same<min16uint, __decltype(min16ints  + min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+    _Static_assert(std::is_same<min16uint, __decltype(min16uints + min16ints )>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
 
     // Mixed integral/floating point will turn to floating-point.
     _Static_assert(std::is_same<float, __decltype(ints    + floats)>::value, "");
@@ -159,7 +159,7 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<min10float, __decltype(min10floats + min16ints)>::value, "");  // expected-warning {{min10float is promoted to half}}
   _Static_assert(std::is_same<min10float, __decltype(min10floats + min12ints)>::value, "");  // expected-warning {{min10float is promoted to half}}
   _Static_assert(std::is_same<min10float, __decltype(min10floats + min16uints)>::value, ""); // expected-warning {{min10float is promoted to half}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints + bools)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints + bools)>::value, "");          /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<int, __decltype(min16ints + ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16ints + uints)>::value, "");
   _Static_assert(std::is_same<half, __decltype(min16ints + halfs)>::value, "");
@@ -167,9 +167,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<double, __decltype(min16ints + doubles)>::value, "");
   _Static_assert(std::is_same<min16float, __decltype(min16ints + min16floats)>::value, "");  /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min16ints + min10floats)>::value, "");  // expected-warning {{min10float is promoted to half}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints + min16ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints + min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16ints + min16uints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints + min16ints)>::value, "");      /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints + min12ints)>::value, "");      /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16ints + min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints + bools)>::value, "");   // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<int, __decltype(min12ints + ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min12ints + uints)>::value, "");
@@ -178,10 +178,10 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<double, __decltype(min12ints + doubles)>::value, "");
   _Static_assert(std::is_same<min16float, __decltype(min12ints + min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min12ints + min10floats)>::value, "");  // expected-warning {{min10float is promoted to half}}
-  _Static_assert(std::is_same<min16int, __decltype(min12ints + min16ints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min12ints + min16ints)>::value, "");      /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints + min12ints)>::value, "");    // expected-warning {{min12int is promoted to min16int}}
-  _Static_assert(std::is_same<min16uint, __decltype(min12ints + min16uints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints + bools)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min12ints + min16uints)>::value, "");  /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints + bools)>::value, "");      /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<uint, __decltype(min16uints + ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16uints + uints)>::value, "");
   _Static_assert(std::is_same<half, __decltype(min16uints + halfs)>::value, "");
@@ -189,9 +189,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<double, __decltype(min16uints + doubles)>::value, "");
   _Static_assert(std::is_same<min16float, __decltype(min16uints + min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min16uints + min10floats)>::value, "");   // expected-warning {{min10float is promoted to half}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints + min16ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints + min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints + min16uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints + min16ints)>::value, "");      /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints + min12ints)>::value, "");      /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints + min16uints)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<int, __decltype(bools - bools)>::value, "");
   _Static_assert(std::is_same<int, __decltype(bools - ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(bools - uints)>::value, "");
@@ -280,7 +280,7 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<min10float, __decltype(min10floats - min16ints)>::value, "");    // expected-warning {{min10float is promoted to half}}
   _Static_assert(std::is_same<min10float, __decltype(min10floats - min12ints)>::value, "");    // expected-warning {{min10float is promoted to half}}
   _Static_assert(std::is_same<min10float, __decltype(min10floats - min16uints)>::value, "");   // expected-warning {{min10float is promoted to half}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints - bools)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints - bools)>::value, "");            /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<int, __decltype(min16ints - ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16ints - uints)>::value, "");
   _Static_assert(std::is_same<half, __decltype(min16ints - halfs)>::value, "");
@@ -288,9 +288,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<double, __decltype(min16ints - doubles)>::value, "");
   _Static_assert(std::is_same<min16float, __decltype(min16ints - min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min16ints - min10floats)>::value, "");   // expected-warning {{min10float is promoted to half}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints - min16ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints - min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16ints - min16uints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints - min16ints)>::value, "");       /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints - min12ints)>::value, "");       /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16ints - min16uints)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints - bools)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<int, __decltype(min12ints - ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min12ints - uints)>::value, "");
@@ -299,10 +299,10 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<double, __decltype(min12ints - doubles)>::value, "");
   _Static_assert(std::is_same<min16float, __decltype(min12ints - min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min12ints - min10floats)>::value, "");   // expected-warning {{min10float is promoted to half}}
-  _Static_assert(std::is_same<min16int, __decltype(min12ints - min16ints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min12ints - min16ints)>::value, "");       /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints - min12ints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
-  _Static_assert(std::is_same<min16uint, __decltype(min12ints - min16uints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints - bools)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min12ints - min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints - bools)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<uint, __decltype(min16uints - ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16uints - uints)>::value, "");
   _Static_assert(std::is_same<half, __decltype(min16uints - halfs)>::value, "");
@@ -310,9 +310,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<double, __decltype(min16uints - doubles)>::value, "");
   _Static_assert(std::is_same<min16float, __decltype(min16uints - min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min16uints - min10floats)>::value, "");   // expected-warning {{min10float is promoted to half}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints - min16ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints - min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints - min16uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints - min16ints)>::value, "");      /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints - min12ints)>::value, "");      /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints - min16uints)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<int, __decltype(bools / bools)>::value, "");
   _Static_assert(std::is_same<int, __decltype(bools / ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(bools / uints)>::value, "");
@@ -402,7 +402,7 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<min10float, __decltype(min10floats / min12ints)>::value, "");    // expected-warning {{min10float is promoted to half}}
   _Static_assert(std::is_same<min10float, __decltype(min10floats / min16uints)>::value, "");   // expected-warning {{min10float is promoted to half}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-52): error X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min16ints = (min16ints / bools); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  min16ints = (min16ints / bools); // fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
   _Static_assert(std::is_same<int, __decltype(min16ints / ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16ints / uints)>::value, "");
   _Static_assert(std::is_same<half, __decltype(min16ints / halfs)>::value, "");
@@ -411,12 +411,12 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<min16float, __decltype(min16ints / min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min16ints / min10floats)>::value, "");  // expected-warning {{min10float is promoted to half}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-56): error X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min16ints = (min16ints / min16ints); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  min16ints = (min16ints / min16ints); // fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-56): error X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min16ints = (min16ints / min12ints); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16ints / min16uints)>::value, "");
+  min16ints = (min16ints / min12ints); // fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  _Static_assert(std::is_same<min16uint, __decltype(min16ints / min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-52): error X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min12ints = (min12ints / bools); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  min12ints = (min12ints / bools); // fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
   _Static_assert(std::is_same<int, __decltype(min12ints / ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min12ints / uints)>::value, "");
   _Static_assert(std::is_same<half, __decltype(min12ints / halfs)>::value, "");
@@ -425,11 +425,11 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<min16float, __decltype(min12ints / min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min12ints / min10floats)>::value, "");  // expected-warning {{min10float is promoted to half}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-56): error X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min12ints = (min12ints / min16ints); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
+  min12ints = (min12ints / min16ints); // fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-56): error X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min12ints = (min12ints / min12ints); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
-  _Static_assert(std::is_same<min16uint, __decltype(min12ints / min16uints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints / bools)>::value, "");
+  min12ints = (min12ints / min12ints); // fxc-error {{X3706: signed integer division is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  _Static_assert(std::is_same<min16uint, __decltype(min12ints / min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints / bools)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<uint, __decltype(min16uints / ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16uints / uints)>::value, "");
   _Static_assert(std::is_same<half, __decltype(min16uints / halfs)>::value, "");
@@ -437,9 +437,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<double, __decltype(min16uints / doubles)>::value, "");
   _Static_assert(std::is_same<min16float, __decltype(min16uints / min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min16uints / min10floats)>::value, "");  // expected-warning {{min10float is promoted to half}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints / min16ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints / min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints / min16uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints / min16ints)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints / min12ints)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints / min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<int, __decltype(bools % bools)>::value, "");
   _Static_assert(std::is_same<int, __decltype(bools % ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(bools % uints)>::value, "");
@@ -547,7 +547,7 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<min10float, __decltype(min10floats % min12ints)>::value, "");  // expected-warning {{min10float is promoted to half}}
   _Static_assert(std::is_same<min10float, __decltype(min10floats % min16uints)>::value, "");  // expected-warning {{min10float is promoted to half}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-52): error X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min16ints = (min16ints % bools); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  min16ints = (min16ints % bools); // fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
   _Static_assert(std::is_same<int, __decltype(min16ints % ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16ints % uints)>::value, "");
   _Static_assert(std::is_same<half, __decltype(min16ints % halfs)>::value, "");
@@ -557,12 +557,12 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<min16float, __decltype(min16ints % min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min16ints % min10floats)>::value, "");  // expected-warning {{min10float is promoted to half}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-56): error X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min16ints = (min16ints % min16ints); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  min16ints = (min16ints % min16ints); // fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-56): error X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min16ints = (min16ints % min12ints); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16ints % min16uints)>::value, "");
+  min16ints = (min16ints % min12ints); // fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  _Static_assert(std::is_same<min16uint, __decltype(min16ints % min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-52): error X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min16ints = (min12ints % bools); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  min16ints = (min12ints % bools); // fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
   _Static_assert(std::is_same<int, __decltype(min12ints % ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min12ints % uints)>::value, "");
   _Static_assert(std::is_same<half, __decltype(min12ints % halfs)>::value, "");
@@ -572,11 +572,11 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<min16float, __decltype(min12ints % min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min12ints % min10floats)>::value, "");  // expected-warning {{min10float is promoted to half}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-56): error X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min16ints = (min12ints % min16ints); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  min16ints = (min12ints % min16ints); // fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-56): error X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.;compilation failed; no code produced;
-  min16ints = (min12ints % min12ints); // expected-error {{signed integer division is not supported on minimum-precision types, cast to int to use 32-bit division}} fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
-  _Static_assert(std::is_same<min16uint, __decltype(min12ints % min16uints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints % bools)>::value, "");
+  min16ints = (min12ints % min12ints); // fxc-error {{X3706: signed integer remainder is not supported on minimum-precision types. Cast to int to use 32-bit division.}}
+  _Static_assert(std::is_same<min16uint, __decltype(min12ints % min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints % bools)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<uint, __decltype(min16uints % ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16uints % uints)>::value, "");
   _Static_assert(std::is_same<half, __decltype(min16uints % halfs)>::value, "");
@@ -585,9 +585,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16ints = (min16uints % doubles); // expected-error {{modulo cannot be used with doubles, cast to float first}} expected-warning {{conversion from larger type 'double' to smaller type 'min16int', possible loss of data}} fxc-error {{X3684: modulo cannot be used with doubles, cast to float first}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}}
   _Static_assert(std::is_same<min16float, __decltype(min16uints % min16floats)>::value, "");    /* expected-warning {{min16float is promoted to half}} */
   _Static_assert(std::is_same<min10float, __decltype(min16uints % min10floats)>::value, "");  // expected-warning {{min10float is promoted to half}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints % min16ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints % min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints % min16uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints % min16ints)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints % min12ints)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints % min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<bool, __decltype(bools < bools)>::value, "bool, __decltype(bools < bools) failed");
   _Static_assert(std::is_same<bool, __decltype(bools < ints)>::value, "");
   _Static_assert(std::is_same<bool, __decltype(bools < uints)>::value, "");
@@ -1472,9 +1472,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min10floats = (min10floats << min12ints); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-60): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-61): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min10floats = (min10floats << min16uints); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints << bools)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints << ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints << uints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints << bools)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints << ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints << uints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-53): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-54): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16ints = (min16ints << halfs); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-54): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-55): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
@@ -1485,9 +1485,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16ints = (min16ints << min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-59): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-60): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16ints = (min16ints << min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints << min16ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints << min12ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints << min16uints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints << min16ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints << min12ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints << min16uints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints << bools)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<min12int, __decltype(min12ints << ints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<min12int, __decltype(min12ints << uints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
@@ -1504,9 +1504,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<min12int, __decltype(min12ints << min16ints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<min12int, __decltype(min12ints << min12ints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<min12int, __decltype(min12ints << min16uints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints << bools)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints << ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints << uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints << bools)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints << ints)>::value, "");      /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints << uints)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-54): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-55): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16uints = (min16uints << halfs); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-55): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-56): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
@@ -1517,9 +1517,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16uints = (min16uints << min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-60): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-61): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16uints = (min16uints << min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints << min16ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints << min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints << min16uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints << min16ints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints << min12ints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints << min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<int, __decltype(bools >> bools)>::value, "");
   _Static_assert(std::is_same<int, __decltype(bools >> ints)>::value, "");
   _Static_assert(std::is_same<int, __decltype(bools >> uints)>::value, "");
@@ -1678,9 +1678,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min10floats = (min10floats >> min12ints); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-60): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-61): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min10floats = (min10floats >> min16uints); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints >> bools)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints >> ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints >> uints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints >> bools)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints >> ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints >> uints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-53): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-54): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16ints = (min16ints >> halfs); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-54): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-55): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
@@ -1691,9 +1691,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16ints = (min16ints >> min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-59): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-60): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16ints = (min16ints >> min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints >> min16ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints >> min12ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints >> min16uints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints >> min16ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints >> min12ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints >> min16uints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints >> bools)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<min12int, __decltype(min12ints >> ints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<min12int, __decltype(min12ints >> uints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
@@ -1710,9 +1710,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   _Static_assert(std::is_same<min12int, __decltype(min12ints >> min16ints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<min12int, __decltype(min12ints >> min12ints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<min12int, __decltype(min12ints >> min16uints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> bools)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> bools)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> ints)>::value, "");      /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> uints)>::value, "");     /* expected-warning {{min16uint is promoted to uint}} */
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-54): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-55): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16uints = (min16uints >> halfs); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-55): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-56): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
@@ -1723,9 +1723,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16uints = (min16uints >> min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-60): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-61): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16uints = (min16uints >> min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> min16ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> min16uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> min16ints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> min12ints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints >> min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<int, __decltype(bools & bools)>::value, "");
   _Static_assert(std::is_same<int, __decltype(bools & ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(bools & uints)>::value, "");
@@ -1884,7 +1884,7 @@ float4 plain(float4 param4 : FOO) : FOO {
   min10floats = (min10floats & min12ints); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-59): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-60): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min10floats = (min10floats & min16uints); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints & bools)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints & bools)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<int, __decltype(min16ints & ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16ints & uints)>::value, "");
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-52): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-53): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
@@ -1897,9 +1897,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16ints = (min16ints & min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-58): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-59): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16ints = (min16ints & min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints & min16ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints & min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16ints & min16uints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints & min16ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints & min12ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16ints & min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints & bools)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<int, __decltype(min12ints & ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min12ints & uints)>::value, "");
@@ -1913,10 +1913,10 @@ float4 plain(float4 param4 : FOO) : FOO {
   min12ints = (min12ints & min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-58): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-59): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min12ints = (min12ints & min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min12ints & min16ints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min12ints & min16ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints & min12ints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
-  _Static_assert(std::is_same<min16uint, __decltype(min12ints & min16uints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints & bools)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min12ints & min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints & bools)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<uint, __decltype(min16uints & ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16uints & uints)>::value, "");
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-53): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-54): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
@@ -1929,9 +1929,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16uints = (min16uints & min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-59): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-60): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16uints = (min16uints & min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints & min16ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints & min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints & min16uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints & min16ints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints & min12ints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints & min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<int, __decltype(bools | bools)>::value, "");
   _Static_assert(std::is_same<int, __decltype(bools | ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(bools | uints)>::value, "");
@@ -2090,7 +2090,7 @@ float4 plain(float4 param4 : FOO) : FOO {
   min10floats = (min10floats | min12ints); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-59): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-60): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min10floats = (min10floats | min16uints); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints | bools)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints | bools)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<int, __decltype(min16ints | ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16ints | uints)>::value, "");
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-52): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-53): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
@@ -2103,9 +2103,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16ints = (min16ints | min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-58): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-59): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16ints = (min16ints | min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints | min16ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints | min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16ints | min16uints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints | min16ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints | min12ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16ints | min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints | bools)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<int, __decltype(min12ints | ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min12ints | uints)>::value, "");
@@ -2119,10 +2119,10 @@ float4 plain(float4 param4 : FOO) : FOO {
   min12ints = (min12ints | min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-58): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-59): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min12ints = (min12ints | min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min12ints | min16ints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min12ints | min16ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints | min12ints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
-  _Static_assert(std::is_same<min16uint, __decltype(min12ints | min16uints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints | bools)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min12ints | min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints | bools)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<uint, __decltype(min16uints | ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16uints | uints)>::value, "");
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-53): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-54): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
@@ -2135,9 +2135,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16uints = (min16uints | min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-59): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-60): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16uints = (min16uints | min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints | min16ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints | min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints | min16uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints | min16ints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints | min12ints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints | min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<int, __decltype(bools ^ bools)>::value, "");
   _Static_assert(std::is_same<int, __decltype(bools ^ ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(bools ^ uints)>::value, "");
@@ -2296,7 +2296,7 @@ float4 plain(float4 param4 : FOO) : FOO {
   min10floats = (min10floats ^ min12ints); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-59): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-60): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min10floats = (min10floats ^ min16uints); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints ^ bools)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints ^ bools)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<int, __decltype(min16ints ^ ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16ints ^ uints)>::value, "");
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-52): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-53): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
@@ -2309,9 +2309,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16ints = (min16ints ^ min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-58): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-59): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16ints = (min16ints ^ min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min16ints ^ min16ints)>::value, "");
-  _Static_assert(std::is_same<min16int, __decltype(min16ints ^ min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16ints ^ min16uints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min16ints ^ min16ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16int, __decltype(min16ints ^ min12ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16ints ^ min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints ^ bools)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
   _Static_assert(std::is_same<int, __decltype(min12ints ^ ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min12ints ^ uints)>::value, "");
@@ -2325,10 +2325,10 @@ float4 plain(float4 param4 : FOO) : FOO {
   min12ints = (min12ints ^ min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-58): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-59): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min12ints = (min12ints ^ min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16int, __decltype(min12ints ^ min16ints)>::value, "");
+  _Static_assert(std::is_same<min16int, __decltype(min12ints ^ min16ints)>::value, "");    /* expected-warning {{min16int is promoted to int}} */
   _Static_assert(std::is_same<min12int, __decltype(min12ints ^ min12ints)>::value, "");  // expected-warning {{min12int is promoted to min16int}}
-  _Static_assert(std::is_same<min16uint, __decltype(min12ints ^ min16uints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints ^ bools)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min12ints ^ min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints ^ bools)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<uint, __decltype(min16uints ^ ints)>::value, "");
   _Static_assert(std::is_same<uint, __decltype(min16uints ^ uints)>::value, "");
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-53): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-54): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
@@ -2341,9 +2341,9 @@ float4 plain(float4 param4 : FOO) : FOO {
   min16uints = (min16uints ^ min16floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
   // X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,22-59): error X3082: int or unsigned int type required;X:\temp\Sfbl_grfx_dev_p\x86\chk\operators.js.hlsl(16,12-60): error X3013: 'get_value': no matching 1 parameter function;compilation failed; no code produced
   min16uints = (min16uints ^ min10floats); // expected-error {{int or unsigned int type required}} fxc-error {{X3082: int or unsigned int type required}}
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints ^ min16ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints ^ min12ints)>::value, "");
-  _Static_assert(std::is_same<min16uint, __decltype(min16uints ^ min16uints)>::value, "");
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints ^ min16ints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints ^ min12ints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
+  _Static_assert(std::is_same<min16uint, __decltype(min16uints ^ min16uints)>::value, "");    /* expected-warning {{min16uint is promoted to uint}} */
   _Static_assert(std::is_same<bool, __decltype(bools && bools)>::value, "");
   _Static_assert(std::is_same<bool, __decltype(bools && ints)>::value, "");
   _Static_assert(std::is_same<bool, __decltype(bools && uints)>::value, "");

+ 2 - 2
tools/clang/test/HLSL/vector-syntax-exact-precision.hlsl

@@ -90,10 +90,10 @@ void vector_out_of_bounds() {
 
 void vector_unsigned() {
    unsigned int4 intvector;
-   unsigned min16int4 min16vector;
+   unsigned min16int4 min16vector;                          /* expected-warning {{min16int is promoted to int}} */
    unsigned int64_t3 int64vector;
    unsigned uint3 uintvector;
-   unsigned min16uint4 min16uintvector;
+   unsigned min16uint4 min16uintvector;                     /* expected-warning {{min16uint is promoted to uint}} */
    unsigned uint64_t2 int64uintvector;
    unsigned dword3 dwordvector; /* fxc-error {{X3000: unrecognized identifier 'dword3'}} */
 

+ 3 - 1
tools/clang/tools/dxcompiler/dxcdisassembler.cpp

@@ -964,7 +964,9 @@ static const char *OpCodeSignatures[] = {
   "(value)",  // WaveAllBitCount
   "(value)",  // WavePrefixBitCount
   "(inputSigId,inputRowIndex,inputColIndex,VertexID)",  // AttributeAtVertex
-  "()"  // ViewID
+  "()",  // ViewID
+  "(srv,index,elementOffset,mask,alignment)",  // RawBufferLoad
+  "(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment)"  // RawBufferStore
 };
 // OPCODE-SIGS:END
 

+ 60 - 1
tools/clang/unittests/HLSL/CompilerTest.cpp

@@ -701,6 +701,10 @@ public:
   TEST_METHOD(CodeGenPreserveAllOutputs)
   TEST_METHOD(CodeGenRaceCond2)
   TEST_METHOD(CodeGenRaw_Buf1)
+  TEST_METHOD(CodeGenRaw_Buf2)
+  TEST_METHOD(CodeGenRaw_Buf3)
+  TEST_METHOD(CodeGenRaw_Buf4)
+  TEST_METHOD(CodeGenRaw_Buf5)
   TEST_METHOD(CodeGenRcp1)
   TEST_METHOD(CodeGenReadFromOutput)
   TEST_METHOD(CodeGenReadFromOutput2)
@@ -781,6 +785,11 @@ public:
   TEST_METHOD(CodeGenStaticResource)
   TEST_METHOD(CodeGenStaticResource2)
   TEST_METHOD(CodeGenStruct_Buf1)
+  TEST_METHOD(CodeGenStruct_Buf2)
+  TEST_METHOD(CodeGenStruct_Buf3)
+  TEST_METHOD(CodeGenStruct_Buf4)
+  TEST_METHOD(CodeGenStruct_Buf5)
+  TEST_METHOD(CodeGenStruct_Buf6)
   TEST_METHOD(CodeGenStruct_Buf_New_Layout)
   TEST_METHOD(CodeGenStruct_BufHasCounter)
   TEST_METHOD(CodeGenStruct_BufHasCounter2)
@@ -806,6 +815,7 @@ public:
   TEST_METHOD(CodeGenUav_Raw1)
   TEST_METHOD(CodeGenUav_Typed_Load_Store1)
   TEST_METHOD(CodeGenUav_Typed_Load_Store2)
+  TEST_METHOD(CodeGenUav_Typed_Load_Store3)
   TEST_METHOD(CodeGenUint64_1)
   TEST_METHOD(CodeGenUint64_2)
   TEST_METHOD(CodeGenUintSample)
@@ -3954,6 +3964,25 @@ TEST_F(CompilerTest, CodeGenRaw_Buf1) {
   CodeGenTest(L"..\\CodeGenHLSL\\raw_buf1.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenRaw_Buf2) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\raw_buf2.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenRaw_Buf3) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\raw_buf3.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenRaw_Buf4) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\raw_buf4.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenRaw_Buf5) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\raw_buf5.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenRcp1) {
   CodeGenTest(L"..\\CodeGenHLSL\\rcp1.hlsl");
 }
@@ -4272,7 +4301,32 @@ TEST_F(CompilerTest, CodeGenStaticResource2) {
 }
 
 TEST_F(CompilerTest, CodeGenStruct_Buf1) {
-  CodeGenTest(L"..\\CodeGenHLSL\\struct_buf1.hlsl");
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\struct_buf1.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenStruct_Buf2) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\struct_buf2.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenStruct_Buf3) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\struct_buf3.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenStruct_Buf4) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\struct_buf4.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenStruct_Buf5) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\struct_buf5.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenStruct_Buf6) {
+  if (m_ver.SkipDxilVersion(1, 2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\struct_buf6.hlsl");
 }
 
 TEST_F(CompilerTest, CodeGenStruct_Buf_New_Layout) {
@@ -4376,6 +4430,11 @@ TEST_F(CompilerTest, CodeGenUav_Typed_Load_Store2) {
   CodeGenTest(L"..\\CodeGenHLSL\\uav_typed_load_store2.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenUav_Typed_Load_Store3) {
+  if (m_ver.SkipDxilVersion(1,2)) return;
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\uav_typed_load_store3.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenUint64_1) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\uint64_1.hlsl");
 }

+ 50 - 0
utils/hct/gen_intrin_main.txt

@@ -712,10 +712,30 @@ uint [[ro]] Load(in uint byteOffset) : byteaddress_load;
 uint<2> [[ro]] Load2(in uint byteOffset) : byteaddress_load;
 uint<3> [[ro]] Load3(in uint byteOffset) : byteaddress_load;
 uint<4> [[ro]] Load4(in uint byteOffset) : byteaddress_load;
+half [[ro]] LoadHalf(in uint byteOffset) : byteaddress_load;
+half<2> [[ro]] LoadHalf2(in uint byteOffset) : byteaddress_load;
+half<3> [[ro]] LoadHalf3(in uint byteOffset) : byteaddress_load;
+half<4> [[ro]] LoadHalf4(in uint byteOffset) : byteaddress_load;
+float [[ro]] LoadFloat(in uint byteOffset) : byteaddress_load;
+float<2> [[ro]] LoadFloat2(in uint byteOffset) : byteaddress_load;
+float<3> [[ro]] LoadFloat3(in uint byteOffset) : byteaddress_load;
+float<4> [[ro]] LoadFloat4(in uint byteOffset) : byteaddress_load;
+double [[ro]] LoadDouble(in uint byteOffset) : byteaddress_load;
+double<2> [[ro]] LoadDouble2(in uint byteOffset) : byteaddress_load;
 uint [[]] Load(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<2> [[]] Load2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<3> [[]] Load3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<4> [[]] Load4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+half [[]] LoadHalf(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+half<2> [[]] LoadHalf2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+half<3> [[]] LoadHalf3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+half<4> [[]] LoadHalf4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+float [[]] LoadFloat(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+float<2> [[]] LoadFloat2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+float<3> [[]] LoadFloat3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+float<4> [[]] LoadFloat4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+double [[]] LoadDouble(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+double<2> [[]] LoadDouble2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 
 } namespace
 
@@ -726,14 +746,44 @@ uint [[ro]] Load(in uint byteOffset) : byteaddress_load;
 uint<2> [[ro]] Load2(in uint byteOffset) : byteaddress_load;
 uint<3> [[ro]] Load3(in uint byteOffset) : byteaddress_load;
 uint<4> [[ro]] Load4(in uint byteOffset) : byteaddress_load;
+half [[ro]] LoadHalf(in uint byteOffset) : byteaddress_load;
+half<2> [[ro]] LoadHalf2(in uint byteOffset) : byteaddress_load;
+half<3> [[ro]] LoadHalf3(in uint byteOffset) : byteaddress_load;
+half<4> [[ro]] LoadHalf4(in uint byteOffset) : byteaddress_load;
+float [[ro]] LoadFloat(in uint byteOffset) : byteaddress_load;
+float<2> [[ro]] LoadFloat2(in uint byteOffset) : byteaddress_load;
+float<3> [[ro]] LoadFloat3(in uint byteOffset) : byteaddress_load;
+float<4> [[ro]] LoadFloat4(in uint byteOffset) : byteaddress_load;
+double [[ro]] LoadDouble(in uint byteOffset) : byteaddress_load;
+double<2> [[ro]] LoadDouble2(in uint byteOffset) : byteaddress_load;
 uint [[]] Load(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<2> [[]] Load2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<3> [[]] Load3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<4> [[]] Load4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+half [[]] LoadHalf(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+half<2> [[]] LoadHalf2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+half<3> [[]] LoadHalf3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+half<4> [[]] LoadHalf4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+float [[]] LoadFloat(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+float<2> [[]] LoadFloat2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+float<3> [[]] LoadFloat3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+float<4> [[]] LoadFloat4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+double [[]] LoadDouble(in uint byteOffset, out uint_only status) : byteaddress_load_s;
+double<2> [[]] LoadDouble2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 void [[]] Store(in uint byteOffset, in uint value) : byteaddress_store;
 void [[]] Store2(in uint byteOffset, in uint<2> value) : byteaddress_store;
 void [[]] Store3(in uint byteOffset, in uint<3> value) : byteaddress_store;
 void [[]] Store4(in uint byteOffset, in uint<4> value) : byteaddress_store;
+void [[]] StoreHalf(in uint byteOffset, in half value) : byteaddress_store;
+void [[]] StoreHalf2(in uint byteOffset, in half<2> value) : byteaddress_store;
+void [[]] StoreHalf3(in uint byteOffset, in half<3> value) : byteaddress_store;
+void [[]] StoreHalf4(in uint byteOffset, in half<4> value) : byteaddress_store;
+void [[]] StoreFloat(in uint byteOffset, in float value) : byteaddress_store;
+void [[]] StoreFloat2(in uint byteOffset, in float<2> value) : byteaddress_store;
+void [[]] StoreFloat3(in uint byteOffset, in float<3> value) : byteaddress_store;
+void [[]] StoreFloat4(in uint byteOffset, in float<4> value) : byteaddress_store;
+void [[]] StoreDouble(in uint byteOffset, in double value) : byteaddress_store;
+void [[]] StoreDouble2(in uint byteOffset, in double<2> value) : byteaddress_store;
 void [[]] InterlockedAdd(in uint byteOffset, in uint value);
 void [[]] InterlockedAdd(in uint byteOffset, in uint value, out uint original) : interlockedadd_immediate;
 void [[unsigned_op=InterlockedUMin,overload=1]] InterlockedMin(in uint byteOffset, in any_int32 value) : interlockedmin;

+ 30 - 3
utils/hct/hctdb.py

@@ -1,4 +1,3 @@
-# Copyright (C) Microsoft Corporation. All rights reserved.
 # This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details.
 ###############################################################################
 # DXIL information.                                                           #
@@ -239,7 +238,7 @@ class db_dxil(object):
             self.name_idx[i].category = "Quaternary"
         for i in "Dot2,Dot3,Dot4".split(","):
             self.name_idx[i].category = "Dot"
-        for i in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions".split(","):
+        for i in "CreateHandle,CBufferLoad,CBufferLoadLegacy,TextureLoad,TextureStore,BufferLoad,BufferStore,BufferUpdateCounter,CheckAccessFullyMapped,GetDimensions,RawBufferLoad,RawBufferStore".split(","):
             self.name_idx[i].category = "Resources"
         for i in "Sample,SampleBias,SampleLevel,SampleGrad,SampleCmp,SampleCmpLevelZero,Texture2DMSGetSamplePosition,RenderTargetGetSamplePosition,RenderTargetGetSampleCount".split(","):
             self.name_idx[i].category = "Resources - sample"
@@ -284,6 +283,8 @@ class db_dxil(object):
                 i.category = "Bitcasts with different sizes"
         for i in "ViewID,AttributeAtVertex".split(","):
             self.name_idx[i].shader_model = 6,1
+        for i in "RawBufferLoad,RawBufferStore".split(","):
+            self.name_idx[i].shader_model = 6,2
 
     def populate_llvm_instructions(self):
         # Add instructions that map to LLVM instructions.
@@ -1072,7 +1073,31 @@ class db_dxil(object):
         # End of DXIL 1.1 opcodes.
         self.set_op_count_for_version(1, 1, next_op_idx)
 
-        assert next_op_idx == 139, "next operation index is %d rather than 139 and thus opcodes are broken" % next_op_idx
+        self.add_dxil_op("RawBufferLoad", next_op_idx, "RawBufferLoad", "reads from a raw buffer and structured buffer", "hfwi", "ro", [
+            db_dxil_param(0, "$r", "", "the loaded value"),
+            db_dxil_param(2, "res", "srv", "handle of ByteAddressBuffer or StructuredBuffer to load from"),
+            db_dxil_param(3, "i32", "index", "element index for StructuredBuffer, or byte offset for ByteAddressBuffer"),
+            db_dxil_param(4, "i32", "elementOffset", "offset into element for StructuredBuffer, or undef for ByteAddressBuffer"),
+            db_dxil_param(5, "i8", "mask", "loading value mask", is_const=True),
+            db_dxil_param(6, "i32", "alignment", "relative load access alignment", is_const=True)])
+        next_op_idx += 1
+
+        self.add_dxil_op("RawBufferStore", next_op_idx, "RawBufferStore", "writes to a RWByteAddressBuffer or RWStructuredBuffer", "hfwi", "", [
+            db_dxil_param(0, "v", "", ""),
+            db_dxil_param(2, "res", "uav", "handle of UAV to store to"),
+            db_dxil_param(3, "i32", "index", "element index for StructuredBuffer, or byte offset for ByteAddressBuffer"),
+            db_dxil_param(4, "i32", "elementOffset", "offset into element for StructuredBuffer, or undef for ByteAddressBuffer"),
+            db_dxil_param(5, "$o", "value0", "value"),
+            db_dxil_param(6, "$o", "value1", "value"),
+            db_dxil_param(7, "$o", "value2", "value"),
+            db_dxil_param(8, "$o", "value3", "value"),
+            db_dxil_param(9, "i8", "mask", "mask of contiguous components stored starting at first component (valid: 1, 3, 7, 15)", is_const=True),
+            db_dxil_param(10, "i32", "alignment", "relative store access alignment", is_const=True)])
+        next_op_idx += 1
+
+        # End of DXIL 1.2 opcodes.
+        self.set_op_count_for_version(1, 2, next_op_idx)
+        assert next_op_idx == 141, "next operation index is %d rather than 141 and thus opcodes are broken" % next_op_idx
 
         # Set interesting properties.
         self.build_indices()
@@ -1368,6 +1393,7 @@ class db_dxil(object):
             {'n':'lowerbitsets-avoid-reuse', 'i':'AvoidReuse', 't':'bool', 'd':'Try to avoid reuse of byte array addresses using aliases'}])
         add_pass('red', 'ReducibilityAnalysis', 'Reducibility Analysis', [])
         add_pass('viewid-state', 'ComputeViewIdState', 'Compute information related to ViewID', [])
+        add_pass('hlsl-translate-dxil-opcode-version', 'DxilTranslateRawBuffer', 'Translates one version of dxil to another', [])
         # TODO: turn STATISTICS macros into ETW events
         # assert no duplicate names
         self.pass_idx_args = set()
@@ -1899,6 +1925,7 @@ class db_hlsl(object):
             "any_int": "LICOMPTYPE_ANY_INT",
             "any_int32": "LICOMPTYPE_ANY_INT32",
             "uint_only": "LICOMPTYPE_UINT_ONLY",
+            "half": "LICOMPTYPE_HALF",
             "float": "LICOMPTYPE_FLOAT",
             "fldbl": "LICOMPTYPE_FLOAT_DOUBLE",
             "any_float": "LICOMPTYPE_ANY_FLOAT",