瀏覽代碼

Template argument for byteaddressbuffer load store (#804)

This change removes the new Load variations (LoadHalf, LoadFloat, etc.) from the RWByteAddressBuffer/ByteAddressBuffer methods and adds templated Load intrinsics. The templated Load only works on scalar or vector types; its variations (Load2, Load3, Load4, etc.) do not accept template arguments and work as they did before (uint only). For the Store operation, you can store any scalar or vector type of up to 16 bytes, while prior to HLSL 2018 Store only supported storing a uint scalar.
Young Kim 7 年之前
父節點
當前提交
3cad152a90

+ 0 - 20
include/dxc/HlslIntrinsicOp.h

@@ -207,16 +207,6 @@ import hctdb_instrhelp
   MOP_Load2,
   MOP_Load3,
   MOP_Load4,
-  MOP_LoadDouble,
-  MOP_LoadDouble2,
-  MOP_LoadFloat,
-  MOP_LoadFloat2,
-  MOP_LoadFloat3,
-  MOP_LoadFloat4,
-  MOP_LoadHalf,
-  MOP_LoadHalf2,
-  MOP_LoadHalf3,
-  MOP_LoadHalf4,
   MOP_InterlockedAdd,
   MOP_InterlockedAnd,
   MOP_InterlockedCompareExchange,
@@ -230,16 +220,6 @@ import hctdb_instrhelp
   MOP_Store2,
   MOP_Store3,
   MOP_Store4,
-  MOP_StoreDouble,
-  MOP_StoreDouble2,
-  MOP_StoreFloat,
-  MOP_StoreFloat2,
-  MOP_StoreFloat3,
-  MOP_StoreFloat4,
-  MOP_StoreHalf,
-  MOP_StoreHalf2,
-  MOP_StoreHalf3,
-  MOP_StoreHalf4,
   MOP_DecrementCounter,
   MOP_IncrementCounter,
   MOP_Consume,

+ 29 - 4
lib/DxcSupport/HLSLOptions.cpp

@@ -171,6 +171,20 @@ StringRefUtf16::StringRefUtf16(llvm::StringRef value) {
     m_value = Unicode::UTF8ToUTF16StringOrThrow(value.data());
 }
 
+// Extracts the shader model version from the tail of a target profile string:
+// the third-from-last character is read as the major version and the last
+// character as the minor version (e.g. a string ending in "6_2" yields 6, 2).
+//
+// Returns true when both characters are decimal digits. Returns false when
+// |ref| is shorter than three characters or either character is not a digit;
+// in that case *minor is left untouched, and *major is written only if the
+// major digit itself was valid.
+static bool GetTargetVersionFromString(llvm::StringRef ref, unsigned *major, unsigned *minor) {
+  // Guard against size_t underflow in the index arithmetic below; without it
+  // a profile string shorter than three characters is read out of bounds.
+  if (ref.size() < 3)
+    return false;
+
+  const char majorCh = ref[ref.size() - 3];
+  if (majorCh < '0' || majorCh > '9')
+    return false;
+  *major = static_cast<unsigned>(majorCh - '0');
+
+  const char minorCh = ref[ref.size() - 1];
+  if (minorCh < '0' || minorCh > '9')
+    return false;
+  *minor = static_cast<unsigned>(minorCh - '0');
+  return true;
+}
+
 namespace hlsl {
 namespace options {
 
@@ -252,7 +266,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   }
 
   llvm::StringRef ver = Args.getLastArgValue(OPT_hlsl_version);
-  if (ver.empty()) { opts.HLSLVersion = 2016; }   // Default to 2016
+  if (ver.empty()) { opts.HLSLVersion = 2018; }   // Default to latest version
   else {
     try {
       opts.HLSLVersion = std::stoul(std::string(ver));
@@ -327,13 +341,24 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   }
 
   // Check options only allowed in shader model >= 6.2FPDenormalMode
-  if (opts.TargetProfile.empty() || !opts.TargetProfile.endswith_lower("6_2")) {
+  unsigned Major = 0;
+  unsigned Minor = 0;
+  if (!opts.TargetProfile.empty()) {
+    GetTargetVersionFromString(opts.TargetProfile, &Major, &Minor);
+  }
+
+  if (opts.TargetProfile.empty() || Major < 6 || (Major == 6 && Minor < 2)) {
     if (!opts.FloatDenormalMode.empty()) {
       errors << "denorm option is only allowed for shader model 6.2 and above.";
       return 1;
     }
-    if (opts.Enable16BitTypes) {
-      errors << "enable-16bit-types is only allowed for shader model 6.2 and above.";
+  }
+
+  // /enable-16bit-types only allowed for HLSL 2018 and shader model 6.2
+  if (opts.Enable16BitTypes) {
+    if (opts.TargetProfile.empty() || opts.HLSLVersion < 2018
+      || Major < 6 || (Major == 6 && Minor < 2)) {
+      errors << "enable-16bit-types is only allowed for shader model >= 6.2 and HLSL Language >= 2018.";
       return 1;
     }
   }

+ 4 - 2
lib/HLSL/DxilValidation.cpp

@@ -2605,8 +2605,10 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
           continue;
         }
 
-        bool IsMinPrecisionTy = ValCtx.DL.getTypeStoreSize(FromTy) < 4 ||
-                          ValCtx.DL.getTypeStoreSize(ToTy) < 4;
+        bool IsMinPrecisionTy =
+            (ValCtx.DL.getTypeStoreSize(FromTy) < 4 ||
+             ValCtx.DL.getTypeStoreSize(ToTy) < 4) &&
+            !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision();
         if (IsMinPrecisionTy) {
           ValCtx.EmitInstrError(Cast, ValidationRule::InstrMinPrecisonBitCast);
         }

+ 8 - 43
lib/HLSL/HLOperationLower.cpp

@@ -3067,35 +3067,15 @@ static uint8_t GetRawBufferMaskFromIOP(IntrinsicOp IOP, hlsl::OP *OP) {
   switch (IOP) {
     // one component
     case IntrinsicOp::MOP_Load:
-    case IntrinsicOp::MOP_LoadHalf:
-    case IntrinsicOp::MOP_StoreHalf:
-    case IntrinsicOp::MOP_LoadFloat:
-    case IntrinsicOp::MOP_StoreFloat:
       return DXIL::kCompMask_X;
     // two component
     case IntrinsicOp::MOP_Load2:
-    case IntrinsicOp::MOP_LoadHalf2:
-    case IntrinsicOp::MOP_StoreHalf2:
-    case IntrinsicOp::MOP_LoadFloat2:
-    case IntrinsicOp::MOP_StoreFloat2:
-    case IntrinsicOp::MOP_LoadDouble: // double takes 2 components
-    case IntrinsicOp::MOP_StoreDouble:
       return DXIL::kCompMask_X | DXIL::kCompMask_Y;
     // three component
     case IntrinsicOp::MOP_Load3:
-    case IntrinsicOp::MOP_LoadHalf3:
-    case IntrinsicOp::MOP_StoreHalf3:
-    case IntrinsicOp::MOP_LoadFloat3:
-    case IntrinsicOp::MOP_StoreFloat3:
       return DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
-    // three component
+    // four component
     case IntrinsicOp::MOP_Load4:
-    case IntrinsicOp::MOP_LoadHalf4:
-    case IntrinsicOp::MOP_StoreHalf4:
-    case IntrinsicOp::MOP_LoadFloat4:
-    case IntrinsicOp::MOP_StoreFloat4:
-    case IntrinsicOp::MOP_LoadDouble2: // double2 takes 4 components
-    case IntrinsicOp::MOP_StoreDouble2:
       return DXIL::kCompMask_All;
     default:
       DXASSERT(false, "Invalid Intrinsic for computing load mask.");
@@ -3206,7 +3186,6 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
       loadArgs.emplace_back(helper.addr); // offset
     }
   }
-
   // offset 0
   if (opcode == OP::OpCode::TextureLoad) {
     if (helper.offset && !isa<llvm::UndefValue>(helper.offset)) {
@@ -3228,7 +3207,13 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK,
   if (RK == DxilResource::Kind::RawBuffer) {
     // elementOffset, mask, alignment
     loadArgs.emplace_back(undefI);
-    loadArgs.emplace_back(OP->GetI8Const(GetRawBufferMaskFromIOP(helper.intrinsicOpCode, OP)));
+    Type *rtnTy = helper.retVal->getType();
+    unsigned numComponents = 1;
+    if (VectorType *VTy = dyn_cast<VectorType>(rtnTy)) {
+      rtnTy = VTy->getElementType();
+      numComponents = VTy->getNumElements();
+    }
+    loadArgs.emplace_back(GetRawBufferMaskForETy(rtnTy, numComponents, OP));
     loadArgs.emplace_back(Alignment);
   }
   else if (RK == DxilResource::Kind::TypedBuffer) {
@@ -4464,16 +4449,6 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_LoadDouble, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_LoadDouble2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_LoadFloat, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_LoadFloat2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_LoadFloat3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_LoadFloat4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_LoadHalf, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_LoadHalf2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_LoadHalf3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_LoadHalf4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
@@ -4487,16 +4462,6 @@ IntrinsicLower gLowerTable[static_cast<unsigned>(IntrinsicOp::Num_Intrinsics)] =
     {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_StoreDouble, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_StoreDouble2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_StoreFloat, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_StoreFloat2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_StoreFloat3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_StoreFloat4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_StoreHalf, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_StoreHalf2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_StoreHalf3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
-    {IntrinsicOp::MOP_StoreHalf4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter, DXIL::OpCode::NumOpCodes},
     {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes},

+ 6 - 0
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -7651,6 +7651,12 @@ def err_hlsl_unsupported_keyword_for_version : Error<
    "%0 is only allowed for HLSL %1 and above.">;
 def err_hlsl_unsupported_keyword_for_min_precision : Error<
    "%0 is only supported with -enable-16bit-types option">;
+def err_hlsl_intrinsic_template_arg_unsupported: Error<
+   "Explicit template arguments on intrinsic %0 are not supported.">;
+def err_hlsl_intrinsic_template_arg_requires_2018: Error<
+   "Explicit template arguments on intrinsic %0 requires HLSL version 2018 or above.">;
+def err_hlsl_intrinsic_template_arg_scalar_vector_16: Error<
+   "Explicit template arguments on intrinsic %0 are limited one to scalar or vector type up to 16 bytes in size.">;
 }
 // HLSL Change Ends
 

+ 26 - 20
tools/clang/lib/AST/ASTContextHLSL.cpp

@@ -185,38 +185,44 @@ static HLSLScalarType FindScalarTypeByName(const char *typeName, const size_t ty
     switch (typeLen) {
     case 7: // int16_t, int32_t
       if (typeName[0] == 'i' && typeName[1] == 'n') {
-        if (typeName[3] == '1') {
-          if (strncmp(typeName, "int16_t", 7))
-            break;
-          return HLSLScalarType_int16;
+        if (!langOptions.UseMinPrecision) {
+          if (typeName[3] == '1') {
+            if (strncmp(typeName, "int16_t", 7))
+              break;
+            return HLSLScalarType_int16;
+          }
         }
-        else if (typeName[3] == '3') {
+        if (typeName[3] == '3') {
           if (strncmp(typeName, "int32_t", 7))
             break;
           return HLSLScalarType_int32;
         }
       }
     case 8: // uint16_t, uint32_t
-      if (typeName[0] == 'u' && typeName[1] == 'i') {
-        if (typeName[4] == '1') {
-          if (strncmp(typeName, "uint16_t", 8))
-            break;
-          return HLSLScalarType_uint16;
-        }
-        else if (typeName[4] == '3') {
-          if (strncmp(typeName, "uint32_t", 8))
-            break;
-          return HLSLScalarType_uint32;
+      if (!langOptions.UseMinPrecision) {
+        if (typeName[0] == 'u' && typeName[1] == 'i') {
+          if (typeName[4] == '1') {
+            if (strncmp(typeName, "uint16_t", 8))
+              break;
+            return HLSLScalarType_uint16;
+          }
         }
       }
+      if (typeName[4] == '3') {
+        if (strncmp(typeName, "uint32_t", 8))
+          break;
+        return HLSLScalarType_uint32;
+      }
     case 9: // float16_t, float32_t, float64_t
       if (typeName[0] == 'f' && typeName[1] == 'l') {
-        if (typeName[5] == '1') {
-          if (strncmp(typeName, "float16_t", 9))
-            break;
-          return HLSLScalarType_float16;
+        if (!langOptions.UseMinPrecision) {
+          if (typeName[5] == '1') {
+            if (strncmp(typeName, "float16_t", 9))
+              break;
+            return HLSLScalarType_float16;
+          }
         }
-        else if (typeName[5] == '3') {
+        if (typeName[5] == '3') {
           if (strncmp(typeName, "float32_t", 9))
             break;
           return HLSLScalarType_float32;

+ 1 - 1
tools/clang/lib/Frontend/CompilerInvocation.cpp

@@ -1721,7 +1721,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   Opts.SanitizerBlacklistFiles = Args.getAllArgValues(OPT_fsanitize_blacklist);
 #else
   llvm::StringRef ver = Args.getLastArgValue(OPT_hlsl_version);
-  if (ver.empty()) { Opts.HLSLVersion = 2016; }   // Default to 2016
+  if (ver.empty()) { Opts.HLSLVersion = 2018; }   // Default to latest
   else {
     try {
       Opts.HLSLVersion = std::stoi(std::string(ver));

+ 65 - 25
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -3076,7 +3076,7 @@ public:
     }
   }
 
-  void DiagnoseHLSLScalarType(HLSLScalarType type, SourceLocation Loc) {
+  bool DiagnoseHLSLScalarType(HLSLScalarType type, SourceLocation Loc) {
     if (getSema()->getLangOpts().HLSLVersion < 2018) {
       switch (type) {
       case HLSLScalarType_float16:
@@ -3088,7 +3088,7 @@ public:
       case HLSLScalarType_uint32:
         m_sema->Diag(Loc, diag::err_hlsl_unsupported_keyword_for_version)
             << HLSLScalarTypeNames[type] << "2018";
-        break;
+        return false;
       default:
         break;
       }
@@ -3100,11 +3100,12 @@ public:
       case HLSLScalarType_uint16:
         m_sema->Diag(Loc, diag::err_hlsl_unsupported_keyword_for_min_precision)
             << HLSLScalarTypeNames[type];
-        break;
+        return false;
       default:
         break;
       }
     }
+    return true;
   }
 
   bool LookupUnqualified(LookupResult &R, Scope *S) override
@@ -4223,23 +4224,6 @@ public:
 
     IntrinsicOp intrinOp = static_cast<IntrinsicOp>(intrinsic->Op);
 
-    if (intrinOp == IntrinsicOp::MOP_LoadHalf ||
-      intrinOp == IntrinsicOp::MOP_LoadHalf2 ||
-      intrinOp == IntrinsicOp::MOP_LoadHalf3 ||
-      intrinOp == IntrinsicOp::MOP_LoadHalf4 ||
-      intrinOp == IntrinsicOp::MOP_StoreHalf ||
-      intrinOp == IntrinsicOp::MOP_StoreHalf2 ||
-      intrinOp == IntrinsicOp::MOP_StoreHalf3 ||
-      intrinOp == IntrinsicOp::MOP_StoreHalf4
-      ) {
-      if (getSema()->getLangOpts().UseMinPrecision) {
-        DXASSERT(Args.size() >= 1, "Otherwise wrong load store call.");
-        getSema()->Diag(
-            Args.front()->getExprLoc(),
-            diag::err_hlsl_half_load_store);
-      }
-    }
-
     if (intrinOp == IntrinsicOp::MOP_SampleBias) {
       // Remove this when update intrinsic table not affect other things.
       // Change vector<float,1> into float for bias.
@@ -8263,10 +8247,6 @@ Sema::TemplateDeductionResult HLSLExternalSource::DeduceTemplateArgumentsForHLSL
 {
   DXASSERT_NOMSG(FunctionTemplate != nullptr);
 
-  DXASSERT(
-    ExplicitTemplateArgs == nullptr ||
-    ExplicitTemplateArgs->size() == 0, "otherwise parser failed to reject explicit template argument syntax");
-
   // Get information about the function we have.
   CXXMethodDecl* functionMethod = dyn_cast<CXXMethodDecl>(FunctionTemplate->getTemplatedDecl());
   DXASSERT(functionMethod != nullptr,
@@ -8340,6 +8320,66 @@ Sema::TemplateDeductionResult HLSLExternalSource::DeduceTemplateArgumentsForHLSL
       continue;
     }
 
+    // Currently the only intrinsic that allows explicit template arguments is
+    // Load on ByteAddressBuffer/RWByteAddressBuffer (to select the return type).
+    // TODO: handle template arguments for future intrinsics in a more natural way
+
+    // Check Explicit template arguments
+    UINT intrinsicOp = (*cursor)->Op;
+    LPCSTR intrinsicName = (*cursor)->pArgs[0].pName;
+    bool Is2018 = getSema()->getLangOpts().HLSLVersion >= 2018;
+    bool IsBAB =
+        objectName == g_ArBasicTypeNames[AR_OBJECT_BYTEADDRESS_BUFFER] ||
+        objectName == g_ArBasicTypeNames[AR_OBJECT_RWBYTEADDRESS_BUFFER];
+    bool IsBABLoad = IsBAB && intrinsicOp == (UINT)IntrinsicOp::MOP_Load;
+    bool IsBABStore = IsBAB && intrinsicOp == (UINT)IntrinsicOp::MOP_Store;
+    if (ExplicitTemplateArgs && ExplicitTemplateArgs->size() > 0) {
+      bool isLegalTemplate = false;
+      SourceLocation Loc = ExplicitTemplateArgs->getLAngleLoc();
+      // Pick the diagnostic that matches why the explicit template argument is
+      // rejected: the intrinsic is not a ByteAddressBuffer Load, the HLSL
+      // version is older than 2018, or (the remaining case) the argument
+      // count, type, or size is unsupported.
+      auto TemplateDiag =
+          !IsBABLoad
+              ? diag::err_hlsl_intrinsic_template_arg_unsupported
+              : !Is2018 ? diag::err_hlsl_intrinsic_template_arg_requires_2018
+                        : diag::err_hlsl_intrinsic_template_arg_scalar_vector_16;
+      if (IsBABLoad && Is2018 && ExplicitTemplateArgs->size() == 1) {
+        Loc = (*ExplicitTemplateArgs)[0].getLocation();
+        QualType explicitType = (*ExplicitTemplateArgs)[0].getArgument().getAsType();
+        ArTypeObjectKind explicitKind = GetTypeObjectKind(explicitType);
+        if (explicitKind == AR_TOBJ_BASIC || explicitKind == AR_TOBJ_VECTOR) {
+          isLegalTemplate = GET_BASIC_BITS(GetTypeElementKind(explicitType)) != BPROP_BITS64 ||
+            GetNumElements(explicitType) <= 2;
+        }
+        if (isLegalTemplate) {
+          argTypes[0] = explicitType;
+        }
+      }
+
+      if (!isLegalTemplate) {
+        getSema()->Diag(Loc, TemplateDiag) << intrinsicName;
+        return Sema::TemplateDeductionResult::TDK_Invalid;
+      }
+    } else if (IsBABStore) {
+      // Prior to HLSL 2018, Store operation only stored scalar uint.
+      if (!Is2018) {
+        if (GetNumElements(argTypes[2]) != 1) {
+          getSema()->Diag(Args[1]->getLocStart(),
+                          diag::err_ovl_no_viable_member_function_in_call)
+              << intrinsicName;
+          return Sema::TemplateDeductionResult::TDK_Invalid;
+        }
+        argTypes[2] = getSema()->getASTContext().getIntTypeForBitwidth(
+            32, /*signed*/ false);
+      } else {
+        // not supporting types > 16 bytes yet.
+        if (GET_BASIC_BITS(GetTypeElementKind(argTypes[2])) == BPROP_BITS64 &&
+            GetNumElements(argTypes[2]) > 2) {
+          getSema()->Diag(Args[1]->getLocStart(),
+                          diag::err_ovl_no_viable_member_function_in_call)
+              << intrinsicName;
+          return Sema::TemplateDeductionResult::TDK_Invalid;
+        }
+      }
+    }
     Specialization = AddHLSLIntrinsicMethod(cursor.GetTableName(), cursor.GetLoweringStrategy(), *cursor, FunctionTemplate, Args, argTypes, argCount);
     DXASSERT_NOMSG(Specialization->getPrimaryTemplate()->getCanonicalDecl() ==
       FunctionTemplate->getCanonicalDecl());
@@ -10990,7 +11030,7 @@ bool Sema::DiagnoseHLSLLookup(const LookupResult &R) {
     if (TryParseAny(nameIdentifier.data(), nameIdentifier.size(), &parsedType, &rowCount, &colCount, getLangOpts())) {
       HLSLExternalSource *hlslExternalSource = HLSLExternalSource::FromSema(this);
       hlslExternalSource->WarnMinPrecision(parsedType, R.getNameLoc());
-      hlslExternalSource->DiagnoseHLSLScalarType(parsedType, R.getNameLoc());
+      return hlslExternalSource->DiagnoseHLSLScalarType(parsedType, R.getNameLoc());
     }
   }
   return true;

+ 13 - 385
tools/clang/lib/Sema/gen_intrin_main_tables_15.h

@@ -5158,136 +5158,6 @@ static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args8[] =
     {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args9[] =
-{
-    {"LoadDouble", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_DOUBLE, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args10[] =
-{
-    {"LoadDouble", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_DOUBLE, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args11[] =
-{
-    {"LoadDouble2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_DOUBLE, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args12[] =
-{
-    {"LoadDouble2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_DOUBLE, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args13[] =
-{
-    {"LoadFloat", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_FLOAT, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args14[] =
-{
-    {"LoadFloat", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_FLOAT, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args15[] =
-{
-    {"LoadFloat2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args16[] =
-{
-    {"LoadFloat2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args17[] =
-{
-    {"LoadFloat3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 3},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args18[] =
-{
-    {"LoadFloat3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 3},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args19[] =
-{
-    {"LoadFloat4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 4},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args20[] =
-{
-    {"LoadFloat4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 4},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args21[] =
-{
-    {"LoadHalf", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_HALF, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args22[] =
-{
-    {"LoadHalf", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_HALF, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args23[] =
-{
-    {"LoadHalf2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args24[] =
-{
-    {"LoadHalf2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args25[] =
-{
-    {"LoadHalf3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 3},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args26[] =
-{
-    {"LoadHalf3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 3},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args27[] =
-{
-    {"LoadHalf4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 4},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_ByteAddressBufferMethods_Args28[] =
-{
-    {"LoadHalf4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 4},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
 static const HLSL_INTRINSIC g_ByteAddressBufferMethods[] =
 {
     {(UINT)hlsl::IntrinsicOp::MOP_GetDimensions, false, false, -1, 2, g_ByteAddressBufferMethods_Args0},
@@ -5299,26 +5169,6 @@ static const HLSL_INTRINSIC g_ByteAddressBufferMethods[] =
     {(UINT)hlsl::IntrinsicOp::MOP_Load3, false, false, -1, 3, g_ByteAddressBufferMethods_Args6},
     {(UINT)hlsl::IntrinsicOp::MOP_Load4, true, false, -1, 2, g_ByteAddressBufferMethods_Args7},
     {(UINT)hlsl::IntrinsicOp::MOP_Load4, false, false, -1, 3, g_ByteAddressBufferMethods_Args8},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble, true, false, -1, 2, g_ByteAddressBufferMethods_Args9},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble, false, false, -1, 3, g_ByteAddressBufferMethods_Args10},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble2, true, false, -1, 2, g_ByteAddressBufferMethods_Args11},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble2, false, false, -1, 3, g_ByteAddressBufferMethods_Args12},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat, true, false, -1, 2, g_ByteAddressBufferMethods_Args13},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat, false, false, -1, 3, g_ByteAddressBufferMethods_Args14},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat2, true, false, -1, 2, g_ByteAddressBufferMethods_Args15},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat2, false, false, -1, 3, g_ByteAddressBufferMethods_Args16},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat3, true, false, -1, 2, g_ByteAddressBufferMethods_Args17},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat3, false, false, -1, 3, g_ByteAddressBufferMethods_Args18},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat4, true, false, -1, 2, g_ByteAddressBufferMethods_Args19},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat4, false, false, -1, 3, g_ByteAddressBufferMethods_Args20},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf, true, false, -1, 2, g_ByteAddressBufferMethods_Args21},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf, false, false, -1, 3, g_ByteAddressBufferMethods_Args22},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf2, true, false, -1, 2, g_ByteAddressBufferMethods_Args23},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf2, false, false, -1, 3, g_ByteAddressBufferMethods_Args24},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf3, true, false, -1, 2, g_ByteAddressBufferMethods_Args25},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf3, false, false, -1, 3, g_ByteAddressBufferMethods_Args26},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf4, true, false, -1, 2, g_ByteAddressBufferMethods_Args27},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf4, false, false, -1, 3, g_ByteAddressBufferMethods_Args28},
 };
 
 //
@@ -5500,232 +5350,39 @@ static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args23[] =
 
 static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args24[] =
 {
-    {"LoadDouble", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_DOUBLE, 1, 1},
+    {"Store", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
     {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_NUMERIC, 1, 1},
 };
 
 static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args25[] =
-{
-    {"LoadDouble", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_DOUBLE, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args26[] =
-{
-    {"LoadDouble2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_DOUBLE, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args27[] =
-{
-    {"LoadDouble2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_DOUBLE, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args28[] =
-{
-    {"LoadFloat", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_FLOAT, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args29[] =
-{
-    {"LoadFloat", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_FLOAT, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args30[] =
-{
-    {"LoadFloat2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args31[] =
-{
-    {"LoadFloat2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args32[] =
-{
-    {"LoadFloat3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 3},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args33[] =
-{
-    {"LoadFloat3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 3},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args34[] =
-{
-    {"LoadFloat4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 4},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args35[] =
-{
-    {"LoadFloat4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 4},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args36[] =
-{
-    {"LoadHalf", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_HALF, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args37[] =
-{
-    {"LoadHalf", AR_QUAL_OUT, 0, LITEMPLATE_SCALAR, 0, LICOMPTYPE_HALF, 1, 1},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args38[] =
-{
-    {"LoadHalf2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args39[] =
-{
-    {"LoadHalf2", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 2},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args40[] =
-{
-    {"LoadHalf3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 3},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args41[] =
-{
-    {"LoadHalf3", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 3},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args42[] =
-{
-    {"LoadHalf4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 4},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args43[] =
-{
-    {"LoadHalf4", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_HALF, 1, 4},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"status", AR_QUAL_OUT, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT_ONLY, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args44[] =
 {
     {"Store", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
     {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_UINT, 1, 1},
+    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_NUMERIC, 1, IA_C},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args45[] =
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args26[] =
 {
     {"Store2", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
     {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
     {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_UINT, 1, 2},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args46[] =
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args27[] =
 {
     {"Store3", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
     {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
     {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_UINT, 1, 3},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args47[] =
+static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args28[] =
 {
     {"Store4", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
     {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
     {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_UINT, 1, 4},
 };
 
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args48[] =
-{
-    {"StoreDouble", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_DOUBLE, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args49[] =
-{
-    {"StoreDouble2", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_DOUBLE, 1, 2},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args50[] =
-{
-    {"StoreFloat", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_FLOAT, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args51[] =
-{
-    {"StoreFloat2", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT, 1, 2},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args52[] =
-{
-    {"StoreFloat3", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT, 1, 3},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args53[] =
-{
-    {"StoreFloat4", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_FLOAT, 1, 4},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args54[] =
-{
-    {"StoreHalf", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_SCALAR, 2, LICOMPTYPE_HALF, 1, 1},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args55[] =
-{
-    {"StoreHalf2", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_HALF, 1, 2},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args56[] =
-{
-    {"StoreHalf3", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_HALF, 1, 3},
-};
-
-static const HLSL_INTRINSIC_ARGUMENT g_RWByteAddressBufferMethods_Args57[] =
-{
-    {"StoreHalf4", 0, 0, LITEMPLATE_VOID, 0, LICOMPTYPE_VOID, 0, 0},
-    {"byteOffset", AR_QUAL_IN, 1, LITEMPLATE_SCALAR, 1, LICOMPTYPE_UINT, 1, 1},
-    {"value", AR_QUAL_IN, 2, LITEMPLATE_VECTOR, 2, LICOMPTYPE_HALF, 1, 4},
-};
-
 static const HLSL_INTRINSIC g_RWByteAddressBufferMethods[] =
 {
     {(UINT)hlsl::IntrinsicOp::MOP_GetDimensions, false, false, -1, 2, g_RWByteAddressBufferMethods_Args0},
@@ -5752,40 +5409,11 @@ static const HLSL_INTRINSIC g_RWByteAddressBufferMethods[] =
     {(UINT)hlsl::IntrinsicOp::MOP_Load3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args21},
     {(UINT)hlsl::IntrinsicOp::MOP_Load4, true, false, -1, 2, g_RWByteAddressBufferMethods_Args22},
     {(UINT)hlsl::IntrinsicOp::MOP_Load4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args23},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble, true, false, -1, 2, g_RWByteAddressBufferMethods_Args24},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble, false, false, -1, 3, g_RWByteAddressBufferMethods_Args25},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble2, true, false, -1, 2, g_RWByteAddressBufferMethods_Args26},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadDouble2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args27},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat, true, false, -1, 2, g_RWByteAddressBufferMethods_Args28},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat, false, false, -1, 3, g_RWByteAddressBufferMethods_Args29},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat2, true, false, -1, 2, g_RWByteAddressBufferMethods_Args30},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args31},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat3, true, false, -1, 2, g_RWByteAddressBufferMethods_Args32},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args33},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat4, true, false, -1, 2, g_RWByteAddressBufferMethods_Args34},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadFloat4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args35},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf, true, false, -1, 2, g_RWByteAddressBufferMethods_Args36},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf, false, false, -1, 3, g_RWByteAddressBufferMethods_Args37},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf2, true, false, -1, 2, g_RWByteAddressBufferMethods_Args38},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args39},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf3, true, false, -1, 2, g_RWByteAddressBufferMethods_Args40},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args41},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf4, true, false, -1, 2, g_RWByteAddressBufferMethods_Args42},
-    {(UINT)hlsl::IntrinsicOp::MOP_LoadHalf4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args43},
-    {(UINT)hlsl::IntrinsicOp::MOP_Store, false, false, -1, 3, g_RWByteAddressBufferMethods_Args44},
-    {(UINT)hlsl::IntrinsicOp::MOP_Store2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args45},
-    {(UINT)hlsl::IntrinsicOp::MOP_Store3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args46},
-    {(UINT)hlsl::IntrinsicOp::MOP_Store4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args47},
-    {(UINT)hlsl::IntrinsicOp::MOP_StoreDouble, false, false, -1, 3, g_RWByteAddressBufferMethods_Args48},
-    {(UINT)hlsl::IntrinsicOp::MOP_StoreDouble2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args49},
-    {(UINT)hlsl::IntrinsicOp::MOP_StoreFloat, false, false, -1, 3, g_RWByteAddressBufferMethods_Args50},
-    {(UINT)hlsl::IntrinsicOp::MOP_StoreFloat2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args51},
-    {(UINT)hlsl::IntrinsicOp::MOP_StoreFloat3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args52},
-    {(UINT)hlsl::IntrinsicOp::MOP_StoreFloat4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args53},
-    {(UINT)hlsl::IntrinsicOp::MOP_StoreHalf, false, false, -1, 3, g_RWByteAddressBufferMethods_Args54},
-    {(UINT)hlsl::IntrinsicOp::MOP_StoreHalf2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args55},
-    {(UINT)hlsl::IntrinsicOp::MOP_StoreHalf3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args56},
-    {(UINT)hlsl::IntrinsicOp::MOP_StoreHalf4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args57},
+    {(UINT)hlsl::IntrinsicOp::MOP_Store, false, false, -1, 3, g_RWByteAddressBufferMethods_Args24},
+    {(UINT)hlsl::IntrinsicOp::MOP_Store, false, false, -1, 3, g_RWByteAddressBufferMethods_Args25},
+    {(UINT)hlsl::IntrinsicOp::MOP_Store2, false, false, -1, 3, g_RWByteAddressBufferMethods_Args26},
+    {(UINT)hlsl::IntrinsicOp::MOP_Store3, false, false, -1, 3, g_RWByteAddressBufferMethods_Args27},
+    {(UINT)hlsl::IntrinsicOp::MOP_Store4, false, false, -1, 3, g_RWByteAddressBufferMethods_Args28},
 };
 
 //
@@ -5912,11 +5540,11 @@ static const HLSL_INTRINSIC g_ConsumeStructuredBufferMethods[] =
 // HLSL-INTRINSIC-STATS:BEGIN
 static const UINT g_uAppendStructuredBufferMethodsCount = 2;
 static const UINT g_uBufferMethodsCount = 3;
-static const UINT g_uByteAddressBufferMethodsCount = 29;
+static const UINT g_uByteAddressBufferMethodsCount = 9;
 static const UINT g_uConsumeStructuredBufferMethodsCount = 2;
 static const UINT g_uIntrinsicsCount = 179;
 static const UINT g_uRWBufferMethodsCount = 3;
-static const UINT g_uRWByteAddressBufferMethodsCount = 58;
+static const UINT g_uRWByteAddressBufferMethodsCount = 29;
 static const UINT g_uRWStructuredBufferMethodsCount = 5;
 static const UINT g_uRWTexture1DArrayMethodsCount = 4;
 static const UINT g_uRWTexture1DMethodsCount = 4;

+ 1 - 1
tools/clang/test/CodeGenHLSL/fixedWidth.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T ps_6_2 %s | FileCheck %s
+// RUN: %dxc -E main -T ps_6_2 -HV 2017 %s | FileCheck %s
 
 // CHECK: error: unknown type name 'int16_t'
 // CHECK: error: unknown type name 'int32_t'

+ 11 - 3
tools/clang/test/CodeGenHLSL/fixedWidth16Bit.hlsl

@@ -1,15 +1,23 @@
 // RUN: %dxc -E main -T ps_6_2 -HV 2018 %s | FileCheck %s
 
-// CHECK: int16_t is only supported with -enable-16bit-types option
-// CHECK: uint16_t is only supported with -enable-16bit-types option
-// CHECK: float16_t is only supported with -enable-16bit-types option
+// CHECK: error: unknown type name 'int16_t'
+// CHECK-NOT: error: unknown type name 'int32_t'
+// CHECK: error: unknown type name 'uint16_t'
+// CHECK-NOT: error: unknown type name 'uint32_t'
+// CHECK: error: unknown type name 'float16_t'
+// CHECK-NOT: error: unknown type name 'float32_t'
+// CHECK-NOT: error: unknown type name 'float64_t'
 
 // int64_t/uint64_t already supported from 6.0
 
 float4 main(float col : COL) : SV_TARGET
 {
     int16_t i0;
+    int32_t i1;
     uint16_t i2;
+    uint32_t i3;
     float16_t f0;
+    float32_t f1;
+    float64_t f2;
     return col;
 }

+ 15 - 14
tools/clang/test/CodeGenHLSL/raw_buf3.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+// RUN: %dxc -E main -T ps_6_0 -HV 2018 %s | FileCheck %s
 
 // CHECK-NOT: @dx.op.rawBufferLoad
 // CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32
@@ -7,7 +7,8 @@
 // CHECK: call double @dx.op.makeDouble.f64
 // CHECK: call double @dx.op.makeDouble.f64
 // CHECK: call double @dx.op.makeDouble.f64
-// CHECK: call void @dx.op.bufferStore.i32
+// Store by default will store what's passed in
+// CHECK: call void @dx.op.bufferStore.f32
 // CHECK: call void @dx.op.bufferStore.i32
 // CHECK: call void @dx.op.bufferStore.i32
 // CHECK: call void @dx.op.bufferStore.i32
@@ -29,21 +30,21 @@ float4 main(uint idx1 : IDX1, uint idx2 : IDX2) : SV_Target {
   r.xyz += buf2.Load3(idx2, status);
   r.xyzw += buf2.Load4(idx2);
 
-  r.x += buf1.LoadFloat(idx1, status);
-  r.xy += buf1.LoadFloat2(idx1);
-  r.xyz += buf1.LoadFloat3(idx1, status);
-  r.xyzw += buf1.LoadFloat4(idx1);
+  r.x += buf1.Load<float>(idx1, status);
+  r.xy += buf1.Load<float2>(idx1);
+  r.xyz += buf1.Load<float3>(idx1, status);
+  r.xyzw += buf1.Load<float4>(idx1);
 
-  r.x += buf2.LoadFloat(idx2);
-  r.xy += buf2.LoadFloat2(idx2, status);
-  r.xyz += buf2.LoadFloat3(idx2);
-  r.xyzw += buf2.LoadFloat4(idx2, status);
+  r.x += buf2.Load<float>(idx2);
+  r.xy += buf2.Load<float2>(idx2, status);
+  r.xyz += buf2.Load<float3>(idx2);
+  r.xyzw += buf2.Load<float4>(idx2, status);
 
-  r.x += buf1.LoadDouble(idx1);
-  r.xy += buf1.LoadDouble2(idx1, status);
+  r.x += buf1.Load<double>(idx1);
+  r.xy += buf1.Load<double2>(idx1, status);
 
-  r.x += buf2.LoadDouble(idx2, status);
-  r.xy += buf2.LoadDouble2(idx2);
+  r.x += buf2.Load<double>(idx2, status);
+  r.xy += buf2.Load<double2>(idx2);
 
   buf2.Store(1, r.x);
   buf2.Store2(1, r.xy);

+ 20 - 20
tools/clang/test/CodeGenHLSL/raw_buf4.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T ps_6_2 %s | FileCheck %s
+// RUN: %dxc -E main -T ps_6_2 -HV 2018 %s | FileCheck %s
 
 // CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 1, i32 4)
 // CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 4)
@@ -20,7 +20,7 @@
 // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 8)
 // CHECK: call double @dx.op.makeDouble.f64
 
-// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 undef, i32 undef, i32 undef, i8 1, i32 4)
+// CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, float %{{.*}}, float undef, float undef, float undef, i8 1, i32 4)
 // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 4)
 // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i8 7, i32 4)
 // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i8 15, i32 4)
@@ -38,34 +38,34 @@ float4 main(uint idx1 : IDX1, uint idx2 : IDX2) : SV_Target {
   uint status;
   float4 r = float4(0,0,0,0);
 
-  r.x += buf1.LoadFloat(idx1, status);
-  r.xy += buf1.LoadFloat2(idx1);
-  r.xyz += buf1.LoadFloat3(idx1, status);
-  r.xyzw += buf1.LoadFloat4(idx1);
+  r.x += buf1.Load<float>(idx1, status);
+  r.xy += buf1.Load<float2>(idx1);
+  r.xyz += buf1.Load<float3>(idx1, status);
+  r.xyzw += buf1.Load<float4>(idx1);
 
-  r.x += buf2.LoadFloat(idx2);
-  r.xy += buf2.LoadFloat2(idx2, status);
-  r.xyz += buf2.LoadFloat3(idx2);
-  r.xyzw += buf2.LoadFloat4(idx2, status);
+  r.x += buf2.Load<float>(idx2);
+  r.xy += buf2.Load<float2>(idx2, status);
+  r.xyz += buf2.Load<float3>(idx2);
+  r.xyzw += buf2.Load<float4>(idx2, status);
 
-  r.x += buf1.LoadDouble(idx1);
-  r.xy += buf1.LoadDouble2(idx1, status);
+  r.x += buf1.Load<double>(idx1);
+  r.xy += buf1.Load<double2>(idx1, status);
 
-  r.x += buf2.LoadDouble(idx2, status);
-  r.xy += buf2.LoadDouble2(idx2);
+  r.x += buf2.Load<double>(idx2, status);
+  r.xy += buf2.Load<double2>(idx2);
 
   buf2.Store(1, r.x);
   buf2.Store2(1, r.xy);
   buf2.Store3(1, r.xyz);
   buf2.Store4(1, r);
 
-  buf2.StoreFloat(1, r.x);
-  buf2.StoreFloat2(1, r.xy);
-  buf2.StoreFloat3(1, r.xyz);
-  buf2.StoreFloat4(1, r);
+  buf2.Store(1, r.x);
+  buf2.Store(1, r.xy);
+  buf2.Store(1, r.xyz);
+  buf2.Store(1, r);
 
-  buf2.StoreDouble(1, r.x);
-  buf2.StoreDouble2(1, r.xy); 
+  buf2.Store(1, (double)r.x);
+  buf2.Store(1, (double2)r.xy);
 
   return r;
 }

+ 33 - 32
tools/clang/test/CodeGenHLSL/raw_buf5.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T ps_6_2 -enable-16bit-types %s | FileCheck %s
+// RUN: %dxc -E main -T ps_6_2 -enable-16bit-types -HV 2018 %s | FileCheck %s
 
 // CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 1, i32 4)
 // CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %buf1_texture_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 3, i32 4)
@@ -28,7 +28,8 @@
 // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %buf2_UAV_rawbuf, i32 %{{[0-9]+}}, i32 undef, i8 15, i32 8)
 // CHECK: call double @dx.op.makeDouble.f64
 
-// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 undef, i32 undef, i32 undef, i8 1, i32 4)
+// Store by default will store what's passed in
+// CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, float %{{.*}}, float undef, float undef, float undef, i8 1, i32 4)
 // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i32 undef, i8 3, i32 4)
 // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 undef, i8 7, i32 4)
 // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %buf2_UAV_rawbuf, i32 1, i32 undef, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i8 15, i32 4)
@@ -50,49 +51,49 @@ float4 main(uint idx1 : IDX1, uint idx2 : IDX2) : SV_Target {
   uint status;
   float4 r = float4(0,0,0,0);
 
-  r.x += buf1.LoadFloat(idx1, status);
-  r.xy += buf1.LoadFloat2(idx1);
-  r.xyz += buf1.LoadFloat3(idx1, status);
-  r.xyzw += buf1.LoadFloat4(idx1);
+  r.x += buf1.Load<float>(idx1, status);
+  r.xy += buf1.Load<float2>(idx1);
+  r.xyz += buf1.Load<float3>(idx1, status);
+  r.xyzw += buf1.Load<float4>(idx1);
 
-  r.x += buf2.LoadFloat(idx2);
-  r.xy += buf2.LoadFloat2(idx2, status);
-  r.xyz += buf2.LoadFloat3(idx2);
-  r.xyzw += buf2.LoadFloat4(idx2, status);
+  r.x += buf2.Load<float>(idx2);
+  r.xy += buf2.Load<float2>(idx2, status);
+  r.xyz += buf2.Load<float3>(idx2);
+  r.xyzw += buf2.Load<float4>(idx2, status);
 
-  r.x += buf1.LoadHalf(idx1, status);
-  r.xy += buf1.LoadHalf2(idx1);
-  r.xyz += buf1.LoadHalf3(idx1, status);
-  r.xyzw += buf1.LoadHalf4(idx1);
+  r.x += buf1.Load<half>(idx1, status);
+  r.xy += buf1.Load<half2>(idx1);
+  r.xyz += buf1.Load<half3>(idx1, status);
+  r.xyzw += buf1.Load<half4>(idx1);
 
-  r.x += buf2.LoadHalf(idx2);
-  r.xy += buf2.LoadHalf2(idx2, status);
-  r.xyz += buf2.LoadHalf3(idx2);
-  r.xyzw += buf2.LoadHalf4(idx2, status);
+  r.x += buf2.Load<half>(idx2);
+  r.xy += buf2.Load<half2>(idx2, status);
+  r.xyz += buf2.Load<half3>(idx2);
+  r.xyzw += buf2.Load<half4>(idx2, status);
 
-  r.x += buf1.LoadDouble(idx1);
-  r.xy += buf1.LoadDouble2(idx1, status);
+  r.x += buf1.Load<double>(idx1);
+  r.xy += buf1.Load<double2>(idx1, status);
 
-  r.x += buf2.LoadDouble(idx2, status);
-  r.xy += buf2.LoadDouble2(idx2);
+  r.x += buf2.Load<double>(idx2, status);
+  r.xy += buf2.Load<double2>(idx2);
 
   buf2.Store(1, r.x);
   buf2.Store2(1, r.xy);
   buf2.Store3(1, r.xyz);
   buf2.Store4(1, r);
 
-  buf2.StoreHalf(1, r.x);
-  buf2.StoreHalf2(1, r.xy);
-  buf2.StoreHalf3(1, r.xyz);
-  buf2.StoreHalf4(1, r);
+  buf2.Store(1, (half)r.x);
+  buf2.Store(1, (half2)r.xy);
+  buf2.Store(1, (half3)r.xyz);
+  buf2.Store(1, (half4)r);
 
-  buf2.StoreFloat(1, r.x);
-  buf2.StoreFloat2(1, r.xy);
-  buf2.StoreFloat3(1, r.xyz);
-  buf2.StoreFloat4(1, r);
+  buf2.Store(1, r.x);
+  buf2.Store(1, r.xy);
+  buf2.Store(1, r.xyz);
+  buf2.Store(1, r);
 
-  buf2.StoreDouble(1, r.x);
-  buf2.StoreDouble2(1, r.xy);
+  buf2.Store(1, (double)r.x);
+  buf2.Store(1, (double2)r.xy);
 
   return r;
 }

+ 1 - 1
tools/clang/test/CodeGenHLSL/typedef_new_type.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T ps_6_0  %s | FileCheck %s
+// RUN: %dxc -E main -T ps_6_0 -HV 2017 %s | FileCheck %s
 
 // Checking that typedef for fixed width types for HLSL before 2018 works properly
 

+ 4 - 4
tools/clang/test/HLSL/cpp-errors.hlsl

@@ -251,8 +251,8 @@ void vla(int size) {
   return n[0];
 }
 
-enum MyEnum  { MyEnum_MyVal1, MyEnum_MyVal2 }; // expected-error {{enum is unsupported in HLSL before 2017}} expected-warning {{declaration does not declare anything}}
-enum class MyEnumWithClass { MyEnumWithClass_MyVal1, MyEnumWithClass_MyVal2 }; // expected-error {{enum is unsupported in HLSL before 2017}} expected-warning {{declaration does not declare anything}}
+enum MyEnum  { MyEnum_MyVal1, MyEnum_MyVal2 }; //
+enum class MyEnumWithClass { MyEnumWithClass_MyVal1, MyEnumWithClass_MyVal2 }; //
 
 float4 fn_with_semantic() : SV_Target0{
   return 0;
@@ -440,7 +440,7 @@ my_label: local_i = 1; // expected-error {{label is unsupported in HLSL}}
   case 1 + 2:
     local_i = 3;
     break;
-  case local_i: // expected-error {{expression is not an integral constant expression}} expected-note {{read of non-const variable 'local_i' is not allowed in a constant expression}}
+  case local_i: // expected-error {{case value is not a constant expression}} expected-note {{read of non-const variable 'local_i' is not allowed in a constant expression}}
     break;
   case 10 ... 12: // expected-error {{case range is unsupported in HLSL}}
     break;
@@ -647,4 +647,4 @@ float4 plain(float4 param4 /* : FOO */) /*: FOO */{
   const j; // expected-error {{HLSL requires a type specifier for all declarations}}
   long long ll; // expected-error {{'long' is a reserved keyword in HLSL}} expected-error {{'long' is a reserved keyword in HLSL}} expected-error {{HLSL requires a type specifier for all declarations}}
   return is_supported();
-}
+}

+ 53 - 7
tools/clang/test/HLSL/intrinsic-examples.hlsl

@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -ffreestanding -verify %s
+// RUN: %clang_cc1 -fsyntax-only -ffreestanding -verify -HV 2018 -enable-16bit-types %s
 
 // :FXC_VERIFY_ARGUMENTS: /E FontPixelShader /T ps_5_1 /Gec
 
@@ -22,11 +22,57 @@ float4 RWByteAddressBufferMain(uint2 a : A, uint2 b : B) : SV_Target
   r += status;
   uav1[b] = r; // expected-error {{type 'RWByteAddressBuffer' does not provide a subscript operator}} fxc-error {{X3121: array, matrix, vector, or indexable object type expected in index expression}}
   uav1.Load(a.x, status);
-  min16float4 h = min16float4(1,2,3,4);
-  uav1.LoadHalf(h.x, status);                               /* expected-error {{LoadHalf and StoreHalf are not supported for min precision mode}} */
-  uav1.LoadHalf2(h.x);                                      /* expected-error {{LoadHalf and StoreHalf are not supported for min precision mode}} expected-warning {{ignoring return value of function that only reads data}} */
-  uav1.StoreHalf3(4, h.xyz);                                /* expected-error {{LoadHalf and StoreHalf are not supported for min precision mode}} */
-  uav1.StoreHalf4(8, h);                                    /* expected-error {{LoadHalf and StoreHalf are not supported for min precision mode}} */
+  min16float4 h = min16float4(1,2,3,4);                     /* expected-warning {{min16float is promoted to float16_t}} expected-warning {{min16float is promoted to float16_t}} */
+
+  // valid template argument
+  r += uav1.Load<half4>(0);
+  r += uav1.Load<float4>(12);
+  r += uav1.Load<int16_t2>(16).xyxy;
+  r += uav1.Load<int32_t3>(20).xyzx;
+  r += uav1.Load<float16_t>(20);
+  r += uav1.Load<float32_t1>(20);
+
+  r += uav1.Load<half4>(4, status);
+  r += uav1.Load<float4>(12, status);
+  r += uav1.Load<int16_t2>(16, status).xyxy;
+  r += uav1.Load<int32_t3>(20, status).xyzx;
+  r += uav1.Load<float16_t>(20, status);
+  r += uav1.Load<float32_t1>(20, status);
+
+  // errors
+  r += uav1.Load<float, float3>(16);                        /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} */
+  r += uav1.Load<double3>(16);                              /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} expected-error {{cannot convert from 'vector<double, 3>' to 'float4'}} */
+  r += uav1.Load2<float>(16);                               /* expected-error {{Explicit template arguments on intrinsic Load2 are not supported.}} */
+  r += uav1.Load3<int>(20);                                 /* expected-error {{Explicit template arguments on intrinsic Load3 are not supported.}} */
+  r += uav1.Load4<int16_t>(24);                             /* expected-error {{Explicit template arguments on intrinsic Load4 are not supported.}} */
+  r += uav1.Load<half3x4>(24);                              /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} expected-error {{cannot convert from 'matrix<__fp16, 3, 4>' to 'float4'}} */
+  r += uav1.Load<float, float3>(16, status);                /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} */
+  r += uav1.Load<double3>(16, status);                      /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} expected-error {{cannot convert from 'vector<double, 3>' to 'float4'}} */
+  r += uav1.Load2<float>(16, status);                       /* expected-error {{Explicit template arguments on intrinsic Load2 are not supported.}} */
+  r += uav1.Load3<int>(20, status);                         /* expected-error {{Explicit template arguments on intrinsic Load3 are not supported.}} */
+  r += uav1.Load4<int16_t>(24, status);                     /* expected-error {{Explicit template arguments on intrinsic Load4 are not supported.}} */
+  r += uav1.Load<half3x4>(24, status);                      /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} expected-error {{cannot convert from 'matrix<__fp16, 3, 4>' to 'float4'}} */
+  // valid template argument
+  uav1.Store(0, r);
+  uav1.Store(0, r.x);
+  uav1.Store(0, (half2)r.xy);
+  uav1.Store(0, (int3)r.xyz);
+  uav1.Store(0, (double2)r.xy);
+  // errors
+  struct MyStruct {
+    float4 x;
+  };
+  uav1.Store<float>(0, r);                                  /* expected-error {{Explicit template arguments on intrinsic Store are not supported.}} */
+  uav1.Store<int64_t4>(0, r);                               /* expected-error {{Explicit template arguments on intrinsic Store are not supported.}} */
+  uav1.Store2<float>(0, r.xy);                              /* expected-error {{Explicit template arguments on intrinsic Store2 are not supported.}} */
+  uav1.Store3<float>(0, r.xyz);                             /* expected-error {{Explicit template arguments on intrinsic Store3 are not supported.}} */
+  uav1.Store4<float>(0, r);                                 /* expected-error {{Explicit template arguments on intrinsic Store4 are not supported.}} */
+  uav1.Store(0, float2x4(1,2,3,4,5,6,7,8));                 /* expected-error {{no matching member function for call to 'Store'}} */
+  uav1.Store<float3x2>(0, float3x2(1,2,3,4,5,6));           /* expected-error {{no matching member function for call to 'Store'}} */
+  uav1.Store(0, (double3)r.xyz);                            /* expected-error {{no matching member function for call to 'Store'}} expected-error {{no matching member function for call to Store}} expected-note@? {{candidate template ignored: couldn't infer template argument 'TResult'}}*/
+  uav1.Store(0, (uint64_t4)r);                              /* expected-error {{no matching member function for call to 'Store'}} expected-error {{no matching member function for call to Store}} expected-note@? {{candidate template ignored: couldn't infer template argument 'TResult'}}*/
+  MyStruct myStruct;
+  uav1.Store(0, myStruct);                                  /* expected-error {{no matching member function for call to 'Store'}} */
   return r;
 }
 
@@ -213,4 +259,4 @@ float4 FontPixelShader( VS_OUT In ) : COLOR0
     */
 
     return Color;
-};
+};

+ 2 - 51
utils/hct/gen_intrin_main.txt

@@ -712,30 +712,10 @@ uint [[ro]] Load(in uint byteOffset) : byteaddress_load;
 uint<2> [[ro]] Load2(in uint byteOffset) : byteaddress_load;
 uint<3> [[ro]] Load3(in uint byteOffset) : byteaddress_load;
 uint<4> [[ro]] Load4(in uint byteOffset) : byteaddress_load;
-half [[ro]] LoadHalf(in uint byteOffset) : byteaddress_load;
-half<2> [[ro]] LoadHalf2(in uint byteOffset) : byteaddress_load;
-half<3> [[ro]] LoadHalf3(in uint byteOffset) : byteaddress_load;
-half<4> [[ro]] LoadHalf4(in uint byteOffset) : byteaddress_load;
-float [[ro]] LoadFloat(in uint byteOffset) : byteaddress_load;
-float<2> [[ro]] LoadFloat2(in uint byteOffset) : byteaddress_load;
-float<3> [[ro]] LoadFloat3(in uint byteOffset) : byteaddress_load;
-float<4> [[ro]] LoadFloat4(in uint byteOffset) : byteaddress_load;
-double [[ro]] LoadDouble(in uint byteOffset) : byteaddress_load;
-double<2> [[ro]] LoadDouble2(in uint byteOffset) : byteaddress_load;
 uint [[]] Load(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<2> [[]] Load2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<3> [[]] Load3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<4> [[]] Load4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-half [[]] LoadHalf(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-half<2> [[]] LoadHalf2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-half<3> [[]] LoadHalf3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-half<4> [[]] LoadHalf4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-float [[]] LoadFloat(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-float<2> [[]] LoadFloat2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-float<3> [[]] LoadFloat3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-float<4> [[]] LoadFloat4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-double [[]] LoadDouble(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-double<2> [[]] LoadDouble2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 
 } namespace
 
@@ -746,44 +726,15 @@ uint [[ro]] Load(in uint byteOffset) : byteaddress_load;
 uint<2> [[ro]] Load2(in uint byteOffset) : byteaddress_load;
 uint<3> [[ro]] Load3(in uint byteOffset) : byteaddress_load;
 uint<4> [[ro]] Load4(in uint byteOffset) : byteaddress_load;
-half [[ro]] LoadHalf(in uint byteOffset) : byteaddress_load;
-half<2> [[ro]] LoadHalf2(in uint byteOffset) : byteaddress_load;
-half<3> [[ro]] LoadHalf3(in uint byteOffset) : byteaddress_load;
-half<4> [[ro]] LoadHalf4(in uint byteOffset) : byteaddress_load;
-float [[ro]] LoadFloat(in uint byteOffset) : byteaddress_load;
-float<2> [[ro]] LoadFloat2(in uint byteOffset) : byteaddress_load;
-float<3> [[ro]] LoadFloat3(in uint byteOffset) : byteaddress_load;
-float<4> [[ro]] LoadFloat4(in uint byteOffset) : byteaddress_load;
-double [[ro]] LoadDouble(in uint byteOffset) : byteaddress_load;
-double<2> [[ro]] LoadDouble2(in uint byteOffset) : byteaddress_load;
 uint [[]] Load(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<2> [[]] Load2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<3> [[]] Load3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
 uint<4> [[]] Load4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-half [[]] LoadHalf(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-half<2> [[]] LoadHalf2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-half<3> [[]] LoadHalf3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-half<4> [[]] LoadHalf4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-float [[]] LoadFloat(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-float<2> [[]] LoadFloat2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-float<3> [[]] LoadFloat3(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-float<4> [[]] LoadFloat4(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-double [[]] LoadDouble(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-double<2> [[]] LoadDouble2(in uint byteOffset, out uint_only status) : byteaddress_load_s;
-void [[]] Store(in uint byteOffset, in uint value) : byteaddress_store;
+void [[]] Store(in uint byteOffset, in numeric value) : byteaddress_store;
+void [[]] Store(in uint byteOffset, in numeric<c> value) : byteaddress_store;
 void [[]] Store2(in uint byteOffset, in uint<2> value) : byteaddress_store;
 void [[]] Store3(in uint byteOffset, in uint<3> value) : byteaddress_store;
 void [[]] Store4(in uint byteOffset, in uint<4> value) : byteaddress_store;
-void [[]] StoreHalf(in uint byteOffset, in half value) : byteaddress_store;
-void [[]] StoreHalf2(in uint byteOffset, in half<2> value) : byteaddress_store;
-void [[]] StoreHalf3(in uint byteOffset, in half<3> value) : byteaddress_store;
-void [[]] StoreHalf4(in uint byteOffset, in half<4> value) : byteaddress_store;
-void [[]] StoreFloat(in uint byteOffset, in float value) : byteaddress_store;
-void [[]] StoreFloat2(in uint byteOffset, in float<2> value) : byteaddress_store;
-void [[]] StoreFloat3(in uint byteOffset, in float<3> value) : byteaddress_store;
-void [[]] StoreFloat4(in uint byteOffset, in float<4> value) : byteaddress_store;
-void [[]] StoreDouble(in uint byteOffset, in double value) : byteaddress_store;
-void [[]] StoreDouble2(in uint byteOffset, in double<2> value) : byteaddress_store;
 void [[]] InterlockedAdd(in uint byteOffset, in uint value);
 void [[]] InterlockedAdd(in uint byteOffset, in uint value, out uint original) : interlockedadd_immediate;
 void [[unsigned_op=InterlockedUMin,overload=1]] InterlockedMin(in uint byteOffset, in any_int32 value) : interlockedmin;

+ 7 - 0
utils/hct/hcttestcmds.cmd

@@ -597,6 +597,13 @@ if %errorlevel% equ 0 (
   exit /b 1
 )
 
+dxc.exe %script_dir%\smoke.hlsl /Tps_6_2 /enable-16bit-types /HV 2017 2>nul
+if %errorlevel% equ 0 (
+  echo dxc incorrectly compiled %script_dir%\smoke.hlsl shader model 6.2 with /enable-16bit-types and /HV 2017 option
+  call :cleanup 2>nul
+  exit /b 1
+)
+
 rem SPIR-V Change Starts
 echo Smoke test for SPIR-V CodeGen ...
 set spirv_smoke_success=0