Browse Source

Enable RawBufferLoad/RawBufferStore for 64-bit vectors bigger than 16 bytes (#1786)

Add tests to verify that the correct 64-bit RawBufferLoad/RawBufferStore instructions are generated.
Verify that for shader model 6.2 the 64-bit load/store is split into two 32-bit load/store instructions.
Fix buffer indexing bug when splitting 64-bit load/store.
Helena Kotas 6 years ago
parent
commit
c48de29be9

+ 23 - 4
lib/HLSL/DxilGenerationPass.cpp

@@ -1412,8 +1412,17 @@ void DxilTranslateRawBuffer::ReplaceRawBufferLoad64Bit(Function *F, Type *EltTy,
       for (unsigned i = 0; i < size; i++) {
         if (i == 2) {
           // Update offset 4 by 4 bytes.
-          args[DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx] =
+          if (isa<UndefValue>(offset)) {
+            // [RW]ByteAddressBuffer has undef element offset -> update index
+            Value *index = CI->getArgOperand(DXIL::OperandIndex::kRawBufferLoadIndexOpIdx);
+            args[DXIL::OperandIndex::kRawBufferLoadIndexOpIdx] =
+              Builder.CreateAdd(index, Builder.getInt32(4 * 4));
+          }
+          else {
+            // [RW]StructuredBuffer -> update element offset
+            args[DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx] =
               Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
+          }
           args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
               Builder.getInt8(maskHi);
           newLd = Builder.CreateCall(bufLd, args);
@@ -1531,10 +1540,20 @@ void DxilTranslateRawBuffer::ReplaceRawBufferStore64Bit(Function *F, Type *ETy,
       Builder.CreateCall(newFunction, args);
 
       if (maskHi) {
-        Value *offset = args[DXIL::OperandIndex::kBufferStoreCoord1OpIdx];
         // Update offset 4 by 4 bytes.
-        offset = Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
-        args[DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx] = offset;
+        Value *offset = args[DXIL::OperandIndex::kBufferStoreCoord1OpIdx];
+        if (isa<UndefValue>(offset)) {
+          // [RW]ByteAddressBuffer has element offset == undef -> update index instead
+          Value *index = args[DXIL::OperandIndex::kBufferStoreCoord0OpIdx];
+          index = Builder.CreateAdd(index, Builder.getInt32(4 * 4));
+          args[DXIL::OperandIndex::kRawBufferStoreIndexOpIdx] = index;
+        }
+        else {
+          // [RW]StructuredBuffer -> update element offset
+          offset = Builder.CreateAdd(offset, Builder.getInt32(4 * 4));
+          args[DXIL::OperandIndex::kRawBufferStoreElementOffsetOpIdx] = offset;
+        }
+        
         args[DXIL::OperandIndex::kRawBufferStoreMaskOpIdx] =
             Builder.getInt8(maskHi);
         args[DXIL::OperandIndex::kRawBufferStoreVal0OpIdx] = vals32[4];

+ 2 - 2
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -7670,8 +7670,8 @@ def err_hlsl_intrinsic_template_arg_unsupported: Error<
    "Explicit template arguments on intrinsic %0 are not supported.">;
 def err_hlsl_intrinsic_template_arg_requires_2018: Error<
    "Explicit template arguments on intrinsic %0 requires HLSL version 2018 or above.">;
-def err_hlsl_intrinsic_template_arg_scalar_vector_16: Error<
-   "Explicit template arguments on intrinsic %0 are limited one to scalar or vector type up to 16 bytes in size.">;
+def err_hlsl_intrinsic_template_arg_scalar_vector: Error<
+   "Explicit template arguments on intrinsic %0 are limited one to scalar or vector type.">;
 }
 def err_hlsl_no_struct_user_defined_type: Error<
    "User defined type intrinsic arg must be struct">;

+ 2 - 14
tools/clang/lib/Sema/SemaHLSL.cpp

@@ -9026,16 +9026,13 @@ Sema::TemplateDeductionResult HLSLExternalSource::DeduceTemplateArgumentsForHLSL
           !IsBABLoad
               ? diag::err_hlsl_intrinsic_template_arg_unsupported
               : !Is2018 ? diag::err_hlsl_intrinsic_template_arg_requires_2018
-                        : diag::err_hlsl_intrinsic_template_arg_requires_2018;
+                        : diag::err_hlsl_intrinsic_template_arg_scalar_vector;
       if (IsBABLoad && Is2018 && ExplicitTemplateArgs->size() == 1) {
         Loc = (*ExplicitTemplateArgs)[0].getLocation();
         QualType explicitType = (*ExplicitTemplateArgs)[0].getArgument().getAsType();
         ArTypeObjectKind explicitKind = GetTypeObjectKind(explicitType);
         if (explicitKind == AR_TOBJ_BASIC || explicitKind == AR_TOBJ_VECTOR) {
-          isLegalTemplate = GET_BASIC_BITS(GetTypeElementKind(explicitType)) != BPROP_BITS64 ||
-            GetNumElements(explicitType) <= 2;
-        }
-        if (isLegalTemplate) {
+          isLegalTemplate = true;
           argTypes[0] = explicitType;
         }
       }
@@ -9055,15 +9052,6 @@ Sema::TemplateDeductionResult HLSLExternalSource::DeduceTemplateArgumentsForHLSL
         }
         argTypes[2] = getSema()->getASTContext().getIntTypeForBitwidth(
             32, /*signed*/ false);
-      } else {
-        // not supporting types > 16 bytes yet.
-        if (GET_BASIC_BITS(GetTypeElementKind(argTypes[2])) == BPROP_BITS64 &&
-            GetNumElements(argTypes[2]) > 2) {
-          getSema()->Diag(Args[1]->getLocStart(),
-                          diag::err_ovl_no_viable_member_function_in_call)
-              << intrinsicName;
-          return Sema::TemplateDeductionResult::TDK_Invalid;
-        }
       }
     }
     Specialization = AddHLSLIntrinsicMethod(cursor.GetTableName(), cursor.GetLoweringStrategy(), *cursor, FunctionTemplate, Args, argTypes, argCount);

+ 104 - 0
tools/clang/test/CodeGenHLSL/quick-test/rawbufferloadstore_64bit_6_2.hlsl

@@ -0,0 +1,104 @@
+// // RUN: %dxc -E main -T cs_6_2 %s | FileCheck %s
+
+struct TestData { 
+  int64_t3 v3;
+  int64_t4 v4;
+};
+
+ByteAddressBuffer srv0 : register(t0); 
+RWByteAddressBuffer uav0 : register(u0); 
+
+StructuredBuffer<TestData> srv1 : register(t1);
+RWStructuredBuffer<TestData> uav1 : register(u1);
+
+[numthreads(1, 1, 1)]
+void main(uint GI : SV_GroupIndex) {
+
+  int64_t3 vec3 = srv0.Load<int64_t3>(0);
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srv0_texture_rawbuf, i32 0, i32 undef, i8 15, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 1
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 2
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 3
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srv0_texture_rawbuf, i32 16, i32 undef, i8 3, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 1
+// CHECK: zext i32 %{{[0-9]+}} to i64
+// CHECK: zext i32 %{{[0-9]+}} to i64
+// CHECK: shl i64 %{{[0-9]+}}, 32
+// CHECK: or i64 %{{[0-9]+}}, %{{[0-9]+}}
+
+   uav0.Store(0, vec3);
+// CHECK: trunc i64 %{{[0-9]+}} to i32
+// CHECK: lshr i64 %{{[0-9]+}}, 32
+// CHECK: trunc i64 %{{[0-9]+}} to i32
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %uav0_UAV_rawbuf, i32 0, i32 undef, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i8 15, i32 8) 
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %uav0_UAV_rawbuf, i32 16, i32 undef, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 undef, i32 undef, i8 3, i32 8) 
+
+  int64_t4 vec4 = srv0.Load<int64_t4>(0);
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srv0_texture_rawbuf, i32 0, i32 undef, i8 15, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 1
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 2
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 3
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srv0_texture_rawbuf, i32 16, i32 undef, i8 15, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 1
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 2
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 3
+// CHECK: zext i32 %{{[0-9]+}} to i64
+// CHECK: zext i32 %{{[0-9]+}} to i64
+// CHECK: shl i64 %{{[0-9]+}}, 32
+// CHECK: or i64 %{{[0-9]+}}, %{{[0-9]+}}
+
+  uav0.Store(0, vec4);
+// CHECK: trunc i64 %{{[0-9]+}} to i32
+// CHECK: lshr i64 %{{[0-9]+}}, 32
+// CHECK: trunc i64 %{{[0-9]+}} to i32
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %uav0_UAV_rawbuf, i32 0, i32 undef, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i8 15, i32 8) 
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %uav0_UAV_rawbuf, i32 16, i32 undef, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i8 15, i32 8) 
+
+  int64_t3 svec3 = srv1[0].v3;
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srv1_texture_structbuf, i32 0, i32 0, i8 15, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 1
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 2
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 3
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srv1_texture_structbuf, i32 0, i32 16, i8 3, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 1
+// CHECK: zext i32 %{{[0-9]+}} to i64
+// CHECK: zext i32 %{{[0-9]+}} to i64
+// CHECK: shl i64 %{{[0-9]+}}, 32
+// CHECK: or i64 %{{[0-9]+}}, %{{[0-9]+}}
+
+  uav1[0].v3 = svec3;
+// CHECK: trunc i64 %{{[0-9]+}} to i32
+// CHECK: lshr i64 %{{[0-9]+}}, 32
+// CHECK: trunc i64 %{{[0-9]+}} to i32
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %uav1_UAV_structbuf, i32 0, i32 0, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i8 15, i32 8) 
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %uav1_UAV_structbuf, i32 0, i32 16, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 undef, i32 undef, i8 3, i32 8) 
+
+  int64_t4 svec4 = srv1[0].v4;
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srv1_texture_structbuf, i32 0, i32 24, i8 15, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 1
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 2
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 3
+// CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %srv1_texture_structbuf, i32 0, i32 40, i8 15, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 1
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 2
+// CHECK: extractvalue %dx.types.ResRet.i32 %{{[a-zA-Z0-9]+}}, 3
+// CHECK: zext i32 %{{[0-9]+}} to i64
+// CHECK: zext i32 %{{[0-9]+}} to i64
+// CHECK: shl i64 %{{[0-9]+}}, 32
+// CHECK: or i64 %{{[0-9]+}}, %{{[0-9]+}}
+
+  uav1[0].v4 = svec4;
+// CHECK: trunc i64 %{{[0-9]+}} to i32
+// CHECK: lshr i64 %{{[0-9]+}}, 32
+// CHECK: trunc i64 %{{[0-9]+}} to i32
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %uav1_UAV_structbuf, i32 0, i32 24, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i8 15, i32 8) 
+// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %uav1_UAV_structbuf, i32 0, i32 40, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i32 %{{[0-9]+}}, i8 15, i32 8) 
+};

+ 54 - 0
tools/clang/test/CodeGenHLSL/quick-test/rawbufferloadstore_64bit_6_3.hlsl

@@ -0,0 +1,54 @@
+// // RUN: %dxc -E main -T cs_6_3 %s | FileCheck %s
+
+struct TestData { 
+  int64_t3 v3;
+  int64_t4 v4;
+};
+
+ByteAddressBuffer srv0 : register(t0); 
+RWByteAddressBuffer uav0 : register(u0); 
+
+StructuredBuffer<TestData> srv1 : register(t1);
+RWStructuredBuffer<TestData> uav1 : register(u1);
+
+[numthreads(1, 1, 1)]
+void main(uint GI : SV_GroupIndex) {
+
+  int64_t3 vec3 = srv0.Load<int64_t3>(0);
+// CHECK: call %dx.types.ResRet.i64 @dx.op.rawBufferLoad.i64(i32 139, %dx.types.Handle %srv0_texture_rawbuf, i32 0, i32 undef, i8 7, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 1
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 2
+
+  uav0.Store(0, vec3);
+// CHECK: call void @dx.op.rawBufferStore.i64(i32 140, %dx.types.Handle %uav0_UAV_rawbuf, i32 0, i32 undef, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}, i64 undef, i8 7, i32 8)
+
+  int64_t4 vec4 = srv0.Load<int64_t4>(0);
+// CHECK: call %dx.types.ResRet.i64 @dx.op.rawBufferLoad.i64(i32 139, %dx.types.Handle %srv0_texture_rawbuf, i32 0, i32 undef, i8 15, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 1
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 2
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 3
+
+  uav0.Store(0, vec4);
+// CHECK: call void @dx.op.rawBufferStore.i64(i32 140, %dx.types.Handle %uav0_UAV_rawbuf, i32 0, i32 undef, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}, i8 15, i32 8)
+
+  int64_t3 svec3 = srv1[0].v3;
+// CHECK: call %dx.types.ResRet.i64 @dx.op.rawBufferLoad.i64(i32 139, %dx.types.Handle %srv1_texture_structbuf, i32 0, i32 0, i8 7, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 1
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 2
+
+  uav1[0].v3 = svec3;
+// CHECK: call void @dx.op.rawBufferStore.i64(i32 140, %dx.types.Handle %uav1_UAV_structbuf, i32 0, i32 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}, i64 undef, i8 7, i32 8)
+
+  int64_t4 svec4 = srv1[0].v4;
+// CHECK: call %dx.types.ResRet.i64 @dx.op.rawBufferLoad.i64(i32 139, %dx.types.Handle %srv1_texture_structbuf, i32 0, i32 24, i8 15, i32 8)
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 0
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 1
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 2
+// CHECK: extractvalue %dx.types.ResRet.i64 %{{[0-9a-zA-Z]+}}, 3
+
+  uav1[0].v4 = svec4;
+// CHECK: call void @dx.op.rawBufferStore.i64(i32 140, %dx.types.Handle %uav1_UAV_structbuf, i32 0, i32 24, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}, i8 15, i32 8)
+};

+ 8 - 8
tools/clang/test/HLSL/intrinsic-examples.hlsl

@@ -40,18 +40,18 @@ float4 RWByteAddressBufferMain(uint2 a : A, uint2 b : B) : SV_Target
   r += uav1.Load<float32_t1>(20, status);
 
   // errors
-  r += uav1.Load<float, float3>(16);                        /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} */
-  r += uav1.Load<double3>(16);                              /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} expected-error {{cannot convert from 'vector<double, 3>' to 'float4'}} */
+  r += uav1.Load<float, float3>(16);                        /* expected-error {{Explicit template arguments on intrinsic Load are limited one to scalar or vector type.}} */
+  r += uav1.Load<double3>(16);                              /* expected-error {{cannot convert from 'double3' to 'float4'}} */
   r += uav1.Load2<float>(16);                               /* expected-error {{Explicit template arguments on intrinsic Load2 are not supported.}} */
   r += uav1.Load3<int>(20);                                 /* expected-error {{Explicit template arguments on intrinsic Load3 are not supported.}} */
   r += uav1.Load4<int16_t>(24);                             /* expected-error {{Explicit template arguments on intrinsic Load4 are not supported.}} */
-  r += uav1.Load<half3x4>(24);                              /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} expected-error {{cannot convert from 'matrix<half, 3, 4>' to 'float4'}} */
-  r += uav1.Load<float, float3>(16, status);                /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} */
-  r += uav1.Load<double3>(16, status);                      /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} expected-error {{cannot convert from 'vector<double, 3>' to 'float4'}} */
+  r += uav1.Load<half3x4>(24);                              /* expected-error {{Explicit template arguments on intrinsic Load are limited one to scalar or vector type.}} expected-error {{cannot convert from 'matrix<half, 3, 4>' to 'float4'}} */
+  r += uav1.Load<float, float3>(16, status);                /* expected-error {{Explicit template arguments on intrinsic Load are limited one to scalar or vector type.}} */
+  r += uav1.Load<double3>(16, status);                      /* expected-error {{cannot convert from 'double3' to 'float4'}} */
   r += uav1.Load2<float>(16, status);                       /* expected-error {{Explicit template arguments on intrinsic Load2 are not supported.}} */
   r += uav1.Load3<int>(20, status);                         /* expected-error {{Explicit template arguments on intrinsic Load3 are not supported.}} */
   r += uav1.Load4<int16_t>(24, status);                     /* expected-error {{Explicit template arguments on intrinsic Load4 are not supported.}} */
-  r += uav1.Load<half3x4>(24, status);                      /* expected-error {{Explicit template arguments on intrinsic Load requires HLSL version 2018 or above.}} expected-error {{cannot convert from 'matrix<half, 3, 4>' to 'float4'}} */
+  r += uav1.Load<half3x4>(24, status);                      /* expected-error {{Explicit template arguments on intrinsic Load are limited one to scalar or vector type.}} expected-error {{cannot convert from 'matrix<half, 3, 4>' to 'float4'}} */
   // valid template argument
   uav1.Store(0, r);
   uav1.Store(0, r.x);
@@ -69,8 +69,8 @@ float4 RWByteAddressBufferMain(uint2 a : A, uint2 b : B) : SV_Target
   uav1.Store4<float>(0, r);                                 /* expected-error {{Explicit template arguments on intrinsic Store4 are not supported.}} */
   uav1.Store(0, float2x4(1,2,3,4,5,6,7,8));                 /* expected-error {{no matching member function for call to 'Store'}} */
   uav1.Store<float3x2>(0, float3x2(1,2,3,4,5,6));           /* expected-error {{no matching member function for call to 'Store'}} */
-  uav1.Store(0, (double3)r.xyz);                            /* expected-error {{no matching member function for call to 'Store'}} expected-error {{no matching member function for call to Store}} expected-note@? {{candidate template ignored: couldn't infer template argument 'TResult'}}*/
-  uav1.Store(0, (uint64_t4)r);                              /* expected-error {{no matching member function for call to 'Store'}} expected-error {{no matching member function for call to Store}} expected-note@? {{candidate template ignored: couldn't infer template argument 'TResult'}}*/
+  uav1.Store(0, (double3)r.xyz);                            
+  uav1.Store(0, (uint64_t4)r);                              
   MyStruct myStruct;
   uav1.Store(0, myStruct);                                  /* expected-error {{no matching member function for call to 'Store'}} */
   return r;