4 lat temu · 5fbaf73466
--- a/docs/DXIL.rst
+++ b/docs/DXIL.rst
@@ -3184,6 +3184,7 @@ SM.SEMANTIC                               Semantic must be defined in target sha
 
															 SM.STREAMINDEXRANGE                       Stream index (%0) must between 0 and %1.
														
 
															 SM.TESSFACTORFORDOMAIN                    Required TessFactor for domain not found declared anywhere in Patch Constant data.
														
 
															 SM.TESSFACTORSIZEMATCHDOMAIN              TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
														
 
															+SM.TGSMUNSUPPORTED                        Thread Group Shared Memory not supported %0.
														
 
															 SM.THREADGROUPCHANNELRANGE                Declared Thread Group %0 size %1 outside valid range [%2..%3].
														
 
															 SM.TRIOUTPUTPRIMITIVEMISMATCH             Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain.
														
 
															 SM.UNDEFINEDOUTPUT                        Not all elements of output %0 were written.
														
--- a/include/dxc/HLSL/DxilValidation.h
+++ b/include/dxc/HLSL/DxilValidation.h
@@ -258,6 +258,7 @@ enum class ValidationRule : unsigned {
 
															   SmSampleCountOnlyOn2DMS, // Only Texture2DMS/2DMSArray could has sample count.
														
 
															   SmSemantic, // Semantic must be defined in target shader model
														
 
															   SmStreamIndexRange, // Stream index (%0) must between 0 and %1.
														
 
															+  SmTGSMUnsupported, // Thread Group Shared Memory not supported %0.
														
 
															   SmTessFactorForDomain, // Required TessFactor for domain not found declared anywhere in Patch Constant data.
														
 
															   SmTessFactorSizeMatchDomain, // TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
														
 
															   SmThreadGroupChannelRange, // Declared Thread Group %0 size %1 outside valid range [%2..%3].
														
--- a/lib/DxilContainer/DxilContainerAssembler.cpp
+++ b/lib/DxilContainer/DxilContainerAssembler.cpp
@@ -1110,7 +1110,18 @@ private:
 
															           info.minMinor = minor;
														
 
															         }
														
 
															         info.mask &= mask;
														
 
															+      } else if (const llvm::LoadInst *LI = dyn_cast<LoadInst>(user)) {
														
 
															+        // If loading a groupshared variable, limit to CS/AS/MS
														
 
															+#define SFLAG(stage) ((unsigned)1 << (unsigned)DXIL::ShaderKind::stage)
														
 
															+        if (LI->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
														
 
															+          const llvm::Function *F = cast<const llvm::Function>(LI->getParent()->getParent());
														
 
															+          ShaderCompatInfo &info = m_FuncToShaderCompat[F];
														
 
															+          info.mask &= (SFLAG(Compute) | SFLAG(Mesh) | SFLAG(Amplification));
														
 
															+        }
														
 
															+#undef SFLAG
														
 
															+
														
 
															       }
														
 
															+
														
 
															     }
														
 
															   }
														
--- a/lib/HLSL/DxilValidation.cpp
+++ b/lib/HLSL/DxilValidation.cpp
@@ -219,6 +219,7 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
 
															     case hlsl::ValidationRule::SmThreadGroupChannelRange: return "Declared Thread Group %0 size %1 outside valid range [%2..%3].";
														
 
															     case hlsl::ValidationRule::SmMaxTheadGroup: return "Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.";
														
 
															     case hlsl::ValidationRule::SmMaxTGSMSize: return "Total Thread Group Shared Memory storage is %0, exceeded %1.";
														
 
															+    case hlsl::ValidationRule::SmTGSMUnsupported: return "Thread Group Shared Memory not supported %0.";
														
 
															     case hlsl::ValidationRule::SmWaveSizeValue: return "Declared WaveSize %0 outside valid range [%1..%2], or not a power of 2.";
														
 
															     case hlsl::ValidationRule::SmWaveSizeNeedsDxil16Plus: return "WaveSize is valid only for DXIL version 1.6 and higher.";
														
 
															     case hlsl::ValidationRule::SmROVOnlyInPS: return "RasterizerOrdered objects are only allowed in 5.0+ pixel shaders.";
														
@@ -3769,12 +3770,33 @@ static void ValidateTGSMRaceCondition(std::vector<StoreInst *> &fixAddrTGSMList,
 
															 static void ValidateGlobalVariables(ValidationContext &ValCtx) {
														
 
															   DxilModule &M = ValCtx.DxilMod;
														
 
															+  const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel();
														
 
															+  bool TGSMAllowed = pSM->IsCS() || pSM->IsAS() || pSM->IsMS() || pSM->IsLib();
														
 
															+
														
 
															   unsigned TGSMSize = 0;
														
 
															   std::vector<StoreInst*> fixAddrTGSMList;
														
 
															   const DataLayout &DL = M.GetModule()->getDataLayout();
														
 
															   for (GlobalVariable &GV : M.GetModule()->globals()) {
														
 
															     ValidateGlobalVariable(GV, ValCtx);
														
 
															     if (GV.getType()->getAddressSpace() == DXIL::kTGSMAddrSpace) {
														
 
															+      if (!TGSMAllowed)
														
 
															+        ValCtx.EmitGlobalVariableFormatError(&GV, ValidationRule::SmTGSMUnsupported,
														
 
															+                                             { std::string("in Shader Model ") + M.GetShaderModel()->GetName() });
														
 
															+      // Lib targets need to check the usage to know if it's allowed
														
 
															+      if (pSM->IsLib()) {
														
 
															+        for (User *U : GV.users()) {
														
 
															+          if (Instruction *I = dyn_cast<Instruction>(U)) {
														
 
															+            llvm::Function *F = I->getParent()->getParent();
														
 
															+            if (M.HasDxilEntryProps(F)) {
														
 
															+              DxilFunctionProps &props = M.GetDxilEntryProps(F).props;
														
 
															+              if (!props.IsCS() && !props.IsAS() && !props.IsMS()) {
														
 
															+                ValCtx.EmitInstrFormatError(I, ValidationRule::SmTGSMUnsupported,
														
 
															+                                            { "from non-compute entry points" });
														
 
															+              }
														
 
															+            }
														
 
															+          }
														
 
															+        }
														
 
															+      }
														
 
															       TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType());
														
 
															       CollectFixAddressAccess(&GV, fixAddrTGSMList);
														
 
															     }
														
--- a/tools/clang/test/HLSL/ShaderOpArith.xml
+++ b/tools/clang/test/HLSL/ShaderOpArith.xml
@@ -1641,7 +1641,7 @@
 
															           uint addVal = ix; // 32 bits isn't enough room to dupliate upper and lower
														
 
															           uint uminMaxVal = ~value*(~value&1) + value*(value&1);
														
 
															           int sminMaxVal = ~value*(~value&1) + value*(value&1);
														
 
															-          uint xorVal = 1 << (ix%(bitSize-1));
														
 
															+          uint xorVal = 1U << (ix%(bitSize-1));
														
 
															           // make higher bits differ while lower bits match
														
 
															           uint xchgVal = (ix << (bitSize/2)) | ((ix/3)%64);
														
 
															           uint output = 0;
														
@@ -1713,30 +1713,27 @@
 
															         }
														
 
															         void InitSharedMem(uint ix) {
														
 
															-          // Zero-init shared memory
														
 
															-          g_uintShare[ix%6] = 0;
														
 
															-          g_sintShare[ix%3] = 0;
														
 
															-          g_xchgShare[ix%64] = 0;
														
 
															+          // Zero-init shared memory, with special cases
														
 
															+          if (ix < 6)
														
 
															+            g_uintShare[ix] = ix == 1 ? 99999999 : ix == 3 ? -1 : 0;
														
 
															+          if (ix < 3)
														
 
															+            g_sintShare[ix] = ix == 1 ? 99999999 : 0;
														
 
															+          if (ix < 64)
														
 
															+            g_xchgShare[ix] = 0;
														
 
															           GroupMemoryBarrierWithGroupSync();
														
 
															-
														
 
															-          InterlockedCompareStore(g_uintShare[1], 0, 99999999);
														
 
															-          InterlockedCompareStore(g_uintShare[3], 0, -1);
														
 
															-          InterlockedCompareStore(g_sintShare[1], 0, 99999999);
														
 
															         }
														
 
															         void InitSharedMem64(uint ix) {
														
 
															-          // Zero-init shared memory
														
 
															-          g_uint64Share[ix%6] = 0;
														
 
															-          g_sint64Share[ix%3] = 0;
														
 
															-          g_xchg64Share[ix%64] = 0;
														
 
															+          // Zero-init shared memory, with special cases
														
 
															+          if (ix < 6)
														
 
															+            g_uint64Share[ix] = ix == 1 ? 99999999ULL | (99999999ULL << 32) : ix == 3 ? ~0ULL : 0;
														
 
															+          if (ix < 3)
														
 
															+            g_sint64Share[ix] = ix == 1 ? 99999999ULL | (99999999ULL << 32) : 0;
														
 
															+          if (ix < 64)
														
 
															+            g_xchg64Share[ix] = 0;
														
 
															           GroupMemoryBarrierWithGroupSync();
														
 
															-
														
 
															-          InterlockedCompareStore(g_uint64Share[1], 0, 99999999ULL | (99999999ULL << 32));
														
 
															-          InterlockedCompareStore(g_uint64Share[3], 0, ~0ULL);
														
 
															-          InterlockedCompareStore(g_sint64Share[1], 0, 99999999ULL | (99999999ULL << 32));
														
 
															-
														
 
															         }
														
 
															         void AtomicGroupSharedTest(uint ix) {
														
@@ -1746,7 +1743,7 @@
 
															           uint addVal = ix; // 32 bits isn't enough room to dupliate upper and lower
														
 
															           uint uminMaxVal = ~value*(~value&1) + value*(value&1);
														
 
															           int sminMaxVal = ~value*(~value&1) + value*(value&1);
														
 
															-          uint xorVal = 1 << (ix%(bitSize-1));
														
 
															+          uint xorVal = 1U << (ix%(bitSize-1));
														
 
															           uint xchgVal = (ix << (bitSize/2)) | ((ix/3)%64);
														
 
															           uint output = 0;
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/enable-partial-unroll-test01.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/enable-partial-unroll-test01.hlsl
@@ -1,18 +1,19 @@
 
															-// RUN: %dxc /Tps_6_0 /Emain > %s | FileCheck %s
														
 
															+// RUN: %dxc /Tcs_6_0 /Emain > %s | FileCheck %s
														
 
															 // CHECK: define void @main()
														
 
															 // CHECK: entry
														
 
															 #define MAX_INDEX 5
														
 
															 groupshared float g_Array[2][(MAX_INDEX * MAX_INDEX)];
														
 
															+RWStructuredBuffer<float4> output;
														
 
															-[RootSignature("")] float4 main(uint GroupIndex
														
 
															-                                : A) : SV_Target {
														
 
															+[numthreads(1,1,1)] void main(uint GroupIndex
														
 
															+                                : SV_GroupIndex) {
														
 
															   uint idx;
														
 
															   float l_Array[(MAX_INDEX * MAX_INDEX)] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
														
 
															   for (idx = 0; idx < (MAX_INDEX * MAX_INDEX); idx++) {
														
 
															     g_Array[GroupIndex][idx] = l_Array[idx];
														
 
															   }
														
 
															-  return float4(g_Array[GroupIndex][0], g_Array[GroupIndex][1], g_Array[GroupIndex][2], g_Array[GroupIndex][3]);
														
 
															+  output[GroupIndex] = float4(g_Array[GroupIndex][0], g_Array[GroupIndex][1], g_Array[GroupIndex][2], g_Array[GroupIndex][3]);
														
 
															 }
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/enable-partial-unroll-test02.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/enable-partial-unroll-test02.hlsl
@@ -1,17 +1,18 @@
 
															-// RUN: %dxc /Tps_6_0 /Emain > %s | FileCheck %s
														
 
															+// RUN: %dxc /Tcs_6_0 /Emain > %s | FileCheck %s
														
 
															 // CHECK: define void @main()
														
 
															 // CHECK: entry
														
 
															 #define MAX_INDEX 14
														
 
															 groupshared float g_Array[2][(MAX_INDEX * MAX_INDEX)];
														
 
															+RWStructuredBuffer<float4> output;
														
 
															-[RootSignature("")] float4 main(uint GroupIndex
														
 
															-                                : A) : SV_Target {
														
 
															+[numthreads(1,1,1)] void main(uint GroupIndex
														
 
															+                                : SV_GroupIndex) {
														
 
															   uint idx;
														
 
															   for (idx = 0; idx < (MAX_INDEX * MAX_INDEX); idx++) {
														
 
															     g_Array[GroupIndex][idx] = 0.0f;
														
 
															   }
														
 
															-  return float4(g_Array[GroupIndex][0], g_Array[GroupIndex][1], g_Array[GroupIndex][2], g_Array[GroupIndex][3]);
														
 
															+  output[GroupIndex] = float4(g_Array[GroupIndex][0], g_Array[GroupIndex][1], g_Array[GroupIndex][2], g_Array[GroupIndex][3]);
														
 
															 }
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_cmpstr_i64_and_i32.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_cmpstr_i64_and_i32.hlsl
@@ -1,8 +1,8 @@
 
															-// RUN: %dxc -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															+// RUN: %dxc -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
														
 
															 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
														
 
															-// RUN: %dxilver 1.6 | %dxc -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															+// RUN: %dxilver 1.6 | %dxc -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=ERRCHECK
														
@@ -25,7 +25,7 @@ groupshared int64_t  resI64[256];
 
															 // GSCHECK: Note: shader requires additional functionality:
														
 
															 // GSCHECK: 64-bit Atomics on Group Shared
														
 
															-void main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+void dotest( uint a, uint b, uint c)
														
 
															 {
														
 
															   resU[a] = a;
														
 
															   resI[a] = a;
														
@@ -191,3 +191,14 @@ void main( uint a : A, uint b: B, uint c :C) : SV_Target
 
															   InterlockedCompareStore( resI64[a], iv, liv2 );
														
 
															   InterlockedCompareStore( resI64[a], liv, iv2 );
														
 
															 }
														
 
															+
														
 
															+void main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+{
														
 
															+  dotest(a,b,c);
														
 
															+}
														
 
															+
														
 
															+[numthreads(1,1,1)]
														
 
															+void CSMain( uint3 gtid : SV_GroupThreadID)
														
 
															+{
														
 
															+  dotest(gtid.x, gtid.y, gtid.z);
														
 
															+}
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_cmpxchg_i64_and_i32.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_cmpxchg_i64_and_i32.hlsl
@@ -1,8 +1,8 @@
 
															-// RUN: %dxc -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															+// RUN: %dxc -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
														
 
															 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
														
 
															-// RUN: %dxilver 1.6 | %dxc -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															+// RUN: %dxilver 1.6 | %dxc -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=ERRCHECK
														
@@ -25,7 +25,7 @@ groupshared int64_t  resI64[256];
 
															 // GSCHECK: Note: shader requires additional functionality:
														
 
															 // GSCHECK: 64-bit Atomics on Group Shared
														
 
															-void main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+void dotest( uint a, uint b, uint c)
														
 
															 {
														
 
															   resU[a] = a;
														
 
															   resI[a] = a;
														
@@ -213,3 +213,14 @@ void main( uint a : A, uint b: B, uint c :C) : SV_Target
 
															   InterlockedCompareExchange( resI64[a], liv, iv2, oliv );
														
 
															   InterlockedCompareExchange( resI64[a], liv, liv2, oiv );
														
 
															 }
														
 
															+
														
 
															+void main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+{
														
 
															+  dotest(a,b,c);
														
 
															+}
														
 
															+
														
 
															+[numthreads(1,1,1)]
														
 
															+void CSMain( uint3 gtid : SV_GroupThreadID)
														
 
															+{
														
 
															+  dotest(gtid.x, gtid.y, gtid.z);
														
 
															+}
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_float.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_float.hlsl
@@ -1,4 +1,4 @@
 
															-// RUN: %dxc -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															+// RUN: %dxc -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
														
 
															 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
														
@@ -12,8 +12,7 @@ groupshared int      resI[256];
 
															 groupshared int64_t  resI64[256];
														
 
															 #endif
														
 
															-
														
 
															-float4 main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+float4 dotest( uint a, uint b, uint c)
														
 
															 {
														
 
															   float fv = b - c;
														
 
															   float fv2 = b + c;
														
@@ -112,3 +111,15 @@ float4 main( uint a : A, uint b: B, uint c :C) : SV_Target
 
															   return ofv;
														
 
															 }
														
 
															+
														
 
															+float4 main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+{
														
 
															+  return dotest(a,b,c);
														
 
															+}
														
 
															+
														
 
															+RWStructuredBuffer<float4> output;
														
 
															+[numthreads(1,1,1)]
														
 
															+void CSMain( uint3 gtid : SV_GroupThreadID, uint ix : SV_GroupIndex)
														
 
															+{
														
 
															+  output[ix] = dotest(gtid.x, gtid.y, gtid.z);
														
 
															+}
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_float_errors.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_float_errors.hlsl
@@ -1,4 +1,4 @@
 
															-// RUN: %dxc -T ps_6_6 %s | FileCheck %s
														
 
															+// RUN: %dxc -T cs_6_6 %s | FileCheck %s
														
 
															 // Verify that the first arg determines the overload and the others can be what they will
														
@@ -9,8 +9,12 @@ RWBuffer<uint64_t> resBI64;
 
															 RWByteAddressBuffer Rres;
														
 
															-void main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+[numthreads(1,1,1)]
														
 
															+void main( uint3 gtid : SV_GroupThreadID)
														
 
															 {
														
 
															+  uint a = gtid.x;
														
 
															+  uint b = gtid.y;
														
 
															+  uint c = gtid.z;
														
 
															   resGI[a] = a;
														
 
															   resGI64[a] = a;
														
 
															   resBI[a] = a;
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_overload.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_overload.hlsl
@@ -1,19 +1,19 @@
 
															-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=double  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
														
 
															-// RUN: %dxc -no-warnings -T vs_6_2 -DTYPE=float16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
														
 
															-// RUN: %dxc -no-warnings -T vs_6_2 -DTYPE=int16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
														
 
															-// RUN: %dxc -no-warnings -T vs_6_2 -DTYPE=uint16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
														
 
															-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=bool  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
														
 
															-// RUN: %dxilver 1.6 | %dxc -no-warnings -T vs_6_5 -DTYPE=int64_t  %s | %FileCheck %s -check-prefix=VALFAIL
														
 
															-// RUN: %dxilver 1.6 | %dxc -no-warnings -T vs_6_5 -DTYPE=uint64_t  %s | %FileCheck %s -check-prefix=VALFAIL
														
 
															+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=double  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
														
 
															+// RUN: %dxc -no-warnings -T cs_6_2 -DTYPE=float16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
														
 
															+// RUN: %dxc -no-warnings -T cs_6_2 -DTYPE=int16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
														
 
															+// RUN: %dxc -no-warnings -T cs_6_2 -DTYPE=uint16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
														
 
															+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=bool  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
														
 
															+// RUN: %dxilver 1.6 | %dxc -no-warnings -T cs_6_5 -DTYPE=int64_t  %s | %FileCheck %s -check-prefix=VALFAIL
														
 
															+// RUN: %dxilver 1.6 | %dxc -no-warnings -T cs_6_5 -DTYPE=uint64_t  %s | %FileCheck %s -check-prefix=VALFAIL
														
 
															-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=float  %s | %FileCheck %s -check-prefixes=INTFAIL,
														
 
															-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=half  %s | %FileCheck %s -check-prefixes=INTFAIL
														
 
															+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=float  %s | %FileCheck %s -check-prefixes=INTFAIL,
														
 
															+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=half  %s | %FileCheck %s -check-prefixes=INTFAIL
														
 
															-// RUN: %dxc -no-warnings -T vs_6_6 -DTYPE=int64_t  %s | %FileCheck %s -check-prefixes=INTCHK
														
 
															-// RUN: %dxc -no-warnings -T vs_6_6 -DTYPE=uint64_t  %s | %FileCheck %s -check-prefixes=INTCHK
														
 
															-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=int  %s | %FileCheck %s -check-prefixes=INTCHK
														
 
															-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=uint  %s | %FileCheck %s -check-prefixes=INTCHK
														
 
															+// RUN: %dxc -no-warnings -T cs_6_6 -DTYPE=int64_t  %s | %FileCheck %s -check-prefixes=INTCHK
														
 
															+// RUN: %dxc -no-warnings -T cs_6_6 -DTYPE=uint64_t  %s | %FileCheck %s -check-prefixes=INTCHK
														
 
															+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=int  %s | %FileCheck %s -check-prefixes=INTCHK
														
 
															+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=uint  %s | %FileCheck %s -check-prefixes=INTCHK
														
 
															 // Test various Interlocked ops using different memory types with invalid types
														
@@ -22,7 +22,10 @@ RWBuffer<TYPE> rw_res;
 
															 groupshared TYPE gs_res;
														
 
															 RWByteAddressBuffer ba_res;
														
 
															-float main() :OUT{
														
 
															+RWStructuredBuffer<float4> output;
														
 
															+
														
 
															+[numthreads(1,1,1)]
														
 
															+void main(uint ix : SV_GroupIndex) {
														
 
															   int val = 1;
														
 
															   TYPE comp = 1;
														
 
															   TYPE orig;
														
@@ -213,5 +216,5 @@ float main() :OUT{
 
															   InterlockedCompareExchange(rw_res[0], comp, val, orig);
														
 
															   InterlockedCompareExchange(gs_res, comp, val, orig);
														
 
															-  return (float)rw_res[0] + gs_res;
														
 
															+  output[ix] = (float)rw_res[0] + gs_res;
														
 
															 }
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_xchg_i64_and_i32.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_xchg_i64_and_i32.hlsl
@@ -1,8 +1,8 @@
 
															-// RUN: %dxc -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															+// RUN: %dxc -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
														
 
															 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
														
 
															-// RUN: %dxilver 1.6 | %dxc -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															+// RUN: %dxilver 1.6 | %dxc -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=ERRCHECK
														
@@ -25,7 +25,7 @@ groupshared int64_t  resI64[256];
 
															 // GSCHECK: Note: shader requires additional functionality:
														
 
															 // GSCHECK: 64-bit Atomics on Group Shared
														
 
															-void main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+void dotest( uint a, uint b, uint c)
														
 
															 {
														
 
															   resU[a] = a;
														
 
															   resI[a] = a;
														
@@ -183,3 +183,14 @@ void main( uint a : A, uint b: B, uint c :C) : SV_Target
 
															   InterlockedExchange( resI64[a], iv, liv2 );
														
 
															   InterlockedExchange( resI64[a], liv, iv2 );
														
 
															 }
														
 
															+
														
 
															+void main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+{
														
 
															+  dotest(a,b,c);
														
 
															+}
														
 
															+
														
 
															+[numthreads(1,1,1)]
														
 
															+void CSMain( uint3 gtid : SV_GroupThreadID)
														
 
															+{
														
 
															+  dotest(gtid.x, gtid.y, gtid.z);
														
 
															+}
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_float.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_float.hlsl
@@ -1,10 +1,11 @@
 
															-// RUN: %dxc -T ps_6_6 %s | FileCheck %s
														
 
															+// RUN: %dxc -T cs_6_6 %s | FileCheck %s
														
 
															 groupshared float   resG[256];
														
 
															 RWBuffer<float>     resB;
														
 
															 RWStructuredBuffer<float> resS;
														
 
															-void main( float a : A, int b: B, float c :C) : SV_Target
														
 
															+[numthreads(1,1,1)]
														
 
															+void main( float a : A, int b: B, float c :C)
														
 
															 {
														
 
															   // Test some disallowed atomic binop intrinsics with floats as both args
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_i64.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_i64.hlsl
@@ -1,4 +1,4 @@
 
															-// RUN: %dxc -T ps_6_6 %s | FileCheck %s
														
 
															+// RUN: %dxc -T cs_6_6 %s | FileCheck %s
														
 
															 // A test to verify that 64-bit atomic binary operation intrinsics select the right variant
														
@@ -11,8 +11,11 @@ groupshared uint64_t ugs[256];
 
															 RWBuffer<uint64_t> utb;
														
 
															 RWStructuredBuffer<uint64_t> usb;
														
 
															-void main( uint a : A, uint b: B) : SV_Target
														
 
															+[numthreads(1,1,1)]
														
 
															+void main( uint3 gtid : SV_GroupThreadID)
														
 
															 {
														
 
															+  uint a = gtid.x;
														
 
															+  uint b = gtid.y;
														
 
															   uint64_t luv = a * b;
														
 
															   int64_t liv = a + b;
														
 
															   uint ix = 0;
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_i64_and_i32.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_i64_and_i32.hlsl
@@ -1,9 +1,9 @@
 
															-// RUN: %dxc -DINTRIN=InterlockedAdd -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															-// RUN: %dxc -DINTRIN=InterlockedMin -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															-// RUN: %dxc -DINTRIN=InterlockedMax -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															-// RUN: %dxc -DINTRIN=InterlockedAnd -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															-// RUN: %dxc -DINTRIN=InterlockedOr -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															-// RUN: %dxc -DINTRIN=InterlockedXor -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															+// RUN: %dxc -DINTRIN=InterlockedAdd -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															+// RUN: %dxc -DINTRIN=InterlockedMin -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															+// RUN: %dxc -DINTRIN=InterlockedMax -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															+// RUN: %dxc -DINTRIN=InterlockedAnd -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															+// RUN: %dxc -DINTRIN=InterlockedOr -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															+// RUN: %dxc -DINTRIN=InterlockedXor -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
														
 
															 // RUN: %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedAdd -T ps_6_6 %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
														
 
															 // RUN: %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedMin -T ps_6_6 %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
														
@@ -26,12 +26,12 @@
 
															 // RUN: %dxilver 1.6 | %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedOr -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															 // RUN: %dxilver 1.6 | %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedXor -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedAdd -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedMin -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedMax -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedAnd -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedOr -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedXor -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedAdd -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedMin -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedMax -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedAnd -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedOr -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedXor -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
														
 
															 // Verify that the first arg determines the overload and the others can be what they will
														
@@ -52,7 +52,7 @@ groupshared int64_t  resI64[256];
 
															 // GSCHECK: Note: shader requires additional functionality:
														
 
															 // GSCHECK: 64-bit Atomics on Group Shared
														
 
															-void main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+void dotest( uint a, uint b, uint c)
														
 
															 {
														
 
															   resU[a] = a;
														
 
															   resI[a] = a;
														
@@ -126,3 +126,14 @@ void main( uint a : A, uint b: B, uint c :C) : SV_Target
 
															   INTRIN( resU64[a], 3.0 );
														
 
															   INTRIN( resI64[a], 4.0 );
														
 
															 }
														
 
															+
														
 
															+void main( uint a : A, uint b: B, uint c :C) : SV_Target
														
 
															+{
														
 
															+  dotest(a,b,c);
														
 
															+}
														
 
															+
														
 
															+[numthreads(1,1,1)]
														
 
															+void CSMain( uint3 gtid : SV_GroupThreadID)
														
 
															+{
														
 
															+  dotest(gtid.x, gtid.y, gtid.z);
														
 
															+}
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/types/conversions/varmods-syntax_Mod.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/types/conversions/varmods-syntax_Mod.hlsl
@@ -1,4 +1,4 @@
 
															-// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
														
 
															+// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
														
 
															 // CHECK: @main
														
@@ -288,6 +288,7 @@ float4 foo_interpolation_different_decl(sample float4 val) {
 
															 //////////////////////////////////////////////////////////////////////////////
														
 
															 // Locals.
														
 
															+[numthreads(1,1,1)]
														
 
															 void main() {
														
 
															     // <py::lines('GENERATED_CODE')>modify(lines, gen_code('%(mods)s float l_%(id)s;', storage_combos))</py>
														
 
															     // GENERATED_CODE:BEGIN
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl
@@ -2,10 +2,15 @@
 
															 // RUN: %dxc -DMIDX=i -DVIDX=2 -T ps_6_0 %s | FileCheck %s
														
 
															 // RUN: %dxc -DMIDX=1 -DVIDX=j -T ps_6_0 %s | FileCheck %s
														
 
															 // RUN: %dxc -DMIDX=i -DVIDX=j -T ps_6_0 %s | FileCheck %s
														
 
															-// RUN: %dxc -DMIDX=1 -DVIDX=2 -T lib_6_3 %s | FileCheck %s
														
 
															-// RUN: %dxc -DMIDX=i -DVIDX=2 -T lib_6_3 %s | FileCheck %s
														
 
															-// RUN: %dxc -DMIDX=1 -DVIDX=j -T lib_6_3 %s | FileCheck %s
														
 
															-// RUN: %dxc -DMIDX=i -DVIDX=j -T lib_6_3 %s | FileCheck %s
														
 
															+
														
 
															+// RUN: %dxc -DMIDX=1 -DVIDX=2 -T cs_6_0 -E CSMain -DGS %s | FileCheck %s -check-prefix=CSCHK
														
 
															+// RUN: %dxc -DMIDX=i -DVIDX=2 -T cs_6_0 -E CSMain -DGS %s | FileCheck %s -check-prefix=CSCHK
														
 
															+// RUN: %dxc -DMIDX=1 -DVIDX=j -T cs_6_0 -E CSMain -DGS %s | FileCheck %s -check-prefix=CSCHK
														
 
															+// RUN: %dxc -DMIDX=i -DVIDX=j -T cs_6_0 -E CSMain -DGS %s | FileCheck %s -check-prefix=CSCHK
														
 
															+// RUN: %dxc -DMIDX=1 -DVIDX=2 -T lib_6_3 %s -DGS | FileCheck %s -check-prefixes=CSCHK,CHECK
														
 
															+// RUN: %dxc -DMIDX=i -DVIDX=2 -T lib_6_3 %s -DGS | FileCheck %s -check-prefixes=CSCHK,CHECK
														
 
															+// RUN: %dxc -DMIDX=1 -DVIDX=j -T lib_6_3 %s -DGS | FileCheck %s -check-prefixes=CSCHK,CHECK
														
 
															+// RUN: %dxc -DMIDX=i -DVIDX=j -T lib_6_3 %s -DGS | FileCheck %s -check-prefixes=CSCHK,CHECK
														
 
															 // Test for general subscript operations on matrix arrays.
														
 
															 // Specifically focused on shader inputs which failed to lower previously
														
@@ -26,6 +31,26 @@ struct MtxArray {
 
															   float3x3 mtx[2];
														
 
															 };
														
 
															+RWStructuredBuffer<float3> output;
														
 
															+
														
 
															+[shader("compute")]
														
 
															+[numthreads(8,8,1)]
														
 
															+void CSMain(uint3 gtid : SV_GroupThreadID, uint ix : SV_GroupIndex)
														
 
															+{
														
 
															+  float3 ret = 0.0;
														
 
															+  uint i = gtid.x;
														
 
															+  uint j = gtid.y;
														
 
															+
														
 
															+  // CSCHK: load float, float addrspace(3)*
														
 
															+  // CSCHK: load float, float addrspace(3)*
														
 
															+  // CSCHK: load float, float addrspace(3)*
														
 
															+  ret += gs[MIDX][VIDX];
														
 
															+
														
 
															+  ret += GetRow(gs[MIDX], VIDX);
														
 
															+
														
 
															+  output[ix] = ret;
														
 
															+}
														
 
															+
														
 
															 [shader("pixel")]
														
 
															 float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm[2] : JM, MtxArray ma : A) : SV_Target
														
 
															 {
														
@@ -36,11 +61,6 @@ float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm
 
															   // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32
														
 
															   ret += g[MIDX][VIDX];
														
 
															-  // CHECK: load float, float addrspace(3)*
														
 
															-  // CHECK: load float, float addrspace(3)*
														
 
															-  // CHECK: load float, float addrspace(3)*
														
 
															-  ret += gs[MIDX][VIDX];
														
 
															-
														
 
															   // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef)
														
 
															   // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef)
														
 
															   // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef)
														
@@ -57,7 +77,6 @@ float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm
 
															   ret += ma.mtx[MIDX][VIDX];
														
 
															   ret += GetRow(g[MIDX], VIDX);
														
 
															-  ret += GetRow(gs[MIDX], VIDX);
														
 
															   ret += GetRow(m[MIDX], VIDX);
														
 
															   ret += GetRow(jm[MIDX].mtx, VIDX);
														
 
															   ret += GetRow(ma.mtx[MIDX], VIDX);
														
@@ -67,3 +86,4 @@ float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm
 
															   // CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %{{.*}})
														
 
															   return ret;
														
 
															 }
														
 
															+
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/global/global-var-write-test05.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/global/global-var-write-test05.hlsl
@@ -1,14 +1,18 @@
 
															-// RUN: %dxc -E main -T ps_6_0 /Gec -HV 2016 > %s | FileCheck %s
														
 
															+// RUN: %dxc -E main -T cs_6_0 /Gec -HV 2016 > %s | FileCheck %s
														
 
															 // CHECK: define void @main()
														
 
															 // CHECK: ret void
														
 
															+Texture2D<float3> InColor : register(t0);
														
 
															 RWTexture2D<float3> Color : register(u0);
														
 
															+RWTexture2D<float3> OutColor : register(u1);
														
 
															 groupshared uint PixelCountH;
														
 
															-uint main( uint2 a : A, float3 b : B ) : SV_Target
														
 
															+[numthreads(64,16,1)]
														
 
															+void main( uint3 gtid : SV_GroupThreadID )
														
 
															 {
														
 
															- Color[a] = b; 
														
 
															+ uint2 a = gtid.xy;
														
 
															+ Color[a] = InColor[a];
														
 
															  PixelCountH = Color[a].x * 1;
														
 
															- return PixelCountH;
														
 
															+ OutColor[a] = PixelCountH;
														
 
															 }
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/groupshared/groupshared_shadermodels.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/groupshared/groupshared_shadermodels.hlsl
@@ -0,0 +1,217 @@
 
															+// RUN: %dxc -E PSMain -T ps_6_0 %s | FileCheck %s
														
 
															+// RUN: %dxc -E VSMain -T vs_6_0 %s | FileCheck %s
														
 
															+// RUN: %dxc -E GSMain -T gs_6_0 %s | FileCheck %s
														
 
															+// RUN: %dxc -E HSMain -T hs_6_0 %s | FileCheck %s
														
 
															+// RUN: %dxc -E DSMain -T ds_6_0 %s | FileCheck %s
														
 
															+// RUN: %dxc -E CSMain -T lib_6_5 %s | FileCheck %s -check-prefix=LIBCHK
														
 
															+// RUN: %dxc -E CSMain -T cs_6_0 %s | FileCheck %s -check-prefix=CSCHK
														
 
															+// RUN: %dxc -E MSMain -T ms_6_5 %s | FileCheck %s -check-prefix=CSCHK
														
 
															+// RUN: %dxc -E ASMain -T as_6_5 %s | FileCheck %s -check-prefix=CSCHK
														
 
															+
														
 
															+// Test that the proper error for groupshared is produced when compiling in non-compute contexts
														
 
															+// and that compilation succeeds in compute, mesh, and amplification contexts.
														
 
															+
														
 
															+
														
 
															+// CSCHK: @[[gs:.*]] = addrspace(3) global float
														
 
															+
														
 
															+// CHECK: error: Thread Group Shared Memory not supported in Shader Model
														
 
															+// CHECK: error: Thread Group Shared Memory not supported in Shader Model
														
 
															+// CHECK: error: Thread Group Shared Memory not supported in Shader Model
														
 
															+// CHECK: error: Thread Group Shared Memory not supported in Shader Model
														
 
															+groupshared float4 foo;
														
 
															+
														
 
															+RWStructuredBuffer<float4> output;
														
 
															+
														
 
															+int4 getit()
														
 
															+{
														
 
															+  // CSCHK: load float, float addrspace(3)* @[[gs]]
														
 
															+  return foo;
														
 
															+}
														
 
															+
														
 
															+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
														
 
															+// LIBCHK: of function 'VSMain'
														
 
															+[shader("vertex")]
														
 
															+float4 VSMain(uint ix : SV_VertexID) : OUT {
														
 
															+  output[ix] = getit();
														
 
															+  return 1.0;
														
 
															+}
														
 
															+
														
 
															+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
														
 
															+// LIBCHK: of function 'PSMain'
														
 
															+[shader("pixel")]
														
 
															+float4 PSMain(uint ix : SV_PrimitiveID) : SV_TARGET {
														
 
															+  output[ix] = getit();
														
 
															+  return 1.0;
														
 
															+}
														
 
															+
														
 
															+[shader("compute")]
														
 
															+[NumThreads(32, 32, 1)]
														
 
															+void CSMain(uint ix : SV_GroupIndex) {
														
 
															+  output[ix] = getit();
														
 
															+}
														
 
															+
														
 
															+struct payload_t { int nothing; };
														
 
															+
														
 
															+
														
 
															+[shader("amplification")]
														
 
															+[NumThreads(8, 8, 2)]
														
 
															+void ASMain(uint ix : SV_GroupIndex) {
														
 
															+  output[ix] = getit();
														
 
															+  payload_t p = {0};
														
 
															+  DispatchMesh(1, 1, 1, p);
														
 
															+}
														
 
															+
														
 
															+[shader("mesh")]
														
 
															+[NumThreads(8, 8, 2)]
														
 
															+[OutputTopology("triangle")]
														
 
															+void MSMain(uint ix : SV_GroupIndex) {
														
 
															+  output[ix] = getit();
														
 
															+}
														
 
															+
														
 
															+struct PosStruct {
														
 
															+  float4 pos : SV_Position;
														
 
															+};
														
 
															+
														
 
															+float4 a;
														
 
															+
														
 
															+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
														
 
															+// LIBCHK: of function 'GSMain'
														
 
															+[shader("geometry")]
														
 
															+[maxvertexcount(1)]
														
 
															+void GSMain(triangle float4 array[3] : SV_Position, uint ix : SV_GSInstanceID,
														
 
															+            inout PointStream<PosStruct> OutputStream)
														
 
															+{
														
 
															+  output[ix] = getit();
														
 
															+  PosStruct s;
														
 
															+  s.pos = a;
														
 
															+  OutputStream.Append(s);
														
 
															+  OutputStream.RestartStrip();
														
 
															+}
														
 
															+
														
 
															+struct PCStruct
														
 
															+{
														
 
															+  float Edges[3]  : SV_TessFactor;
														
 
															+  float Inside : SV_InsideTessFactor;
														
 
															+  float4 test : TEST;
														
 
															+};
														
 
															+
														
 
															+PCStruct HSPatch(InputPatch<PosStruct, 3> ip,
														
 
															+                 OutputPatch<PosStruct, 3> op,
														
 
															+                 uint ix : SV_PrimitiveID)
														
 
															+{
														
 
															+  output[ix] = getit();
														
 
															+  PCStruct a;
														
 
															+  a.Edges[0] = ip[0].pos.w;
														
 
															+  a.Edges[1] = ip[0].pos.w;
														
 
															+  a.Edges[2] = ip[0].pos.w;
														
 
															+  a.Inside = ip[0].pos.w;
														
 
															+  return a;
														
 
															+}
														
 
															+
														
 
															+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
														
 
															+// LIBCHK: of function 'HSMain'
														
 
															+[shader("hull")]
														
 
															+[domain("tri")]
														
 
															+[partitioning("fractional_odd")]
														
 
															+[outputtopology("triangle_cw")]
														
 
															+[outputcontrolpoints(3)]
														
 
															+[patchconstantfunc("HSPatch")]
														
 
															+PosStruct HSMain(InputPatch<PosStruct, 3> p,
														
 
															+                 uint ix : SV_OutputControlPointID)
														
 
															+{
														
 
															+  output[ix] = getit();
														
 
															+  PosStruct s;
														
 
															+  s.pos = p[ix].pos;
														
 
															+  return s;
														
 
															+}
														
 
															+
														
 
															+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
														
 
															+// LIBCHK: of function 'DSMain'
														
 
															+[shader("domain")]
														
 
															+[domain("tri")]
														
 
															+PosStruct DSMain(const OutputPatch<PosStruct, 3> patch,
														
 
															+                 uint ix : SV_PrimitiveID)
														
 
															+{
														
 
															+  output[ix] = getit();
														
 
															+  PosStruct v;
														
 
															+  v.pos = patch[0].pos;
														
 
															+  return v;
														
 
															+}
														
 
															+
														
 
															+struct MyPayload {
														
 
															+  float4 color;
														
 
															+  uint3 pos;
														
 
															+};
														
 
															+
														
 
															+struct MyAttributes {
														
 
															+  float2 bary;
														
 
															+  uint id;
														
 
															+};
														
 
															+
														
 
															+struct MyParam {
														
 
															+  float2 coord;
														
 
															+  float4 output;
														
 
															+};
														
 
															+
														
 
															+
														
 
															+RaytracingAccelerationStructure RTAS : register(t5);
														
 
															+
														
 
															+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
														
 
															+// LIBCHK: of function {{.*}}RGMain
														
 
															+[shader("raygeneration")]
														
 
															+void RGMain()
														
 
															+{
														
 
															+  MyPayload p = (MyPayload)0;
														
 
															+  p.pos = DispatchRaysIndex();
														
 
															+  p.color = getit();
														
 
															+  float3 origin = {0, 0, 0};
														
 
															+  float3 dir = normalize(p.pos / (float)DispatchRaysDimensions());
														
 
															+  RayDesc ray = { origin, 0.125, dir, 128.0};
														
 
															+  TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p);
														
 
															+}
														
 
															+
														
 
															+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
														
 
															+// LIBCHK: of function {{.*}}ISMain
														
 
															+[shader("intersection")]
														
 
															+void ISMain()
														
 
															+{
														
 
															+  float hitT = RayTCurrent();
														
 
															+  MyAttributes attr = (MyAttributes)0;
														
 
															+  attr.bary = getit().xy;
														
 
															+  bool bReported = ReportHit(hitT, 0, attr);
														
 
															+}
														
 
															+
														
 
															+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
														
 
															+// LIBCHK: of function {{.*}}AHMain
														
 
															+[shader("anyhit")]
														
 
															+void AHMain( inout MyPayload payload : SV_RayPayload,
														
 
															+             in MyAttributes attr : SV_IntersectionAttributes )
														
 
															+{
														
 
															+  float3 hitLocation = ObjectRayOrigin() + ObjectRayDirection() * RayTCurrent();
														
 
															+  if (hitLocation.z < attr.bary.x)
														
 
															+    AcceptHitAndEndSearch();         // aborts function
														
 
															+  if (hitLocation.z < attr.bary.y)
														
 
															+    IgnoreHit();   // aborts function
														
 
															+  payload.color += getit();
														
 
															+}
														
 
															+
														
 
															+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
														
 
															+// LIBCHK: of function {{.*}}CHMain
														
 
															+[shader("closesthit")]
														
 
															+void CHMain( inout MyPayload payload : SV_RayPayload,
														
 
															+             in BuiltInTriangleIntersectionAttributes attr : SV_IntersectionAttributes )
														
 
															+{
														
 
															+  MyParam param = {attr.barycentrics, getit()};
														
 
															+  CallShader(7, param);
														
 
															+  payload.color += param.output;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
														
 
															+// LIBCHK: of function {{.*}}MissMain
														
 
															+[shader("miss")]
														
 
															+void MissMain(inout MyPayload payload : SV_RayPayload)
														
 
															+{
														
 
															+  payload.color = getit();
														
 
															+}
														
 
															+
														
--- a/tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/groupshared/this_ptr_address_space.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/groupshared/this_ptr_address_space.hlsl
@@ -1,4 +1,4 @@
 
															-// RUN: %dxc -E main -T vs_6_2 %s | FileCheck %s
														
 
															+// RUN: %dxc -E main -T cs_6_2 %s | FileCheck %s
														
 
															 // Test that the address space of the this pointer is honored
														
 
															 // when accessing data members or calling member functions.
														
@@ -10,9 +10,12 @@ int i, j;
 
															 // CHECK: @[[gs:.*]] = addrspace(3) global [2 x i32] undef
														
 
															 groupshared Foo foo[2];
														
 
															-int4 main() : OUT
														
 
															+RWStructuredBuffer<int4> output;
														
 
															+
														
 
															+[numthreads(8,8,1)]
														
 
															+void main( uint gidx : SV_GroupIndex )
														
 
															 {
														
 
															-  return int4(
														
 
															+  output[gidx] = int4(
														
 
															     // getelementptr & addrspacecast constant expressions
														
 
															     // CHECK: load i32, i32 addrspace(3)* getelementptr inbounds ([2 x i32], [2 x i32] addrspace(3)* @[[gs]], i32 0, i32 0)
														
 
															     foo[0].x, 
														
--- a/tools/clang/test/HLSLFileCheck/passes/dxil/dxil_cleanup_addrspacecast/remove-addrspacecastinst.hlsl
+++ b/tools/clang/test/HLSLFileCheck/passes/dxil/dxil_cleanup_addrspacecast/remove-addrspacecastinst.hlsl
@@ -1,4 +1,4 @@
 
															-// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s
														
 
															+// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
														
 
															 // CHECK: @main()
														
 
															 // CHECK-NOT: addrspacecast
														
@@ -6,5 +6,7 @@
 
															 struct Foo { int x; int getX() { return x; } };
														
 
															 groupshared Foo foo[2];
														
 
															+RWStructuredBuffer<int> output;
														
 
															 int i;
														
 
															-int main() : OUT { return foo[i].getX(); }
														
 
															+[numthreads(1,1,1)]
														
 
															+void main() { output[i] =  foo[i].getX(); }
														
--- a/tools/clang/test/HLSLFileCheck/passes/dxil/dxil_o0_legalize/store_undef.hlsl
+++ b/tools/clang/test/HLSLFileCheck/passes/dxil/dxil_o0_legalize/store_undef.hlsl
@@ -1,4 +1,4 @@
 
															-// RUN: %dxc %s -T ps_6_0 -Od | FileCheck %s
														
 
															+// RUN: %dxc %s -T cs_6_0 -Od | FileCheck %s
														
 
															 // Regression test for validation failure in O0 due to
														
 
															 // storing structure with uninitialized member.
														
@@ -39,11 +39,10 @@ float bar(Foo f) {
 
															   return f.e;
														
 
															 }
														
 
															-float main(uint3 off : OFF) : SV_Target {
														
 
															+RWStructuredBuffer<float> output;
														
 
															+
														
 
															+[numthreads(1,1,1)]
														
 
															+void main() {
														
 
															   foo(1, 2, 0);
														
 
															-  return bar(foos[3]);
														
 
															+  output[0] = bar(foos[3]);
														
 
															 }
														
 
															-
														
 
															-
														
 
															-
														
 
															-
														
--- a/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/groupshared_array_struct_matrix_regression.hlsl
+++ b/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/groupshared_array_struct_matrix_regression.hlsl
@@ -1,4 +1,4 @@
 
															-// RUN: %dxc -E main -T vs_6_2 %s | FileCheck %s
														
 
															+// RUN: %dxc -E main -T cs_6_2 %s | FileCheck %s
														
 
															 // Regression test for GitHub #1631, where SROA would generate more uses
														
 
															 // of a value while processing it (due to expanding a memcpy) and fail
														
@@ -11,4 +11,5 @@
 
															 struct S { int1x1 x, y; };
														
 
															 groupshared S gs[1];
														
 
															 void f(S s[1]) {}
														
 
															+[numthreads(1,1,1)]
														
 
															 void main() { f(gs); }
														
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -2582,6 +2582,7 @@ class db_dxil(object):
 
															         self.add_valrule("Sm.ThreadGroupChannelRange", "Declared Thread Group %0 size %1 outside valid range [%2..%3].")
														
 
															         self.add_valrule("Sm.MaxTheadGroup", "Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.")
														
 
															         self.add_valrule("Sm.MaxTGSMSize", "Total Thread Group Shared Memory storage is %0, exceeded %1.")
														
 
															+        self.add_valrule("Sm.TGSMUnsupported", "Thread Group Shared Memory not supported %0.")
														
 
															         self.add_valrule("Sm.WaveSizeValue", "Declared WaveSize %0 outside valid range [%1..%2], or not a power of 2.")
														
 
															         self.add_valrule("Sm.WaveSizeNeedsDxil16Plus", "WaveSize is valid only for DXIL version 1.6 and higher.")
														
 
															         self.add_valrule("Sm.ROVOnlyInPS", "RasterizerOrdered objects are only allowed in 5.0+ pixel shaders.")