Przeglądaj źródła

Error for groupshared outside of compute (#3472)

groupshared variables shouldn't be allowed outside of compute and
compute-like shaders. This adds a validation error when such
variables are used.

Add a shader mask where groupshared variables are used

Correct several tests that used groupshared incorrectly

Incidental fix to the atomics tests: the special-case initializations
might have allowed threads whose write failed to proceed before the
write took place and read improperly initialized values

Changed up the assignments to avoid write collisions and be more
readable

Fixes #2603, #2677
Greg Roth 4 lat temu
rodzic
commit
5fbaf73466
25 zmienionych plików z 449 dodań i 101 usunięć
  1. 1 0
      docs/DXIL.rst
  2. 1 0
      include/dxc/HLSL/DxilValidation.h
  3. 11 0
      lib/DxilContainer/DxilContainerAssembler.cpp
  4. 22 0
      lib/HLSL/DxilValidation.cpp
  5. 16 19
      tools/clang/test/HLSL/ShaderOpArith.xml
  6. 5 4
      tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/enable-partial-unroll-test01.hlsl
  7. 5 4
      tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/enable-partial-unroll-test02.hlsl
  8. 14 3
      tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_cmpstr_i64_and_i32.hlsl
  9. 14 3
      tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_cmpxchg_i64_and_i32.hlsl
  10. 14 3
      tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_float.hlsl
  11. 6 2
      tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_float_errors.hlsl
  12. 18 15
      tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_overload.hlsl
  13. 14 3
      tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_xchg_i64_and_i32.hlsl
  14. 3 2
      tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_float.hlsl
  15. 5 2
      tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_i64.hlsl
  16. 24 13
      tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_i64_and_i32.hlsl
  17. 2 1
      tools/clang/test/HLSLFileCheck/hlsl/types/conversions/varmods-syntax_Mod.hlsl
  18. 30 10
      tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl
  19. 8 4
      tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/global/global-var-write-test05.hlsl
  20. 217 0
      tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/groupshared/groupshared_shadermodels.hlsl
  21. 6 3
      tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/groupshared/this_ptr_address_space.hlsl
  22. 4 2
      tools/clang/test/HLSLFileCheck/passes/dxil/dxil_cleanup_addrspacecast/remove-addrspacecastinst.hlsl
  23. 6 7
      tools/clang/test/HLSLFileCheck/passes/dxil/dxil_o0_legalize/store_undef.hlsl
  24. 2 1
      tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/groupshared_array_struct_matrix_regression.hlsl
  25. 1 0
      utils/hct/hctdb.py

+ 1 - 0
docs/DXIL.rst

@@ -3184,6 +3184,7 @@ SM.SEMANTIC                               Semantic must be defined in target sha
 SM.STREAMINDEXRANGE                       Stream index (%0) must between 0 and %1.
 SM.STREAMINDEXRANGE                       Stream index (%0) must between 0 and %1.
 SM.TESSFACTORFORDOMAIN                    Required TessFactor for domain not found declared anywhere in Patch Constant data.
 SM.TESSFACTORFORDOMAIN                    Required TessFactor for domain not found declared anywhere in Patch Constant data.
 SM.TESSFACTORSIZEMATCHDOMAIN              TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
 SM.TESSFACTORSIZEMATCHDOMAIN              TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
+SM.TGSMUNSUPPORTED                        Thread Group Shared Memory not supported %0.
 SM.THREADGROUPCHANNELRANGE                Declared Thread Group %0 size %1 outside valid range [%2..%3].
 SM.THREADGROUPCHANNELRANGE                Declared Thread Group %0 size %1 outside valid range [%2..%3].
 SM.TRIOUTPUTPRIMITIVEMISMATCH             Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain.
 SM.TRIOUTPUTPRIMITIVEMISMATCH             Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain.
 SM.UNDEFINEDOUTPUT                        Not all elements of output %0 were written.
 SM.UNDEFINEDOUTPUT                        Not all elements of output %0 were written.

+ 1 - 0
include/dxc/HLSL/DxilValidation.h

@@ -258,6 +258,7 @@ enum class ValidationRule : unsigned {
   SmSampleCountOnlyOn2DMS, // Only Texture2DMS/2DMSArray could has sample count.
   SmSampleCountOnlyOn2DMS, // Only Texture2DMS/2DMSArray could has sample count.
   SmSemantic, // Semantic must be defined in target shader model
   SmSemantic, // Semantic must be defined in target shader model
   SmStreamIndexRange, // Stream index (%0) must between 0 and %1.
   SmStreamIndexRange, // Stream index (%0) must between 0 and %1.
+  SmTGSMUnsupported, // Thread Group Shared Memory not supported %0.
   SmTessFactorForDomain, // Required TessFactor for domain not found declared anywhere in Patch Constant data.
   SmTessFactorForDomain, // Required TessFactor for domain not found declared anywhere in Patch Constant data.
   SmTessFactorSizeMatchDomain, // TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
   SmTessFactorSizeMatchDomain, // TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
   SmThreadGroupChannelRange, // Declared Thread Group %0 size %1 outside valid range [%2..%3].
   SmThreadGroupChannelRange, // Declared Thread Group %0 size %1 outside valid range [%2..%3].

+ 11 - 0
lib/DxilContainer/DxilContainerAssembler.cpp

@@ -1110,7 +1110,18 @@ private:
           info.minMinor = minor;
           info.minMinor = minor;
         }
         }
         info.mask &= mask;
         info.mask &= mask;
+      } else if (const llvm::LoadInst *LI = dyn_cast<LoadInst>(user)) {
+        // If loading a groupshared variable, limit to CS/AS/MS
+#define SFLAG(stage) ((unsigned)1 << (unsigned)DXIL::ShaderKind::stage)
+        if (LI->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
+          const llvm::Function *F = cast<const llvm::Function>(CI->getParent()->getParent());
+          ShaderCompatInfo &info = m_FuncToShaderCompat[F];
+          info.mask &= (SFLAG(Compute) | SFLAG(Mesh) | SFLAG(Amplification));
+        }
+#undef SFLAG
+
       }
       }
+
     }
     }
   }
   }
 
 

+ 22 - 0
lib/HLSL/DxilValidation.cpp

@@ -219,6 +219,7 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::SmThreadGroupChannelRange: return "Declared Thread Group %0 size %1 outside valid range [%2..%3].";
     case hlsl::ValidationRule::SmThreadGroupChannelRange: return "Declared Thread Group %0 size %1 outside valid range [%2..%3].";
     case hlsl::ValidationRule::SmMaxTheadGroup: return "Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.";
     case hlsl::ValidationRule::SmMaxTheadGroup: return "Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.";
     case hlsl::ValidationRule::SmMaxTGSMSize: return "Total Thread Group Shared Memory storage is %0, exceeded %1.";
     case hlsl::ValidationRule::SmMaxTGSMSize: return "Total Thread Group Shared Memory storage is %0, exceeded %1.";
+    case hlsl::ValidationRule::SmTGSMUnsupported: return "Thread Group Shared Memory not supported %0.";
     case hlsl::ValidationRule::SmWaveSizeValue: return "Declared WaveSize %0 outside valid range [%1..%2], or not a power of 2.";
     case hlsl::ValidationRule::SmWaveSizeValue: return "Declared WaveSize %0 outside valid range [%1..%2], or not a power of 2.";
     case hlsl::ValidationRule::SmWaveSizeNeedsDxil16Plus: return "WaveSize is valid only for DXIL version 1.6 and higher.";
     case hlsl::ValidationRule::SmWaveSizeNeedsDxil16Plus: return "WaveSize is valid only for DXIL version 1.6 and higher.";
     case hlsl::ValidationRule::SmROVOnlyInPS: return "RasterizerOrdered objects are only allowed in 5.0+ pixel shaders.";
     case hlsl::ValidationRule::SmROVOnlyInPS: return "RasterizerOrdered objects are only allowed in 5.0+ pixel shaders.";
@@ -3769,12 +3770,33 @@ static void ValidateTGSMRaceCondition(std::vector<StoreInst *> &fixAddrTGSMList,
 static void ValidateGlobalVariables(ValidationContext &ValCtx) {
 static void ValidateGlobalVariables(ValidationContext &ValCtx) {
   DxilModule &M = ValCtx.DxilMod;
   DxilModule &M = ValCtx.DxilMod;
 
 
+  const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel();
+  bool TGSMAllowed = pSM->IsCS() || pSM->IsAS() || pSM->IsMS() || pSM->IsLib();
+
   unsigned TGSMSize = 0;
   unsigned TGSMSize = 0;
   std::vector<StoreInst*> fixAddrTGSMList;
   std::vector<StoreInst*> fixAddrTGSMList;
   const DataLayout &DL = M.GetModule()->getDataLayout();
   const DataLayout &DL = M.GetModule()->getDataLayout();
   for (GlobalVariable &GV : M.GetModule()->globals()) {
   for (GlobalVariable &GV : M.GetModule()->globals()) {
     ValidateGlobalVariable(GV, ValCtx);
     ValidateGlobalVariable(GV, ValCtx);
     if (GV.getType()->getAddressSpace() == DXIL::kTGSMAddrSpace) {
     if (GV.getType()->getAddressSpace() == DXIL::kTGSMAddrSpace) {
+      if (!TGSMAllowed)
+        ValCtx.EmitGlobalVariableFormatError(&GV, ValidationRule::SmTGSMUnsupported,
+                                             { std::string("in Shader Model ") + M.GetShaderModel()->GetName() });
+      // Lib targets need to check the usage to know if it's allowed
+      if (pSM->IsLib()) {
+        for (User *U : GV.users()) {
+          if (Instruction *I = dyn_cast<Instruction>(U)) {
+            llvm::Function *F = I->getParent()->getParent();
+            if (M.HasDxilEntryProps(F)) {
+              DxilFunctionProps &props = M.GetDxilEntryProps(F).props;
+              if (!props.IsCS() && !props.IsAS() && !props.IsMS()) {
+                ValCtx.EmitInstrFormatError(I, ValidationRule::SmTGSMUnsupported,
+                                            { "from non-compute entry points" });
+              }
+            }
+          }
+        }
+      }
       TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType());
       TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType());
       CollectFixAddressAccess(&GV, fixAddrTGSMList);
       CollectFixAddressAccess(&GV, fixAddrTGSMList);
     }
     }

+ 16 - 19
tools/clang/test/HLSL/ShaderOpArith.xml

@@ -1641,7 +1641,7 @@
           uint addVal = ix; // 32 bits isn't enough room to dupliate upper and lower
           uint addVal = ix; // 32 bits isn't enough room to dupliate upper and lower
           uint uminMaxVal = ~value*(~value&1) + value*(value&1);
           uint uminMaxVal = ~value*(~value&1) + value*(value&1);
           int sminMaxVal = ~value*(~value&1) + value*(value&1);
           int sminMaxVal = ~value*(~value&1) + value*(value&1);
-          uint xorVal = 1 << (ix%(bitSize-1));
+          uint xorVal = 1U << (ix%(bitSize-1));
           // make higher bits differ while lower bits match
           // make higher bits differ while lower bits match
           uint xchgVal = (ix << (bitSize/2)) | ((ix/3)%64);
           uint xchgVal = (ix << (bitSize/2)) | ((ix/3)%64);
           uint output = 0;
           uint output = 0;
@@ -1713,30 +1713,27 @@
         }
         }
 
 
         void InitSharedMem(uint ix) {
         void InitSharedMem(uint ix) {
-          // Zero-init shared memory
-          g_uintShare[ix%6] = 0;
-          g_sintShare[ix%3] = 0;
-          g_xchgShare[ix%64] = 0;
+          // Zero-init shared memory, with special cases
+          if (ix < 6)
+            g_uintShare[ix] = ix == 1 ? 99999999 : ix == 3 ? -1 : 0;
+          if (ix < 3)
+            g_sintShare[ix] = ix == 1 ? 99999999 : 0;
+          if (ix < 64)
+            g_xchgShare[ix] = 0;
 
 
           GroupMemoryBarrierWithGroupSync();
           GroupMemoryBarrierWithGroupSync();
-
-          InterlockedCompareStore(g_uintShare[1], 0, 99999999);
-          InterlockedCompareStore(g_uintShare[3], 0, -1);
-          InterlockedCompareStore(g_sintShare[1], 0, 99999999);
         }
         }
 
 
         void InitSharedMem64(uint ix) {
         void InitSharedMem64(uint ix) {
-          // Zero-init shared memory
-          g_uint64Share[ix%6] = 0;
-          g_sint64Share[ix%3] = 0;
-          g_xchg64Share[ix%64] = 0;
+          // Zero-init shared memory, with special cases
+          if (ix < 6)
+            g_uint64Share[ix] = ix == 1 ? 99999999ULL | (99999999ULL << 32) : ix == 3 ? ~0ULL : 0;
+          if (ix < 3)
+            g_sint64Share[ix] = ix == 1 ? 99999999ULL | (99999999ULL << 32) : 0;
+          if (ix < 64)
+            g_xchg64Share[ix] = 0;
 
 
           GroupMemoryBarrierWithGroupSync();
           GroupMemoryBarrierWithGroupSync();
-
-          InterlockedCompareStore(g_uint64Share[1], 0, 99999999ULL | (99999999ULL << 32));
-          InterlockedCompareStore(g_uint64Share[3], 0, ~0ULL);
-          InterlockedCompareStore(g_sint64Share[1], 0, 99999999ULL | (99999999ULL << 32));
-
         }
         }
 
 
         void AtomicGroupSharedTest(uint ix) {
         void AtomicGroupSharedTest(uint ix) {
@@ -1746,7 +1743,7 @@
           uint addVal = ix; // 32 bits isn't enough room to dupliate upper and lower
           uint addVal = ix; // 32 bits isn't enough room to dupliate upper and lower
           uint uminMaxVal = ~value*(~value&1) + value*(value&1);
           uint uminMaxVal = ~value*(~value&1) + value*(value&1);
           int sminMaxVal = ~value*(~value&1) + value*(value&1);
           int sminMaxVal = ~value*(~value&1) + value*(value&1);
-          uint xorVal = 1 << (ix%(bitSize-1));
+          uint xorVal = 1U << (ix%(bitSize-1));
           uint xchgVal = (ix << (bitSize/2)) | ((ix/3)%64);
           uint xchgVal = (ix << (bitSize/2)) | ((ix/3)%64);
           uint output = 0;
           uint output = 0;
 
 

+ 5 - 4
tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/enable-partial-unroll-test01.hlsl

@@ -1,18 +1,19 @@
-// RUN: %dxc /Tps_6_0 /Emain > %s | FileCheck %s
+// RUN: %dxc /Tcs_6_0 /Emain > %s | FileCheck %s
 // CHECK: define void @main()
 // CHECK: define void @main()
 // CHECK: entry
 // CHECK: entry
 
 
 #define MAX_INDEX 5
 #define MAX_INDEX 5
 
 
 groupshared float g_Array[2][(MAX_INDEX * MAX_INDEX)];
 groupshared float g_Array[2][(MAX_INDEX * MAX_INDEX)];
+RWStructuredBuffer<float4> output;
 
 
-[RootSignature("")] float4 main(uint GroupIndex
-                                : A) : SV_Target {
+[numthreads(1,1,1)] void main(uint GroupIndex
+                                : SV_GroupIndex) {
   uint idx;
   uint idx;
   float l_Array[(MAX_INDEX * MAX_INDEX)] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
   float l_Array[(MAX_INDEX * MAX_INDEX)] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
   for (idx = 0; idx < (MAX_INDEX * MAX_INDEX); idx++) {
   for (idx = 0; idx < (MAX_INDEX * MAX_INDEX); idx++) {
     g_Array[GroupIndex][idx] = l_Array[idx];
     g_Array[GroupIndex][idx] = l_Array[idx];
   }
   }
 
 
-  return float4(g_Array[GroupIndex][0], g_Array[GroupIndex][1], g_Array[GroupIndex][2], g_Array[GroupIndex][3]);
+  output[GroupIndex] = float4(g_Array[GroupIndex][0], g_Array[GroupIndex][1], g_Array[GroupIndex][2], g_Array[GroupIndex][3]);
 }
 }

+ 5 - 4
tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/enable-partial-unroll-test02.hlsl

@@ -1,17 +1,18 @@
-// RUN: %dxc /Tps_6_0 /Emain > %s | FileCheck %s
+// RUN: %dxc /Tcs_6_0 /Emain > %s | FileCheck %s
 // CHECK: define void @main()
 // CHECK: define void @main()
 // CHECK: entry
 // CHECK: entry
 
 
 #define MAX_INDEX 14
 #define MAX_INDEX 14
 
 
 groupshared float g_Array[2][(MAX_INDEX * MAX_INDEX)];
 groupshared float g_Array[2][(MAX_INDEX * MAX_INDEX)];
+RWStructuredBuffer<float4> output;
 
 
-[RootSignature("")] float4 main(uint GroupIndex
-                                : A) : SV_Target {
+[numthreads(1,1,1)] void main(uint GroupIndex
+                                : SV_GroupIndex) {
   uint idx;
   uint idx;
   for (idx = 0; idx < (MAX_INDEX * MAX_INDEX); idx++) {
   for (idx = 0; idx < (MAX_INDEX * MAX_INDEX); idx++) {
     g_Array[GroupIndex][idx] = 0.0f;
     g_Array[GroupIndex][idx] = 0.0f;
   }
   }
 
 
-  return float4(g_Array[GroupIndex][0], g_Array[GroupIndex][1], g_Array[GroupIndex][2], g_Array[GroupIndex][3]);
+  output[GroupIndex] = float4(g_Array[GroupIndex][0], g_Array[GroupIndex][1], g_Array[GroupIndex][2], g_Array[GroupIndex][3]);
 }
 }

+ 14 - 3
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_cmpstr_i64_and_i32.hlsl

@@ -1,8 +1,8 @@
-// RUN: %dxc -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
+// RUN: %dxc -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
 
 
-// RUN: %dxilver 1.6 | %dxc -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
+// RUN: %dxilver 1.6 | %dxc -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 
 
@@ -25,7 +25,7 @@ groupshared int64_t  resI64[256];
 // GSCHECK: Note: shader requires additional functionality:
 // GSCHECK: Note: shader requires additional functionality:
 // GSCHECK: 64-bit Atomics on Group Shared
 // GSCHECK: 64-bit Atomics on Group Shared
 
 
-void main( uint a : A, uint b: B, uint c :C) : SV_Target
+void dotest( uint a, uint b, uint c)
 {
 {
   resU[a] = a;
   resU[a] = a;
   resI[a] = a;
   resI[a] = a;
@@ -191,3 +191,14 @@ void main( uint a : A, uint b: B, uint c :C) : SV_Target
   InterlockedCompareStore( resI64[a], iv, liv2 );
   InterlockedCompareStore( resI64[a], iv, liv2 );
   InterlockedCompareStore( resI64[a], liv, iv2 );
   InterlockedCompareStore( resI64[a], liv, iv2 );
 }
 }
+
+void main( uint a : A, uint b: B, uint c :C) : SV_Target
+{
+  dotest(a,b,c);
+}
+
+[numthreads(1,1,1)]
+void CSMain( uint3 gtid : SV_GroupThreadID)
+{
+  dotest(gtid.x, gtid.y, gtid.z);
+}

+ 14 - 3
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_cmpxchg_i64_and_i32.hlsl

@@ -1,8 +1,8 @@
-// RUN: %dxc -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
+// RUN: %dxc -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
 
 
-// RUN: %dxilver 1.6 | %dxc -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
+// RUN: %dxilver 1.6 | %dxc -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 
 
@@ -25,7 +25,7 @@ groupshared int64_t  resI64[256];
 // GSCHECK: Note: shader requires additional functionality:
 // GSCHECK: Note: shader requires additional functionality:
 // GSCHECK: 64-bit Atomics on Group Shared
 // GSCHECK: 64-bit Atomics on Group Shared
 
 
-void main( uint a : A, uint b: B, uint c :C) : SV_Target
+void dotest( uint a, uint b, uint c)
 {
 {
   resU[a] = a;
   resU[a] = a;
   resI[a] = a;
   resI[a] = a;
@@ -213,3 +213,14 @@ void main( uint a : A, uint b: B, uint c :C) : SV_Target
   InterlockedCompareExchange( resI64[a], liv, iv2, oliv );
   InterlockedCompareExchange( resI64[a], liv, iv2, oliv );
   InterlockedCompareExchange( resI64[a], liv, liv2, oiv );
   InterlockedCompareExchange( resI64[a], liv, liv2, oiv );
 }
 }
+
+void main( uint a : A, uint b: B, uint c :C) : SV_Target
+{
+  dotest(a,b,c);
+}
+
+[numthreads(1,1,1)]
+void CSMain( uint3 gtid : SV_GroupThreadID)
+{
+  dotest(gtid.x, gtid.y, gtid.z);
+}

+ 14 - 3
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_float.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
+// RUN: %dxc -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
 
 
@@ -12,8 +12,7 @@ groupshared int      resI[256];
 groupshared int64_t  resI64[256];
 groupshared int64_t  resI64[256];
 #endif
 #endif
 
 
-
-float4 main( uint a : A, uint b: B, uint c :C) : SV_Target
+float4 dotest( uint a, uint b, uint c)
 {
 {
   float fv = b - c;
   float fv = b - c;
   float fv2 = b + c;
   float fv2 = b + c;
@@ -112,3 +111,15 @@ float4 main( uint a : A, uint b: B, uint c :C) : SV_Target
 
 
   return ofv;
   return ofv;
 }
 }
+
+float4 main( uint a : A, uint b: B, uint c :C) : SV_Target
+{
+  return dotest(a,b,c);
+}
+
+RWStructuredBuffer<float4> output;
+[numthreads(1,1,1)]
+void CSMain( uint3 gtid : SV_GroupThreadID, uint ix : SV_GroupIndex)
+{
+  output[ix] = dotest(gtid.x, gtid.y, gtid.z);
+}

+ 6 - 2
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_float_errors.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -T ps_6_6 %s | FileCheck %s
+// RUN: %dxc -T cs_6_6 %s | FileCheck %s
 
 
 // Verify that the first arg determines the overload and the others can be what they will
 // Verify that the first arg determines the overload and the others can be what they will
 
 
@@ -9,8 +9,12 @@ RWBuffer<uint64_t> resBI64;
 
 
 RWByteAddressBuffer Rres;
 RWByteAddressBuffer Rres;
 
 
-void main( uint a : A, uint b: B, uint c :C) : SV_Target
+[numthreads(1,1,1)]
+void main( uint3 gtid : SV_GroupThreadID)
 {
 {
+  uint a = gtid.x;
+  uint b = gtid.y;
+  uint c = gtid.z;
   resGI[a] = a;
   resGI[a] = a;
   resGI64[a] = a;
   resGI64[a] = a;
   resBI[a] = a;
   resBI[a] = a;

+ 18 - 15
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_overload.hlsl

@@ -1,19 +1,19 @@
-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=double  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
-// RUN: %dxc -no-warnings -T vs_6_2 -DTYPE=float16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
-// RUN: %dxc -no-warnings -T vs_6_2 -DTYPE=int16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
-// RUN: %dxc -no-warnings -T vs_6_2 -DTYPE=uint16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=bool  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
-// RUN: %dxilver 1.6 | %dxc -no-warnings -T vs_6_5 -DTYPE=int64_t  %s | %FileCheck %s -check-prefix=VALFAIL
-// RUN: %dxilver 1.6 | %dxc -no-warnings -T vs_6_5 -DTYPE=uint64_t  %s | %FileCheck %s -check-prefix=VALFAIL
+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=double  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
+// RUN: %dxc -no-warnings -T cs_6_2 -DTYPE=float16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
+// RUN: %dxc -no-warnings -T cs_6_2 -DTYPE=int16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
+// RUN: %dxc -no-warnings -T cs_6_2 -DTYPE=uint16_t -enable-16bit-types  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=bool  %s | %FileCheck %s -check-prefixes=INTFAIL,FLTFAIL
+// RUN: %dxilver 1.6 | %dxc -no-warnings -T cs_6_5 -DTYPE=int64_t  %s | %FileCheck %s -check-prefix=VALFAIL
+// RUN: %dxilver 1.6 | %dxc -no-warnings -T cs_6_5 -DTYPE=uint64_t  %s | %FileCheck %s -check-prefix=VALFAIL
 
 
 
 
-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=float  %s | %FileCheck %s -check-prefixes=INTFAIL,
-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=half  %s | %FileCheck %s -check-prefixes=INTFAIL
+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=float  %s | %FileCheck %s -check-prefixes=INTFAIL,
+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=half  %s | %FileCheck %s -check-prefixes=INTFAIL
 
 
-// RUN: %dxc -no-warnings -T vs_6_6 -DTYPE=int64_t  %s | %FileCheck %s -check-prefixes=INTCHK
-// RUN: %dxc -no-warnings -T vs_6_6 -DTYPE=uint64_t  %s | %FileCheck %s -check-prefixes=INTCHK
-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=int  %s | %FileCheck %s -check-prefixes=INTCHK
-// RUN: %dxc -no-warnings -T vs_6_0 -DTYPE=uint  %s | %FileCheck %s -check-prefixes=INTCHK
+// RUN: %dxc -no-warnings -T cs_6_6 -DTYPE=int64_t  %s | %FileCheck %s -check-prefixes=INTCHK
+// RUN: %dxc -no-warnings -T cs_6_6 -DTYPE=uint64_t  %s | %FileCheck %s -check-prefixes=INTCHK
+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=int  %s | %FileCheck %s -check-prefixes=INTCHK
+// RUN: %dxc -no-warnings -T cs_6_0 -DTYPE=uint  %s | %FileCheck %s -check-prefixes=INTCHK
 
 
 
 
 // Test various Interlocked ops using different memory types with invalid types
 // Test various Interlocked ops using different memory types with invalid types
@@ -22,7 +22,10 @@ RWBuffer<TYPE> rw_res;
 groupshared TYPE gs_res;
 groupshared TYPE gs_res;
 RWByteAddressBuffer ba_res;
 RWByteAddressBuffer ba_res;
 
 
-float main() :OUT{
+RWStructuredBuffer<float4> output;
+
+[numthreads(1,1,1)]
+void main(uint ix : SV_GroupIndex) {
   int val = 1;
   int val = 1;
   TYPE comp = 1;
   TYPE comp = 1;
   TYPE orig;
   TYPE orig;
@@ -213,5 +216,5 @@ float main() :OUT{
   InterlockedCompareExchange(rw_res[0], comp, val, orig);
   InterlockedCompareExchange(rw_res[0], comp, val, orig);
   InterlockedCompareExchange(gs_res, comp, val, orig);
   InterlockedCompareExchange(gs_res, comp, val, orig);
 
 
-  return (float)rw_res[0] + gs_res;
+  output[ix] = (float)rw_res[0] + gs_res;
 }
 }

+ 14 - 3
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_xchg_i64_and_i32.hlsl

@@ -1,8 +1,8 @@
-// RUN: %dxc -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
+// RUN: %dxc -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
 // RUN: %dxc -T ps_6_6 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=CHECK
 
 
-// RUN: %dxilver 1.6 | %dxc -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
+// RUN: %dxilver 1.6 | %dxc -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -T ps_6_5 -DMEMTYPE=RWStructuredBuffer %s | FileCheck %s -check-prefix=ERRCHECK
 
 
@@ -25,7 +25,7 @@ groupshared int64_t  resI64[256];
 // GSCHECK: Note: shader requires additional functionality:
 // GSCHECK: Note: shader requires additional functionality:
 // GSCHECK: 64-bit Atomics on Group Shared
 // GSCHECK: 64-bit Atomics on Group Shared
 
 
-void main( uint a : A, uint b: B, uint c :C) : SV_Target
+void dotest( uint a, uint b, uint c)
 {
 {
   resU[a] = a;
   resU[a] = a;
   resI[a] = a;
   resI[a] = a;
@@ -183,3 +183,14 @@ void main( uint a : A, uint b: B, uint c :C) : SV_Target
   InterlockedExchange( resI64[a], iv, liv2 );
   InterlockedExchange( resI64[a], iv, liv2 );
   InterlockedExchange( resI64[a], liv, iv2 );
   InterlockedExchange( resI64[a], liv, iv2 );
 }
 }
+
+void main( uint a : A, uint b: B, uint c :C) : SV_Target
+{
+  dotest(a,b,c);
+}
+
+[numthreads(1,1,1)]
+void CSMain( uint3 gtid : SV_GroupThreadID)
+{
+  dotest(gtid.x, gtid.y, gtid.z);
+}

+ 3 - 2
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_float.hlsl

@@ -1,10 +1,11 @@
-// RUN: %dxc -T ps_6_6 %s | FileCheck %s
+// RUN: %dxc -T cs_6_6 %s | FileCheck %s
 
 
 groupshared float   resG[256];
 groupshared float   resG[256];
 RWBuffer<float>     resB;
 RWBuffer<float>     resB;
 RWStructuredBuffer<float> resS;
 RWStructuredBuffer<float> resS;
 
 
-void main( float a : A, int b: B, float c :C) : SV_Target
+[numthreads(1,1,1)]
+void main( float a : A, int b: B, float c :C)
 {
 {
   // Test some disallowed atomic binop intrinsics with floats as both args
   // Test some disallowed atomic binop intrinsics with floats as both args
 
 

+ 5 - 2
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_i64.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -T ps_6_6 %s | FileCheck %s
+// RUN: %dxc -T cs_6_6 %s | FileCheck %s
 
 
 // A test to verify that 64-bit atomic binary operation intrinsics select the right variant
 // A test to verify that 64-bit atomic binary operation intrinsics select the right variant
 
 
@@ -11,8 +11,11 @@ groupshared uint64_t ugs[256];
 RWBuffer<uint64_t> utb;
 RWBuffer<uint64_t> utb;
 RWStructuredBuffer<uint64_t> usb;
 RWStructuredBuffer<uint64_t> usb;
 
 
-void main( uint a : A, uint b: B) : SV_Target
+[numthreads(1,1,1)]
+void main( uint3 gtid : SV_GroupThreadID)
 {
 {
+  uint a = gtid.x;
+  uint b = gtid.y;
   uint64_t luv = a * b;
   uint64_t luv = a * b;
   int64_t liv = a + b;
   int64_t liv = a + b;
   uint ix = 0;
   uint ix = 0;

+ 24 - 13
tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomicop_i64_and_i32.hlsl

@@ -1,9 +1,9 @@
-// RUN: %dxc -DINTRIN=InterlockedAdd -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
-// RUN: %dxc -DINTRIN=InterlockedMin -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
-// RUN: %dxc -DINTRIN=InterlockedMax -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
-// RUN: %dxc -DINTRIN=InterlockedAnd -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
-// RUN: %dxc -DINTRIN=InterlockedOr -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
-// RUN: %dxc -DINTRIN=InterlockedXor -T ps_6_6 %s | FileCheck %s -check-prefix=GSCHECK
+// RUN: %dxc -DINTRIN=InterlockedAdd -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
+// RUN: %dxc -DINTRIN=InterlockedMin -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
+// RUN: %dxc -DINTRIN=InterlockedMax -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
+// RUN: %dxc -DINTRIN=InterlockedAnd -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
+// RUN: %dxc -DINTRIN=InterlockedOr -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
+// RUN: %dxc -DINTRIN=InterlockedXor -E CSMain -T cs_6_6 %s | FileCheck %s -check-prefix=GSCHECK
 
 
 // RUN: %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedAdd -T ps_6_6 %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedAdd -T ps_6_6 %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedMin -T ps_6_6 %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
 // RUN: %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedMin -T ps_6_6 %s | FileCheck %s -check-prefixes=CHECK,TYCHECK
@@ -26,12 +26,12 @@
 // RUN: %dxilver 1.6 | %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedOr -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedOr -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedXor -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
 // RUN: %dxilver 1.6 | %dxc -DMEMTYPE=RWBuffer -DINTRIN=InterlockedXor -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
 
 
-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedAdd -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedMin -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedMax -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedAnd -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedOr -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
-// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedXor -T ps_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedAdd -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedMin -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedMax -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedAnd -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedOr -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
+// RUN: %dxilver 1.6 | %dxc -DINTRIN=InterlockedXor -E CSMain -T cs_6_5 %s | FileCheck %s -check-prefix=ERRCHECK
 
 
 // Verify that the first arg determines the overload and the others can be what they will
 // Verify that the first arg determines the overload and the others can be what they will
 
 
@@ -52,7 +52,7 @@ groupshared int64_t  resI64[256];
 // GSCHECK: Note: shader requires additional functionality:
 // GSCHECK: Note: shader requires additional functionality:
 // GSCHECK: 64-bit Atomics on Group Shared
 // GSCHECK: 64-bit Atomics on Group Shared
 
 
-void main( uint a : A, uint b: B, uint c :C) : SV_Target
+void dotest( uint a, uint b, uint c)
 {
 {
   resU[a] = a;
   resU[a] = a;
   resI[a] = a;
   resI[a] = a;
@@ -126,3 +126,14 @@ void main( uint a : A, uint b: B, uint c :C) : SV_Target
   INTRIN( resU64[a], 3.0 );
   INTRIN( resU64[a], 3.0 );
   INTRIN( resI64[a], 4.0 );
   INTRIN( resI64[a], 4.0 );
 }
 }
+
+void main( uint a : A, uint b: B, uint c :C) : SV_Target
+{
+  dotest(a,b,c);
+}
+
+[numthreads(1,1,1)]
+void CSMain( uint3 gtid : SV_GroupThreadID)
+{
+  dotest(gtid.x, gtid.y, gtid.z);
+}

+ 2 - 1
tools/clang/test/HLSLFileCheck/hlsl/types/conversions/varmods-syntax_Mod.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
 
 
 // CHECK: @main
 // CHECK: @main
 
 
@@ -288,6 +288,7 @@ float4 foo_interpolation_different_decl(sample float4 val) {
 
 
 //////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////
 // Locals.
 // Locals.
+[numthreads(1,1,1)]
 void main() {
 void main() {
     // <py::lines('GENERATED_CODE')>modify(lines, gen_code('%(mods)s float l_%(id)s;', storage_combos))</py>
     // <py::lines('GENERATED_CODE')>modify(lines, gen_code('%(mods)s float l_%(id)s;', storage_combos))</py>
     // GENERATED_CODE:BEGIN
     // GENERATED_CODE:BEGIN

+ 30 - 10
tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl

@@ -2,10 +2,15 @@
 // RUN: %dxc -DMIDX=i -DVIDX=2 -T ps_6_0 %s | FileCheck %s
 // RUN: %dxc -DMIDX=i -DVIDX=2 -T ps_6_0 %s | FileCheck %s
 // RUN: %dxc -DMIDX=1 -DVIDX=j -T ps_6_0 %s | FileCheck %s
 // RUN: %dxc -DMIDX=1 -DVIDX=j -T ps_6_0 %s | FileCheck %s
 // RUN: %dxc -DMIDX=i -DVIDX=j -T ps_6_0 %s | FileCheck %s
 // RUN: %dxc -DMIDX=i -DVIDX=j -T ps_6_0 %s | FileCheck %s
-// RUN: %dxc -DMIDX=1 -DVIDX=2 -T lib_6_3 %s | FileCheck %s
-// RUN: %dxc -DMIDX=i -DVIDX=2 -T lib_6_3 %s | FileCheck %s
-// RUN: %dxc -DMIDX=1 -DVIDX=j -T lib_6_3 %s | FileCheck %s
-// RUN: %dxc -DMIDX=i -DVIDX=j -T lib_6_3 %s | FileCheck %s
+
+// RUN: %dxc -DMIDX=1 -DVIDX=2 -T cs_6_0 -E CSMain -DGS %s | FileCheck %s -check-prefix=CSCHK
+// RUN: %dxc -DMIDX=i -DVIDX=2 -T cs_6_0 -E CSMain -DGS %s | FileCheck %s -check-prefix=CSCHK
+// RUN: %dxc -DMIDX=1 -DVIDX=j -T cs_6_0 -E CSMain -DGS %s | FileCheck %s -check-prefix=CSCHK
+// RUN: %dxc -DMIDX=i -DVIDX=j -T cs_6_0 -E CSMain -DGS %s | FileCheck %s -check-prefix=CSCHK
+// RUN: %dxc -DMIDX=1 -DVIDX=2 -T lib_6_3 %s -DGS | FileCheck %s -check-prefixes=CSCHK,CHECK
+// RUN: %dxc -DMIDX=i -DVIDX=2 -T lib_6_3 %s -DGS | FileCheck %s -check-prefixes=CSCHK,CHECK
+// RUN: %dxc -DMIDX=1 -DVIDX=j -T lib_6_3 %s -DGS | FileCheck %s -check-prefixes=CSCHK,CHECK
+// RUN: %dxc -DMIDX=i -DVIDX=j -T lib_6_3 %s -DGS | FileCheck %s -check-prefixes=CSCHK,CHECK
 
 
 // Test for general subscript operations on matrix arrays.
 // Test for general subscript operations on matrix arrays.
 // Specifically focused on shader inputs which failed to lower previously
 // Specifically focused on shader inputs which failed to lower previously
@@ -26,6 +31,26 @@ struct MtxArray {
   float3x3 mtx[2];
   float3x3 mtx[2];
 };
 };
 
 
+RWStructuredBuffer<float3> output;
+
+[shader("compute")]
+[numthreads(8,8,1)]
+void CSMain(uint3 gtid : SV_GroupThreadID, uint ix : SV_GroupIndex)
+{
+  float3 ret = 0.0;
+  uint i = gtid.x;
+  uint j = gtid.y;
+
+  // CSCHK: load float, float addrspace(3)*
+  // CSCHK: load float, float addrspace(3)*
+  // CSCHK: load float, float addrspace(3)*
+  ret += gs[MIDX][VIDX];
+
+  ret += GetRow(gs[MIDX], VIDX);
+
+  output[ix] = ret;
+}
+
 [shader("pixel")]
 [shader("pixel")]
 float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm[2] : JM, MtxArray ma : A) : SV_Target
 float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm[2] : JM, MtxArray ma : A) : SV_Target
 {
 {
@@ -36,11 +61,6 @@ float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm
   // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32
   // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32
   ret += g[MIDX][VIDX];
   ret += g[MIDX][VIDX];
 
 
-  // CHECK: load float, float addrspace(3)*
-  // CHECK: load float, float addrspace(3)*
-  // CHECK: load float, float addrspace(3)*
-  ret += gs[MIDX][VIDX];
-
   // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef)
   // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef)
   // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef)
   // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef)
   // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef)
   // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef)
@@ -57,7 +77,6 @@ float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm
   ret += ma.mtx[MIDX][VIDX];
   ret += ma.mtx[MIDX][VIDX];
 
 
   ret += GetRow(g[MIDX], VIDX);
   ret += GetRow(g[MIDX], VIDX);
-  ret += GetRow(gs[MIDX], VIDX);
   ret += GetRow(m[MIDX], VIDX);
   ret += GetRow(m[MIDX], VIDX);
   ret += GetRow(jm[MIDX].mtx, VIDX);
   ret += GetRow(jm[MIDX].mtx, VIDX);
   ret += GetRow(ma.mtx[MIDX], VIDX);
   ret += GetRow(ma.mtx[MIDX], VIDX);
@@ -67,3 +86,4 @@ float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm
   // CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %{{.*}})
   // CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %{{.*}})
   return ret;
   return ret;
 }
 }
+

+ 8 - 4
tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/global/global-var-write-test05.hlsl

@@ -1,14 +1,18 @@
-// RUN: %dxc -E main -T ps_6_0 /Gec -HV 2016 > %s | FileCheck %s
+// RUN: %dxc -E main -T cs_6_0 /Gec -HV 2016 > %s | FileCheck %s
 
 
 // CHECK: define void @main()
 // CHECK: define void @main()
 // CHECK: ret void
 // CHECK: ret void
 
 
+Texture2D<float3> InColor : register(t0);
 RWTexture2D<float3> Color : register(u0);
 RWTexture2D<float3> Color : register(u0);
+RWTexture2D<float3> OutColor : register(u1);
 groupshared uint PixelCountH;
 groupshared uint PixelCountH;
 
 
-uint main( uint2 a : A, float3 b : B ) : SV_Target
+[numthreads(64,16,1)]
+void main( uint3 gtid : SV_GroupThreadID )
 {
 {
- Color[a] = b; 
+ uint2 a = gtid.xy;
+ Color[a] = InColor[a];
  PixelCountH = Color[a].x * 1;
  PixelCountH = Color[a].x * 1;
- return PixelCountH;
+ OutColor[a] = PixelCountH;
 }
 }

+ 217 - 0
tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/groupshared/groupshared_shadermodels.hlsl

@@ -0,0 +1,217 @@
+// RUN: %dxc -E PSMain -T ps_6_0 %s | FileCheck %s
+// RUN: %dxc -E VSMain -T vs_6_0 %s | FileCheck %s
+// RUN: %dxc -E GSMain -T gs_6_0 %s | FileCheck %s
+// RUN: %dxc -E HSMain -T hs_6_0 %s | FileCheck %s
+// RUN: %dxc -E DSMain -T ds_6_0 %s | FileCheck %s
+// RUN: %dxc -E CSMain -T lib_6_5 %s | FileCheck %s -check-prefix=LIBCHK
+// RUN: %dxc -E CSMain -T cs_6_0 %s | FileCheck %s -check-prefix=CSCHK
+// RUN: %dxc -E MSMain -T ms_6_5 %s | FileCheck %s -check-prefix=CSCHK
+// RUN: %dxc -E ASMain -T as_6_5 %s | FileCheck %s -check-prefix=CSCHK
+
+// Test that the proper error for groupshared is produced when compiling in non-compute contexts
+// and that compilation succeeds in compute-like contexts (compute, mesh, and amplification shaders)
+
+
+// CSCHK: @[[gs:.*]] = addrspace(3) global float
+
+// CHECK: error: Thread Group Shared Memory not supported in Shader Model
+// CHECK: error: Thread Group Shared Memory not supported in Shader Model
+// CHECK: error: Thread Group Shared Memory not supported in Shader Model
+// CHECK: error: Thread Group Shared Memory not supported in Shader Model
+groupshared float4 foo;
+
+RWStructuredBuffer<float4> output;
+
+int4 getit()
+{
+  // CSCHK: load float, float addrspace(3)* @[[gs]]
+  return foo;
+}
+
+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
+// LIBCHK: of function 'VSMain'
+[shader("vertex")]
+float4 VSMain(uint ix : SV_VertexID) : OUT {
+  output[ix] = getit();
+  return 1.0;
+}
+
+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
+// LIBCHK: of function 'PSMain'
+[shader("pixel")]
+float4 PSMain(uint ix : SV_PrimitiveID) : SV_TARGET {
+  output[ix] = getit();
+  return 1.0;
+}
+
+[shader("compute")]
+[NumThreads(32, 32, 1)]
+void CSMain(uint ix : SV_GroupIndex) {
+  output[ix] = getit();
+}
+
+struct payload_t { int nothing; };
+
+
+[shader("amplification")]
+[NumThreads(8, 8, 2)]
+void ASMain(uint ix : SV_GroupIndex) {
+  output[ix] = getit();
+  payload_t p = {0};
+  DispatchMesh(1, 1, 1, p);
+}
+
+[shader("mesh")]
+[NumThreads(8, 8, 2)]
+[OutputTopology("triangle")]
+void MSMain(uint ix : SV_GroupIndex) {
+  output[ix] = getit();
+}
+
+struct PosStruct {
+  float4 pos : SV_Position;
+};
+
+float4 a;
+
+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
+// LIBCHK: of function 'GSMain'
+[shader("geometry")]
+[maxvertexcount(1)]
+void GSMain(triangle float4 array[3] : SV_Position, uint ix : SV_GSInstanceID,
+            inout PointStream<PosStruct> OutputStream)
+{
+  output[ix] = getit();
+  PosStruct s;
+  s.pos = a;
+  OutputStream.Append(s);
+  OutputStream.RestartStrip();
+}
+
+struct PCStruct
+{
+  float Edges[3]  : SV_TessFactor;
+  float Inside : SV_InsideTessFactor;
+  float4 test : TEST;
+};
+
+PCStruct HSPatch(InputPatch<PosStruct, 3> ip,
+                 OutputPatch<PosStruct, 3> op,
+                 uint ix : SV_PrimitiveID)
+{
+  output[ix] = getit();
+  PCStruct a;
+  a.Edges[0] = ip[0].pos.w;
+  a.Edges[1] = ip[0].pos.w;
+  a.Edges[2] = ip[0].pos.w;
+  a.Inside = ip[0].pos.w;
+  return a;
+}
+
+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
+// LIBCHK: of function 'HSMain'
+[shader("hull")]
+[domain("tri")]
+[partitioning("fractional_odd")]
+[outputtopology("triangle_cw")]
+[outputcontrolpoints(3)]
+[patchconstantfunc("HSPatch")]
+PosStruct HSMain(InputPatch<PosStruct, 3> p,
+                 uint ix : SV_OutputControlPointID)
+{
+  output[ix] = getit();
+  PosStruct s;
+  s.pos = p[ix].pos;
+  return s;
+}
+
+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
+// LIBCHK: of function 'DSMain'
+[shader("domain")]
+[domain("tri")]
+PosStruct DSMain(const OutputPatch<PosStruct, 3> patch,
+                 uint ix : SV_PrimitiveID)
+{
+  output[ix] = getit();
+  PosStruct v;
+  v.pos = patch[0].pos;
+  return v;
+}
+
+struct MyPayload {
+  float4 color;
+  uint3 pos;
+};
+
+struct MyAttributes {
+  float2 bary;
+  uint id;
+};
+
+struct MyParam {
+  float2 coord;
+  float4 output;
+};
+
+
+RaytracingAccelerationStructure RTAS : register(t5);
+
+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
+// LIBCHK: of function {{.*}}RGMain
+[shader("raygeneration")]
+void RGMain()
+{
+  MyPayload p = (MyPayload)0;
+  p.pos = DispatchRaysIndex();
+  p.color = getit();
+  float3 origin = {0, 0, 0};
+  float3 dir = normalize(p.pos / (float)DispatchRaysDimensions());
+  RayDesc ray = { origin, 0.125, dir, 128.0};
+  TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p);
+}
+
+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
+// LIBCHK: of function {{.*}}ISMain
+[shader("intersection")]
+void ISMain()
+{
+  float hitT = RayTCurrent();
+  MyAttributes attr = (MyAttributes)0;
+  attr.bary = getit().xy;
+  bool bReported = ReportHit(hitT, 0, attr);
+}
+
+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
+// LIBCHK: of function {{.*}}AHMain
+[shader("anyhit")]
+void AHMain( inout MyPayload payload : SV_RayPayload,
+             in MyAttributes attr : SV_IntersectionAttributes )
+{
+  float3 hitLocation = ObjectRayOrigin() + ObjectRayDirection() * RayTCurrent();
+  if (hitLocation.z < attr.bary.x)
+    AcceptHitAndEndSearch();         // aborts function
+  if (hitLocation.z < attr.bary.y)
+    IgnoreHit();   // aborts function
+  payload.color += getit();
+}
+
+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
+// LIBCHK: of function {{.*}}CHMain
+[shader("closesthit")]
+void CHMain( inout MyPayload payload : SV_RayPayload,
+             in BuiltInTriangleIntersectionAttributes attr : SV_IntersectionAttributes )
+{
+  MyParam param = {attr.barycentrics, getit()};
+  CallShader(7, param);
+  payload.color += param.output;
+}
+
+
+// LIBCHK: error: Thread Group Shared Memory not supported from non-compute entry points.
+// LIBCHK: of function {{.*}}MissMain
+[shader("miss")]
+void MissMain(inout MyPayload payload : SV_RayPayload)
+{
+  payload.color = getit();
+}
+

+ 6 - 3
tools/clang/test/HLSLFileCheck/hlsl/types/modifiers/groupshared/this_ptr_address_space.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T vs_6_2 %s | FileCheck %s
+// RUN: %dxc -E main -T cs_6_2 %s | FileCheck %s
 
 
 // Test that the address space of the this pointer is honored
 // Test that the address space of the this pointer is honored
 // when accessing data members or calling member functions.
 // when accessing data members or calling member functions.
@@ -10,9 +10,12 @@ int i, j;
 // CHECK: @[[gs:.*]] = addrspace(3) global [2 x i32] undef
 // CHECK: @[[gs:.*]] = addrspace(3) global [2 x i32] undef
 groupshared Foo foo[2];
 groupshared Foo foo[2];
 
 
-int4 main() : OUT
+RWStructuredBuffer<int4> output;
+
+[numthreads(8,8,1)]
+void main( uint gidx : SV_GroupIndex )
 {
 {
-  return int4(
+  output[gidx] = int4(
     // getelementptr & addrspacecast constant expressions
     // getelementptr & addrspacecast constant expressions
     // CHECK: load i32, i32 addrspace(3)* getelementptr inbounds ([2 x i32], [2 x i32] addrspace(3)* @[[gs]], i32 0, i32 0)
     // CHECK: load i32, i32 addrspace(3)* getelementptr inbounds ([2 x i32], [2 x i32] addrspace(3)* @[[gs]], i32 0, i32 0)
     foo[0].x, 
     foo[0].x, 

+ 4 - 2
tools/clang/test/HLSLFileCheck/passes/dxil/dxil_cleanup_addrspacecast/remove-addrspacecastinst.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s
+// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
 
 
 // CHECK: @main()
 // CHECK: @main()
 // CHECK-NOT: addrspacecast
 // CHECK-NOT: addrspacecast
@@ -6,5 +6,7 @@
 
 
 struct Foo { int x; int getX() { return x; } };
 struct Foo { int x; int getX() { return x; } };
 groupshared Foo foo[2];
 groupshared Foo foo[2];
+RWStructuredBuffer<int> output;
 int i;
 int i;
-int main() : OUT { return foo[i].getX(); }
+[numthreads(1,1,1)]
+void main() { output[i] =  foo[i].getX(); }

+ 6 - 7
tools/clang/test/HLSLFileCheck/passes/dxil/dxil_o0_legalize/store_undef.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc %s -T ps_6_0 -Od | FileCheck %s
+// RUN: %dxc %s -T cs_6_0 -Od | FileCheck %s
 
 
 // Regression test for validation failure in O0 due to
 // Regression test for validation failure in O0 due to
 // storing structure with uninitialized member.
 // storing structure with uninitialized member.
@@ -39,11 +39,10 @@ float bar(Foo f) {
   return f.e;
   return f.e;
 }
 }
 
 
-float main(uint3 off : OFF) : SV_Target {
+RWStructuredBuffer<float> output;
+
+[numthreads(1,1,1)]
+void main() {
   foo(1, 2, 0);
   foo(1, 2, 0);
-  return bar(foos[3]);
+  output[0] = bar(foos[3]);
 }
 }
-
-
-
-

+ 2 - 1
tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/groupshared_array_struct_matrix_regression.hlsl

@@ -1,4 +1,4 @@
-// RUN: %dxc -E main -T vs_6_2 %s | FileCheck %s
+// RUN: %dxc -E main -T cs_6_2 %s | FileCheck %s
 
 
 // Regression test for GitHub #1631, where SROA would generate more uses
 // Regression test for GitHub #1631, where SROA would generate more uses
 // of a value while processing it (due to expanding a memcpy) and fail
 // of a value while processing it (due to expanding a memcpy) and fail
@@ -11,4 +11,5 @@
 struct S { int1x1 x, y; };
 struct S { int1x1 x, y; };
 groupshared S gs[1];
 groupshared S gs[1];
 void f(S s[1]) {}
 void f(S s[1]) {}
+[numthreads(1,1,1)]
 void main() { f(gs); }
 void main() { f(gs); }

+ 1 - 0
utils/hct/hctdb.py

@@ -2582,6 +2582,7 @@ class db_dxil(object):
         self.add_valrule("Sm.ThreadGroupChannelRange", "Declared Thread Group %0 size %1 outside valid range [%2..%3].")
         self.add_valrule("Sm.ThreadGroupChannelRange", "Declared Thread Group %0 size %1 outside valid range [%2..%3].")
         self.add_valrule("Sm.MaxTheadGroup", "Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.")
         self.add_valrule("Sm.MaxTheadGroup", "Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.")
         self.add_valrule("Sm.MaxTGSMSize", "Total Thread Group Shared Memory storage is %0, exceeded %1.")
         self.add_valrule("Sm.MaxTGSMSize", "Total Thread Group Shared Memory storage is %0, exceeded %1.")
+        self.add_valrule("Sm.TGSMUnsupported", "Thread Group Shared Memory not supported %0.")
         self.add_valrule("Sm.WaveSizeValue", "Declared WaveSize %0 outside valid range [%1..%2], or not a power of 2.")
         self.add_valrule("Sm.WaveSizeValue", "Declared WaveSize %0 outside valid range [%1..%2], or not a power of 2.")
         self.add_valrule("Sm.WaveSizeNeedsDxil16Plus", "WaveSize is valid only for DXIL version 1.6 and higher.")
         self.add_valrule("Sm.WaveSizeNeedsDxil16Plus", "WaveSize is valid only for DXIL version 1.6 and higher.")
         self.add_valrule("Sm.ROVOnlyInPS", "RasterizerOrdered objects are only allowed in 5.0+ pixel shaders.")
         self.add_valrule("Sm.ROVOnlyInPS", "RasterizerOrdered objects are only allowed in 5.0+ pixel shaders.")