Browse Source

Merge remote-tracking branch 'ms/master' into sep-reflect

Tex Riddell 6 years ago
parent
commit
0f23b6946c
28 changed files with 914 additions and 103 deletions
  1. 7 0
      cmake/modules/HandleLLVMOptions.cmake
  2. 47 32
      docs/SPIR-V.rst
  3. 1 1
      external/SPIRV-Tools
  4. 1 1
      include/dxc/DXIL/DxilModule.h
  5. 2 0
      include/dxc/Support/HLSLOptions.td
  6. 1 0
      include/dxc/Support/SPIRVOptions.h
  7. 85 12
      include/dxc/Support/WinAdapter.h
  8. 15 9
      lib/DXIL/DxilModule.cpp
  9. 3 0
      lib/DxcSupport/HLSLOptions.cpp
  10. 9 0
      lib/HLSL/DxilValidation.cpp
  11. 1 1
      lib/HLSL/HLOperationLower.cpp
  12. 5 3
      lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
  13. 97 32
      tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
  14. 21 2
      tools/clang/lib/SPIRV/DeclResultIdMapper.h
  15. 3 3
      tools/clang/lib/SPIRV/GlPerVertex.cpp
  16. 6 5
      tools/clang/lib/SPIRV/SpirvEmitter.cpp
  17. 4 2
      tools/clang/lib/SPIRV/SpirvEmitter.h
  18. 18 0
      tools/clang/test/CodeGenHLSL/batch/expressions/intrinsics/frexp.hlsl
  19. 17 0
      tools/clang/test/CodeGenHLSL/batch/passes/sroa_hlsl/memcpy_dom.hlsl
  20. 202 0
      tools/clang/test/CodeGenSPIRV/meshshading.nv.buffer.mesh.hlsl
  21. 135 0
      tools/clang/test/CodeGenSPIRV/meshshading.nv.fncall.amplification.hlsl
  22. 19 0
      tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.error.hlsl
  23. 36 0
      tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example1-optimized.hlsl
  24. 23 0
      tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example1.hlsl
  25. 41 0
      tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example2-optimized.hlsl
  26. 29 0
      tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example2.hlsl
  27. 62 0
      tools/clang/unittests/HLSL/DxilModuleTest.cpp
  28. 24 0
      tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp

+ 7 - 0
cmake/modules/HandleLLVMOptions.cmake

@@ -531,6 +531,13 @@ if (UNIX AND
   append("-fcolor-diagnostics" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 endif()
 
+# HLSL Change Starts
+# Enable -fms-extensions for clang to use MS uuid extensions for COM.
+if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+  append("-fms-extensions -Wno-language-extension-token" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+endif()
+# HLSL Change Ends
+
 # Add flags for add_dead_strip().
 # FIXME: With MSVS, consider compiling with /Gy and linking with /OPT:REF?
 # But MinSizeRel seems to add that automatically, so maybe disable these

+ 47 - 32
docs/SPIR-V.rst

@@ -3104,14 +3104,14 @@ Callable Stage
 Mesh and Amplification Shaders
 ------------------------------
 
-DirectX adds 2 new shader stages for using MeshShading pipeline namely Mesh and Amplification.
-Amplification shaders corresponds to Task Shaders in Vulkan.
-
+| DirectX adds 2 new shader stages for using the MeshShading pipeline, namely Mesh and Amplification.
+| Amplification shaders correspond to Task Shaders in Vulkan.
+|
 | Refer to following HLSL and SPIR-V specs for details:
 | https://docs.microsoft.com/<TBD>
 | https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_mesh_shader.asciidoc
-
-This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan.
+|
+| This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan.
 
 Entry Point Attributes
 ~~~~~~~~~~~~~~~~~~~~~~
@@ -3120,18 +3120,19 @@ shaders and are translated to SPIR-V execution modes according to the table belo
 
 .. table:: Mapping from HLSL attribute to SPIR-V execution mode
 
-+--------------------+----------------+-------------------------+
-|  HLSL Attribute    |   Value        | SPIR-V Execution Mode   |
-+====================+================+=========================+
-|                    | ``point``      | ``OutputPoints``        |
-|                    +----------------+-------------------------+
-| ``outputtopology`` | ``line``       | ``OutputLinesNV``       |
-|   (Mesh shader)    +----------------+-------------------------+
-|                    | ``triangle``   | ``OutputTrianglesNV``   |
-+--------------------+----------------+-------------------------+
-| ``numthreads``     | ``X, Y, Z``    | ``LocalSize X, Y, Z``   |
-|                    | (X*Y*Z <= 128) |                         |
-+--------------------+----------------+-------------------------+
++-------------------+--------------------+-------------------------+
+|  HLSL Attribute   |   Value            | SPIR-V Execution Mode   |
++===================+====================+=========================+
+|``outputtopology`` | ``point``          | ``OutputPoints``        |
+|                   +--------------------+-------------------------+
+|``(Mesh shader)``  | ``line``           | ``OutputLinesNV``       |
+|                   +--------------------+-------------------------+
+|                   | ``triangle``       | ``OutputTrianglesNV``   |
++-------------------+--------------------+-------------------------+
+| ``numthreads``    | ``X, Y, Z``        | ``LocalSize X, Y, Z``   |
+|                   |                    |                         |
+|                   | ``(X*Y*Z <= 128)`` |                         |
++-------------------+--------------------+-------------------------+
 
 Intrinsics
 ~~~~~~~~~~
@@ -3140,24 +3141,29 @@ and are translated to SPIR-V intrinsics according to the table below:
 
 .. table:: Mapping from HLSL intrinsics to SPIR-V intrinsics
 
-+-------------------------+--------------------+-----------------------------------------+
-|  HLSL Intrinsic         |  Parameters        | SPIR-V Intrinsic                        |
-+=========================+====================+=========================================+
-| ``SetMeshOutputCounts`` | ``numVertices``    | ``PrimitiveCountNV numPrimitives``      |
-|     (Mesh shader)       | ``numPrimitives``  |                                         |
-+-------------------------+--------------------+-----------------------------------------+
-|                         | ``ThreadX``        |                                         |
-| ``DispatchMesh``        | ``ThreadY``        |  ``OpControlBarrier``                   |
-| (Amplification shader)  | ``ThreadZ``        | ``TaskCountNV ThreadX*ThreadY*ThreadZ`` |
-|                         | ``MeshPayload``    |                                         |
-+-------------------------+--------------------+-----------------------------------------+
-
-| *For DispatchMesh intrinsic, we also emit MeshPayload as output block with PerTaskNV decoration
++---------------------------+--------------------+-----------------------------------------+
+|  HLSL Intrinsic           |  Parameters        | SPIR-V Intrinsic                        |
++===========================+====================+=========================================+
+| ``SetMeshOutputCounts``   | ``numVertices``    | ``PrimitiveCountNV numPrimitives``      |
+|                           |                    |                                         |
+| ``(Mesh shader)``         | ``numPrimitives``  |                                         |
++---------------------------+--------------------+-----------------------------------------+
+| ``DispatchMesh``          | ``ThreadX``        | ``OpControlBarrier``                    |
+|                           |                    |                                         |
+| ``(Amplification shader)``| ``ThreadY``        | ``TaskCountNV ThreadX*ThreadY*ThreadZ`` |
+|                           |                    |                                         |
+|                           | ``ThreadZ``        |                                         |
+|                           |                    |                                         |
+|                           | ``MeshPayload``    |                                         |
++---------------------------+--------------------+-----------------------------------------+
+
+| Note: For the ``DispatchMesh`` intrinsic, we also emit ``MeshPayload`` as an output block with the ``PerTaskNV`` decoration.
 
 Mesh Interface Variables
 ~~~~~~~~~~~~~~~~~~~~~~~~
-Interface variables are defined for Mesh shaders using HLSL modifiers.
-Following table gives high level overview of the mapping:
+| Interface variables are defined for Mesh shaders using HLSL modifiers.
+| The following table gives a high-level overview of the mapping:
+|
 
 .. table:: Mapping from HLSL modifiers to SPIR-V definitions
 
@@ -3165,9 +3171,11 @@ Following table gives high level overview of the mapping:
 |  HLSL modifier  | SPIR-V definition                                                       |
 +=================+=========================================================================+
 | ``indices``     | Maps to SPIR-V intrinsic ``PrimitiveIndicesNV``                         |  
+|                 |                                                                         |
 |                 | Defines SPIR-V Execution Mode ``OutputPrimitivesNV <array-size>``       |
 +-----------------+-------------------------------------------------------------------------+
 | ``vertices``    | Maps to per-vertex out attributes                                       |
+|                 |                                                                         |
 |                 | Defines existing SPIR-V Execution Mode ``OutputVertices <array-size>``  |
 +-----------------+-------------------------------------------------------------------------+
 | ``primitives``  | Maps to per-primitive out attributes with ``PerPrimitiveNV`` decoration |
@@ -3395,6 +3403,13 @@ codegen for Vulkan:
 - ``-fspv-target-env=<env>``: Specifies the target environment for this compilation.
   The current valid options are ``vulkan1.0`` and ``vulkan1.1``. If no target
   environment is provided, ``vulkan1.0`` is used as default.
+- ``-fspv-flatten-resource-arrays``: Flattens arrays of textures and samplers
+  into individual resources, each taking one binding number. For example, an
+  array of 3 textures will become 3 texture resources taking 3 binding numbers.
+  This makes the behavior similar to DX. Without this option, you would get 1
+  array object taking 1 binding number. Note that arrays of
+  {RW|Append|Consume}StructuredBuffers are currently not supported in the
+  SPIR-V backend.
 - ``-Wno-vk-ignored-features``: Does not emit warnings on ignored features
   resulting from no Vulkan support, e.g., cbuffer member initializer.
 

+ 1 - 1
external/SPIRV-Tools

@@ -1 +1 @@
-Subproject commit aa9e8f538041db3055ea443080e0ccc315fa114f
+Subproject commit bbd80462f5c89e9a225edabaca1215032c62e459

+ 1 - 1
include/dxc/DXIL/DxilModule.h

@@ -225,7 +225,7 @@ public:
  // This function must be called after unused resources are removed from DxilModule
   bool ModuleHasMulticomponentUAVLoads();
 
-  // Compute shader.
+  // Compute/Mesh/Amplification shader.
   void SetNumThreads(unsigned x, unsigned y, unsigned z);
   unsigned GetNumThreads(unsigned idx) const;
 

+ 2 - 0
include/dxc/Support/HLSLOptions.td

@@ -281,6 +281,8 @@ def fspv_extension_EQ : Joined<["-"], "fspv-extension=">, Group<spirv_Group>, Fl
   HelpText<"Specify SPIR-V extension permitted to use">;
 def fspv_target_env_EQ : Joined<["-"], "fspv-target-env=">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
   HelpText<"Specify the target environment: vulkan1.0 (default) or vulkan1.1">;
+def fspv_flatten_resource_arrays: Flag<["-"], "fspv-flatten-resource-arrays">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
+  HelpText<"Flatten arrays of resources so each array element takes one binding number">;
 def Wno_vk_ignored_features : Joined<["-"], "Wno-vk-ignored-features">, Group<spirv_Group>, Flags<[CoreOption, DriverOption, HelpHidden]>,
   HelpText<"Do not emit warnings for ingored features resulting from no Vulkan support">;
 def Wno_vk_emulated_features : Joined<["-"], "Wno-vk-emulated-features">, Group<spirv_Group>, Flags<[CoreOption, DriverOption, HelpHidden]>,

+ 1 - 0
include/dxc/Support/SPIRVOptions.h

@@ -53,6 +53,7 @@ struct SpirvCodeGenOptions {
   bool useDxLayout;
   bool useGlLayout;
   bool useScalarLayout;
+  bool flattenResourceArrays;
   SpirvLayoutRule cBufferLayoutRule;
   SpirvLayoutRule sBufferLayoutRule;
   SpirvLayoutRule tBufferLayoutRule;

+ 85 - 12
include/dxc/Support/WinAdapter.h

@@ -46,16 +46,33 @@
 #define CoTaskMemFree free
 
 #define SysFreeString free
-#define SysAllocStringLen(ptr, size) (wchar_t*)realloc(ptr, (size + 1)*sizeof(wchar_t))
+#define SysAllocStringLen(ptr, size)                                           \
+  (wchar_t *)realloc(ptr, (size + 1) * sizeof(wchar_t))
 
 #define ARRAYSIZE(array) (sizeof(array) / sizeof(array[0]))
 
 #define _countof(a) (sizeof(a) / sizeof(*(a)))
 
+// Emulate UUIDs on Apple platforms and with GCC (no MS uuid support).
+#ifdef __APPLE__
+#define __EMULATE_UUID 1
+#else // __APPLE__
+#ifdef __GNUC__
+#ifndef __clang__
+#define __EMULATE_UUID 1
+#endif // __clang__
+#endif // __GNUC__
+#endif // __APPLE__
+
+#ifdef __EMULATE_UUID
 #define __declspec(x)
+#endif // __EMULATE_UUID
+
 #define DECLSPEC_SELECTANY
 
+#ifdef __EMULATE_UUID
 #define uuid(id)
+#endif // __EMULATE_UUID
 
 #define STDMETHODCALLTYPE
 #define STDAPI extern "C" HRESULT STDAPICALLTYPE
@@ -188,7 +205,8 @@
 #define OutputDebugStringA(msg) fputs(msg, stderr)
 #define OutputDebugFormatA(...) fprintf(stderr, __VA_ARGS__)
 
-#define CaptureStackBackTrace(FramesToSkip, FramesToCapture, BackTrace, BackTraceHash)\
+#define CaptureStackBackTrace(FramesToSkip, FramesToCapture, BackTrace,        \
+                              BackTraceHash)                                   \
   backtrace(BackTrace, FramesToCapture)
 
 // Event Tracing for Windows (ETW) provides application programmers the ability
@@ -413,19 +431,55 @@ typedef void *HMODULE;
 
 //===--------------------- ID Types and Macros for COM --------------------===//
 
-struct GUID {
+#ifdef __EMULATE_UUID
+struct GUID
+#else  // __EMULATE_UUID
+// These specific definitions are required by clang -fms-extensions.
+typedef struct _GUID
+#endif // __EMULATE_UUID
+{
   uint32_t Data1;
   uint16_t Data2;
   uint16_t Data3;
   uint8_t Data4[8];
-};
+}
+#ifdef __EMULATE_UUID
+;
+#else  // __EMULATE_UUID
+GUID;
+#endif // __EMULATE_UUID
 typedef GUID CLSID;
 typedef const GUID &REFGUID;
-typedef const void *REFIID;
 typedef const GUID &REFCLSID;
 
+#ifdef __EMULATE_UUID
+typedef const void *REFIID;
 #define IsEqualIID(a, b) a == b
 #define IsEqualCLSID(a, b) !memcmp(&a, &b, sizeof(GUID))
+#else  // __EMULATE_UUID
+typedef GUID IID;
+typedef IID *LPIID;
+typedef const IID &REFIID;
+inline bool IsEqualGUID(REFGUID rguid1, REFGUID rguid2) {
+  return !memcmp(&rguid1, &rguid2, sizeof(GUID));
+}
+
+inline bool operator==(REFGUID guidOne, REFGUID guidOther) {
+  return !!IsEqualGUID(guidOne, guidOther);
+}
+
+inline bool operator!=(REFGUID guidOne, REFGUID guidOther) {
+  return !(guidOne == guidOther);
+}
+
+inline bool IsEqualIID(REFIID riid1, REFIID riid2) {
+  return IsEqualGUID(riid1, riid2);
+}
+
+inline bool IsEqualCLSID(REFCLSID rclsid1, REFCLSID rclsid2) {
+  return IsEqualGUID(rclsid1, rclsid2);
+}
+#endif // __EMULATE_UUID
 
 //===--------------------- Struct Types -----------------------------------===//
 
@@ -503,22 +557,37 @@ enum tagSTATFLAG {
 
 //===--------------------- UUID Related Macros ----------------------------===//
 
+#ifdef __EMULATE_UUID
+
 // The following macros are defined to facilitate the lack of 'uuid' on Linux.
 #define DECLARE_CROSS_PLATFORM_UUIDOF(T)                                       \
 public:                                                                        \
   static REFIID uuidof() { return static_cast<REFIID>(&T##_ID); }              \
                                                                                \
 private:                                                                       \
-   __attribute__ ((visibility ("default"))) static const char T##_ID;
+  __attribute__((visibility("default"))) static const char T##_ID;
 
-#define DEFINE_CROSS_PLATFORM_UUIDOF(T) __attribute__ ((visibility ("default"))) const char T::T##_ID = '\0';
+#define DEFINE_CROSS_PLATFORM_UUIDOF(T)                                        \
+  __attribute__((visibility("default"))) const char T::T##_ID = '\0';
 #define __uuidof(T) T::uuidof()
 #define IID_PPV_ARGS(ppType)                                                   \
   (**(ppType)).uuidof(), reinterpret_cast<void **>(ppType)
 
+#else // __EMULATE_UUID
+
+#define DECLARE_CROSS_PLATFORM_UUIDOF(T)
+#define DEFINE_CROSS_PLATFORM_UUIDOF(T)
+
+template <typename T> inline void **IID_PPV_ARGS_Helper(T **pp) {
+  return reinterpret_cast<void **>(pp);
+}
+#define IID_PPV_ARGS(ppType) __uuidof(**(ppType)), IID_PPV_ARGS_Helper(ppType)
+
+#endif // __EMULATE_UUID
+
 //===--------------------- COM Interfaces ---------------------------------===//
 
-struct IUnknown {
+struct __declspec(uuid("00000000-0000-0000-C000-000000000046")) IUnknown {
   virtual HRESULT QueryInterface(REFIID riid, void **ppvObject) = 0;
   virtual ULONG AddRef();
   virtual ULONG Release();
@@ -533,25 +602,29 @@ private:
   DECLARE_CROSS_PLATFORM_UUIDOF(IUnknown)
 };
 
-struct INoMarshal : public IUnknown {
+struct __declspec(uuid("ECC8691B-C1DB-4DC0-855E-65F6C551AF49")) INoMarshal
+    : public IUnknown {
   DECLARE_CROSS_PLATFORM_UUIDOF(INoMarshal)
 };
 
-struct IMalloc : public IUnknown {
+struct __declspec(uuid("00000002-0000-0000-C000-000000000046")) IMalloc
+    : public IUnknown {
   virtual void *Alloc(size_t size);
   virtual void *Realloc(void *ptr, size_t size);
   virtual void Free(void *ptr);
   virtual HRESULT QueryInterface(REFIID riid, void **ppvObject);
 };
 
-struct ISequentialStream : public IUnknown {
+struct __declspec(uuid("0C733A30-2A1C-11CE-ADE5-00AA0044773D"))
+    ISequentialStream : public IUnknown {
   virtual HRESULT Read(void *pv, ULONG cb, ULONG *pcbRead) = 0;
   virtual HRESULT Write(const void *pv, ULONG cb, ULONG *pcbWritten) = 0;
 
   DECLARE_CROSS_PLATFORM_UUIDOF(ISequentialStream)
 };
 
-struct IStream : public ISequentialStream {
+struct __declspec(uuid("0000000c-0000-0000-C000-000000000046")) IStream
+    : public ISequentialStream {
   virtual HRESULT Seek(LARGE_INTEGER dlibMove, DWORD dwOrigin,
                        ULARGE_INTEGER *plibNewPosition) = 0;
   virtual HRESULT SetSize(ULARGE_INTEGER libNewSize) = 0;

+ 15 - 9
lib/DXIL/DxilModule.cpp

@@ -366,24 +366,30 @@ void DxilModule::CollectShaderFlagsForModule() {
 }
 
 void DxilModule::SetNumThreads(unsigned x, unsigned y, unsigned z) {
-  DXASSERT(m_DxilEntryPropsMap.size() == 1 && m_pSM->IsCS(),
-           "only works for CS profile");
+  DXASSERT(m_DxilEntryPropsMap.size() == 1 &&
+           (m_pSM->IsCS() || m_pSM->IsMS() || m_pSM->IsAS()),
+           "only works for CS/MS/AS profiles");
   DxilFunctionProps &props = m_DxilEntryPropsMap.begin()->second->props;
-  DXASSERT(props.IsCS(), "Must be CS profile");
-  unsigned *numThreads = props.ShaderProps.CS.numThreads;
+  DXASSERT_NOMSG(m_pSM->GetKind() == props.shaderKind);
+  unsigned *numThreads = props.IsCS() ? props.ShaderProps.CS.numThreads :
+    props.IsMS() ? props.ShaderProps.MS.numThreads : props.ShaderProps.AS.numThreads;
   numThreads[0] = x;
   numThreads[1] = y;
   numThreads[2] = z;
 }
 unsigned DxilModule::GetNumThreads(unsigned idx) const {
+  DXASSERT(m_DxilEntryPropsMap.size() == 1 &&
+           (m_pSM->IsCS() || m_pSM->IsMS() || m_pSM->IsAS()),
+           "only works for CS/MS/AS profiles");
   DXASSERT(idx < 3, "Thread dimension index must be 0-2");
-  if (!m_pSM->IsCS())
-    return 0;
-  DXASSERT(m_DxilEntryPropsMap.size() == 1, "should have one entry prop");
   __analysis_assume(idx < 3);
+  if (!(m_pSM->IsCS() || m_pSM->IsMS() || m_pSM->IsAS()))
+    return 0;
   const DxilFunctionProps &props = m_DxilEntryPropsMap.begin()->second->props;
-  DXASSERT(props.IsCS(), "Must be CS profile");
-  return props.ShaderProps.CS.numThreads[idx];
+  DXASSERT_NOMSG(m_pSM->GetKind() == props.shaderKind);
+  const unsigned *numThreads = props.IsCS() ? props.ShaderProps.CS.numThreads :
+    props.IsMS() ? props.ShaderProps.MS.numThreads : props.ShaderProps.AS.numThreads;
+  return numThreads[idx];
 }
 
 DXIL::InputPrimitive DxilModule::GetInputPrimitive() const {

+ 3 - 0
lib/DxcSupport/HLSLOptions.cpp

@@ -717,6 +717,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.SpirvOptions.enableReflect = Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false);
   opts.SpirvOptions.noWarnIgnoredFeatures = Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false);
   opts.SpirvOptions.noWarnEmulatedFeatures = Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false);
+  opts.SpirvOptions.flattenResourceArrays =
+      Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false);
 
   if (!handleVkShiftArgs(Args, OPT_fvk_b_shift, "b", &opts.SpirvOptions.bShift, errors) ||
       !handleVkShiftArgs(Args, OPT_fvk_t_shift, "t", &opts.SpirvOptions.tShift, errors) ||
@@ -791,6 +793,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
       Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fvk_use_scalar_layout, OPT_INVALID, false) ||
+      Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false) ||
       Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false) ||
       Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false) ||
       Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false) ||

+ 9 - 0
lib/HLSL/DxilValidation.cpp

@@ -39,6 +39,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -3870,6 +3871,12 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) {
   }
 }
 
+static void ValidateBitcode(ValidationContext &ValCtx) {
+  if (llvm::verifyModule(ValCtx.M, &ValCtx.DiagStream())) {
+    ValCtx.EmitError(ValidationRule::BitcodeValid);
+  }
+}
+
 static void ValidateMetadata(ValidationContext &ValCtx) {
   Module *pModule = &ValCtx.M;
   const std::string &target = pModule->getTargetTriple();
@@ -5626,6 +5633,8 @@ ValidateDxilModule(llvm::Module *pModule, llvm::Module *pDebugModule) {
 
   ValidationContext ValCtx(*pModule, pDebugModule, *pDxilModule, DiagPrinter);
 
+  ValidateBitcode(ValCtx);
+
   ValidateMetadata(ValCtx);
 
   ValidateShaderState(ValCtx);

+ 1 - 1
lib/HLSL/HLOperationLower.cpp

@@ -1981,7 +1981,7 @@ Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
 
   // bool ne = val != 0;
   Value *notZero = Builder.CreateFCmpUNE(val, zeroVal);
-  notZero = Builder.CreateZExt(notZero, dstTy);
+  notZero = Builder.CreateSExt(notZero, dstTy);
 
   Value *intVal = Builder.CreateBitCast(val, dstTy);
   // temp = intVal & exponentMask;

+ 5 - 3
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -3246,11 +3246,14 @@ static void ReplaceConstantWithInst(Constant *C, Value *V, IRBuilder<> &Builder)
   C->removeDeadConstantUsers();
 }
 
-static void ReplaceUnboundedArrayUses(Value *V, Value *Src, IRBuilder<> &Builder) {
+static void ReplaceUnboundedArrayUses(Value *V, Value *Src) {
   for (auto it = V->user_begin(); it != V->user_end(); ) {
     User *U = *(it++);
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
       SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
+      // Must set the insert point to the GEP itself (instead of the memcpy),
+      // because the indices might not dominate the memcpy.
+      IRBuilder<> Builder(GEP);
       Value *NewGEP = Builder.CreateGEP(Src, idxList);
       GEP->replaceAllUsesWith(NewGEP);
     } else if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
@@ -3392,8 +3395,7 @@ static void ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC,
       }
     } else {
       DXASSERT(IsUnboundedArrayMemcpy(TyV, TySrc), "otherwise mismatched types in memcpy are not unbounded array");
-      IRBuilder<> Builder(MC);
-      ReplaceUnboundedArrayUses(V, Src, Builder);
+      ReplaceUnboundedArrayUses(V, Src);
     }
   }
 

+ 97 - 32
tools/clang/lib/SPIRV/DeclResultIdMapper.cpp

@@ -716,7 +716,7 @@ SpirvVariable *DeclResultIdMapper::createExternVar(const VarDecl *var) {
   const auto *bindingAttr = var->getAttr<VKBindingAttr>();
   const auto *counterBindingAttr = var->getAttr<VKCounterBindingAttr>();
 
-  resourceVars.emplace_back(varInstr, loc, regAttr, bindingAttr,
+  resourceVars.emplace_back(varInstr, var, loc, regAttr, bindingAttr,
                             counterBindingAttr);
 
   if (const auto *inputAttachment = var->getAttr<VKInputAttachmentIndexAttr>())
@@ -846,7 +846,7 @@ SpirvVariable *DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
     astDecls[varDecl] = DeclSpirvInfo(bufferVar, index++);
   }
   resourceVars.emplace_back(
-      bufferVar, decl->getLocation(), getResourceBinding(decl),
+      bufferVar, decl, decl->getLocation(), getResourceBinding(decl),
       decl->getAttr<VKBindingAttr>(), decl->getAttr<VKCounterBindingAttr>());
 
   return bufferVar;
@@ -890,7 +890,7 @@ SpirvVariable *DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
   // We register the VarDecl here.
   astDecls[decl] = DeclSpirvInfo(bufferVar);
   resourceVars.emplace_back(
-      bufferVar, decl->getLocation(), getResourceBinding(context),
+      bufferVar, decl, decl->getLocation(), getResourceBinding(context),
       decl->getAttr<VKBindingAttr>(), decl->getAttr<VKCounterBindingAttr>());
 
   return bufferVar;
@@ -970,8 +970,8 @@ void DeclResultIdMapper::createGlobalsCBuffer(const VarDecl *var) {
       context, /*arraySize*/ 0, ContextUsageKind::Globals, "type.$Globals",
       "$Globals");
 
-  resourceVars.emplace_back(globals, SourceLocation(), nullptr, nullptr,
-                            nullptr, /*isCounterVar*/ false,
+  resourceVars.emplace_back(globals, /*decl*/ nullptr, SourceLocation(),
+                            nullptr, nullptr, nullptr, /*isCounterVar*/ false,
                             /*isGlobalsCBuffer*/ true);
 
   uint32_t index = 0;
@@ -1089,7 +1089,7 @@ void DeclResultIdMapper::createCounterVar(
   if (!isAlias) {
     // Non-alias counter variables should be put in to resourceVars so that
     // descriptors can be allocated for them.
-    resourceVars.emplace_back(counterInstr, decl->getLocation(),
+    resourceVars.emplace_back(counterInstr, decl, decl->getLocation(),
                               getResourceBinding(decl),
                               decl->getAttr<VKBindingAttr>(),
                               decl->getAttr<VKCounterBindingAttr>(), true);
@@ -1213,26 +1213,63 @@ private:
 /// set and binding number.
 class BindingSet {
 public:
-  /// Uses the given set and binding number.
-  void useBinding(uint32_t binding, uint32_t set) {
-    usedBindings[set].insert(binding);
+  /// Uses the given set and binding number. Returns false if the binding number
+  /// was already occupied in the set, and returns true otherwise.
+  bool useBinding(uint32_t binding, uint32_t set) {
+    bool inserted = false;
+    std::tie(std::ignore, inserted) = usedBindings[set].insert(binding);
+    return inserted;
+  }
+
+  /// Uses the next available binding number in |set|. If more than one binding
+  /// number is to be occupied, it finds the next available chunk that can fit
+  /// |numBindingsToUse| in the |set|.
+  uint32_t useNextBinding(uint32_t set, uint32_t numBindingsToUse = 1) {
+    uint32_t bindingNoStart = getNextBindingChunk(set, numBindingsToUse);
+    auto &binding = usedBindings[set];
+    for (uint32_t i = 0; i < numBindingsToUse; ++i)
+      binding.insert(bindingNoStart + i);
+    return bindingNoStart;
   }
 
-  /// Uses the next avaiable binding number in set 0.
-  uint32_t useNextBinding(uint32_t set) {
-    auto &binding = usedBindings[set];
-    auto &next = nextBindings[set];
-    while (binding.count(next))
-      ++next;
-    binding.insert(next);
-    return next++;
+  /// Returns the first available binding number in the |set| for which |n|
+  /// consecutive binding numbers are unused.
+  uint32_t getNextBindingChunk(uint32_t set, uint32_t n) {
+    auto &existingBindings = usedBindings[set];
+
+    // There were no bindings in this set. Can start at binding zero.
+    if (existingBindings.empty())
+      return 0;
+
+    // Check whether the chunk of |n| binding numbers can be fitted at the
+    // very beginning of the list (start at binding 0 in the current set).
+    uint32_t curBinding = *existingBindings.begin();
+    if (curBinding >= n)
+      return 0;
+
+    auto iter = std::next(existingBindings.begin());
+    while (iter != existingBindings.end()) {
+      // There exists a next binding number that is used. Check to see if the
+      // gap between current binding number and next binding number is large
+      // enough to accommodate |n|.
+      uint32_t nextBinding = *iter;
+      if (n <= nextBinding - curBinding - 1)
+        return curBinding + 1;
+
+      curBinding = nextBinding;
+
+      // Peek at the next binding that has already been used (if any).
+      ++iter;
+    }
+
+    // |curBinding| was the last binding that was used in this set. The next
+    // chunk of |n| bindings can start at |curBinding|+1.
+    return curBinding + 1;
   }
 
 private:
   ///< set number -> set of used binding number
-  llvm::DenseMap<uint32_t, llvm::DenseSet<uint32_t>> usedBindings;
-  ///< set number -> next available binding number
-  llvm::DenseMap<uint32_t, uint32_t> nextBindings;
+  llvm::DenseMap<uint32_t, std::set<uint32_t>> usedBindings;
 };
 } // namespace
 
@@ -1553,11 +1590,30 @@ bool DeclResultIdMapper::decorateResourceBindings() {
 
   // Decorates the given varId of the given category with set number
   // setNo, binding number bindingNo. Ignores overlaps.
-  const auto tryToDecorate = [this, &bindingSet](SpirvVariable *var,
+  const auto tryToDecorate = [this, &bindingSet](const ResourceVar &var,
                                                  const uint32_t setNo,
                                                  const uint32_t bindingNo) {
-    bindingSet.useBinding(bindingNo, setNo);
-    spvBuilder.decorateDSetBinding(var, setNo, bindingNo);
+    // By default we use one binding number per resource, and an array of
+    // resources also gets only one binding number. However, for array of
+    // resources (e.g. array of textures), DX uses one binding number per array
+    // element. We can match this behavior via a command line option.
+    uint32_t numBindingsToUse = 1;
+    if (spirvOptions.flattenResourceArrays)
+      numBindingsToUse = var.getArraySize();
+
+    for (uint32_t i = 0; i < numBindingsToUse; ++i) {
+      bool success = bindingSet.useBinding(bindingNo + i, setNo);
+      if (!success && spirvOptions.flattenResourceArrays) {
+        emitError("ran into binding number conflict when assigning binding "
+                  "number %0 in set %1",
+                  {})
+            << bindingNo << setNo;
+      }
+    }
+
+    // No need to decorate multiple binding numbers for arrays. It will be done
+    // by legalization/optimization.
+    spvBuilder.decorateDSetBinding(var.getSpirvInstr(), setNo, bindingNo);
   };
 
   for (const auto &var : resourceVars) {
@@ -1570,13 +1626,12 @@ bool DeclResultIdMapper::decorateResourceBindings() {
         else if (const auto *reg = var.getRegister())
           set = reg->RegisterSpace.getValueOr(defaultSpace);
 
-        tryToDecorate(var.getSpirvInstr(), set, vkCBinding->getBinding());
+        tryToDecorate(var, set, vkCBinding->getBinding());
       }
     } else {
       if (const auto *vkBinding = var.getBinding()) {
         // Process m1
-        tryToDecorate(var.getSpirvInstr(),
-                      getVkBindingAttrSet(vkBinding, defaultSpace),
+        tryToDecorate(var, getVkBindingAttrSet(vkBinding, defaultSpace),
                       vkBinding->getBinding());
       }
     }
@@ -1617,10 +1672,18 @@ bool DeclResultIdMapper::decorateResourceBindings() {
           llvm_unreachable("unknown register type found");
         }
 
-        tryToDecorate(var.getSpirvInstr(), set, binding);
+        tryToDecorate(var, set, binding);
       }
 
   for (const auto &var : resourceVars) {
+    // By default we use one binding number per resource, and an array of
+    // resources also gets only one binding number. However, for array of
+    // resources (e.g. array of textures), DX uses one binding number per array
+    // element. We can match this behavior via a command line option.
+    uint32_t numBindingsToUse = 1;
+    if (spirvOptions.flattenResourceArrays)
+      numBindingsToUse = var.getArraySize();
+
     if (var.isCounter()) {
       if (!var.getCounterBinding()) {
         // Process mX * c2
@@ -1630,15 +1693,17 @@ bool DeclResultIdMapper::decorateResourceBindings() {
         else if (const auto *reg = var.getRegister())
           set = reg->RegisterSpace.getValueOr(defaultSpace);
 
-        spvBuilder.decorateDSetBinding(var.getSpirvInstr(), set,
-                                       bindingSet.useNextBinding(set));
+        spvBuilder.decorateDSetBinding(
+            var.getSpirvInstr(), set,
+            bindingSet.useNextBinding(set, numBindingsToUse));
       }
     } else if (!var.getBinding()) {
       const auto *reg = var.getRegister();
       if (reg && reg->isSpaceOnly()) {
         const uint32_t set = reg->RegisterSpace.getValueOr(defaultSpace);
-        spvBuilder.decorateDSetBinding(var.getSpirvInstr(), set,
-                                       bindingSet.useNextBinding(set));
+        spvBuilder.decorateDSetBinding(
+            var.getSpirvInstr(), set,
+            bindingSet.useNextBinding(set, numBindingsToUse));
       } else if (!reg) {
         // Process m3 (no 'vk::binding' and no ':register' assignment)
 
@@ -1653,7 +1718,7 @@ bool DeclResultIdMapper::decorateResourceBindings() {
         else {
           spvBuilder.decorateDSetBinding(
               var.getSpirvInstr(), defaultSpace,
-              bindingSet.useNextBinding(defaultSpace));
+              bindingSet.useNextBinding(defaultSpace, numBindingsToUse));
         }
       }
     }

+ 21 - 2
tools/clang/lib/SPIRV/DeclResultIdMapper.h

@@ -111,12 +111,24 @@ private:
 
 class ResourceVar {
 public:
-  ResourceVar(SpirvVariable *var, SourceLocation loc,
+  ResourceVar(SpirvVariable *var, const Decl *decl, SourceLocation loc,
               const hlsl::RegisterAssignment *r, const VKBindingAttr *b,
               const VKCounterBindingAttr *cb, bool counter = false,
               bool globalsBuffer = false)
       : variable(var), srcLoc(loc), reg(r), binding(b), counterBinding(cb),
-        isCounterVar(counter), isGlobalsCBuffer(globalsBuffer) {}
+        isCounterVar(counter), isGlobalsCBuffer(globalsBuffer), arraySize(1) {
+    if (decl) {
+      if (const ValueDecl *valueDecl = dyn_cast<ValueDecl>(decl)) {
+        const QualType type = valueDecl->getType();
+        if (!type.isNull() && type->isConstantArrayType()) {
+          if (auto constArrayType = dyn_cast<ConstantArrayType>(type)) {
+            arraySize =
+                static_cast<uint32_t>(constArrayType->getSize().getZExtValue());
+          }
+        }
+      }
+    }
+  }
 
   SpirvVariable *getSpirvInstr() const { return variable; }
   SourceLocation getSourceLocation() const { return srcLoc; }
@@ -127,6 +139,7 @@ public:
   const VKCounterBindingAttr *getCounterBinding() const {
     return counterBinding;
   }
+  uint32_t getArraySize() const { return arraySize; }
 
 private:
   SpirvVariable *variable;                    ///< The variable
@@ -136,6 +149,7 @@ private:
   const VKCounterBindingAttr *counterBinding; ///< Vulkan counter binding
   bool isCounterVar;                          ///< Couter variable or not
   bool isGlobalsCBuffer;                      ///< $Globals cbuffer or not
+  uint32_t arraySize;                         ///< Size if resource is an array
 };
 
 /// A (instruction-pointer, is-alias-or-not) pair for counter variables
@@ -297,6 +311,11 @@ public:
   SpirvVariable *createRayTracingNVStageVar(spv::StorageClass sc,
                                             const VarDecl *decl);
 
+  /// \brief Creates the taskNV stage variables for payload struct variable
+  /// and returns true on success. SPIR-V instructions will also be generated
+  /// to load/store the contents from/to *value. payloadMemOffset is incremented
+  /// based on payload struct member size, alignment and offset, and SPIR-V
+  /// decorations PerTaskNV and Offset are assigned to each member.
   bool createPayloadStageVars(const hlsl::SigPoint *sigPoint,
                               spv::StorageClass sc, const NamedDecl *decl,
                               bool asInput, QualType type,

+ 3 - 3
tools/clang/lib/SPIRV/GlPerVertex.cpp

@@ -363,7 +363,7 @@ bool GlPerVertex::tryToAccess(hlsl::SigPoint::Kind sigPointKind,
                               SpirvInstruction *vecComponent,
                               SourceLocation loc) {
   assert(value);
-  // invocationId should only be used for HSPCOut.
+  // invocationId should only be used for HSCPOut or MSOut.
   assert(invocationId.hasValue()
              ? (sigPointKind == hlsl::SigPoint::Kind::HSCPOut ||
                 sigPointKind == hlsl::SigPoint::Kind::MSOut)
@@ -655,7 +655,7 @@ bool GlPerVertex::writeField(hlsl::Semantic::Kind semanticKind,
   // The interesting shader stage is HS. We need the InvocationID to write
   // out the value to the correct array element.
   SpirvInstruction *offset = nullptr;
-  QualType type;
+  QualType type = {};
   bool isClip = false;
   switch (semanticKind) {
   case hlsl::Semantic::Kind::ClipDistance: {
@@ -686,7 +686,7 @@ bool GlPerVertex::writeField(hlsl::Semantic::Kind semanticKind,
     return false;
   }
   if (vecComponent) {
-    QualType elemType;
+    QualType elemType = {};
     if (!isVectorType(type, &elemType)) {
       assert(false && "expected vector type");
     }

+ 6 - 5
tools/clang/lib/SPIRV/SpirvEmitter.cpp

@@ -164,7 +164,7 @@ bool spirvToolsLegalize(spv_target_env env, std::vector<uint32_t> *module,
 }
 
 bool spirvToolsOptimize(spv_target_env env, std::vector<uint32_t> *module,
-                        const llvm::SmallVector<llvm::StringRef, 4> &flags,
+                        clang::spirv::SpirvCodeGenOptions &spirvOptions,
                         std::string *messages) {
   spvtools::Optimizer optimizer(env);
 
@@ -176,14 +176,16 @@ bool spirvToolsOptimize(spv_target_env env, std::vector<uint32_t> *module,
   spvtools::OptimizerOptions options;
   options.set_run_validator(false);
 
-  if (flags.empty()) {
+  if (spirvOptions.optConfig.empty()) {
     optimizer.RegisterPerformancePasses();
+    if (spirvOptions.flattenResourceArrays)
+      optimizer.RegisterPass(spvtools::CreateDescriptorScalarReplacementPass());
     optimizer.RegisterPass(spvtools::CreateCompactIdsPass());
   } else {
     // Command line options use llvm::SmallVector and llvm::StringRef, whereas
     // SPIR-V optimizer uses std::vector and std::string.
     std::vector<std::string> stdFlags;
-    for (const auto &f : flags)
+    for (const auto &f : spirvOptions.optConfig)
       stdFlags.push_back(f.str());
     if (!optimizer.RegisterPassesFromFlags(stdFlags))
       return false;
@@ -662,8 +664,7 @@ void SpirvEmitter::HandleTranslationUnit(ASTContext &context) {
     // Run optimization passes
     if (theCompilerInstance.getCodeGenOpts().OptimizationLevel > 0) {
       std::string messages;
-      if (!spirvToolsOptimize(targetEnv, &m, spirvOptions.optConfig,
-                              &messages)) {
+      if (!spirvToolsOptimize(targetEnv, &m, spirvOptions, &messages)) {
         emitFatalError("failed to optimize SPIR-V: %0", {}) << messages;
         emitNote("please file a bug report on "
                  "https://github.com/Microsoft/DirectXShaderCompiler/issues "

+ 4 - 2
tools/clang/lib/SPIRV/SpirvEmitter.h

@@ -309,8 +309,10 @@ private:
                                     SpirvInstruction *initValue,
                                     SourceLocation loc);
 
-  /// Collects all indices from consecutive MemberExprs
-  /// TODO: Update method description here.
+  /// Collects all indices from consecutive MemberExprs, ArraySubscriptExprs and
+  /// CXXOperatorCallExprs. Also specially handles all mesh shader out
+  /// attributes by returning the entire expression so that the caller can
+  /// extract the member expression.
   const Expr *
   collectArrayStructIndices(const Expr *expr, bool rawIndex,
                             llvm::SmallVectorImpl<uint32_t> *rawIndices,

+ 18 - 0
tools/clang/test/CodeGenHLSL/batch/expressions/intrinsics/frexp.hlsl

@@ -0,0 +1,18 @@
+// RUN: %dxc -E main -T ps_6_2 %s | FileCheck %s
+
+// Make sure frexp generates the expected code pattern.
+// CHECK:bitcast float {{.*}} to i32
+// CHECK:and i32 {{.*}}, 2139095040
+// CHECK:add {{.*}}, -1056964608
+// CHECK:ashr {{.*}}, 23
+// CHECK:sitofp
+// CHECK:and i32 {{.*}}, 8388607
+// CHECK:or i32 {{.*}}, 1056964608
+// CHECK:fadd
+
+float main(float a:A) : SV_Target {
+  float b;
+  float c = frexp ( a , b );
+
+  return b+c;
+}

+ 17 - 0
tools/clang/test/CodeGenHLSL/batch/passes/sroa_hlsl/memcpy_dom.hlsl

@@ -0,0 +1,17 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// Regression test for a validation error, where parameter SROA
+// would generate GEPs before the indices it uses
+
+// CHECK: @main
+
+Texture2D tex0[10] : register(t0);
+
+float4 f(Texture2D textures[], unsigned int idx) {
+  return textures[idx].Load(0);
+}
+
+[ RootSignature("DescriptorTable(SRV(t0, numDescriptors=10))") ]
+float4 main() : SV_Target {
+  return f(tex0, 1);
+}

+ 202 - 0
tools/clang/test/CodeGenSPIRV/meshshading.nv.buffer.mesh.hlsl

@@ -0,0 +1,202 @@
+// Run: %dxc -T ms_6_5 -E main
+
+// CHECK:  OpCapability MeshShadingNV
+// CHECK:  OpExtension "SPV_NV_mesh_shader"
+// CHECK:  OpEntryPoint MeshNV %main "main"
+
+// CHECK:  OpName %UserVertex "UserVertex"
+struct UserVertex {
+// CHECK:  OpMemberName %UserVertex 0 "position"
+// CHECK:  OpMemberName %UserVertex 1 "texcoord"
+// CHECK:  OpMemberName %UserVertex 2 "color"
+    float3 position;
+    float2 texcoord;
+    float3 color;
+};
+
+// CHECK:  OpName %Mesh "Mesh"
+struct Mesh {
+// CHECK:  OpMemberName %Mesh 0 "firstSubmesh"
+// CHECK:  OpMemberName %Mesh 1 "submeshCount"
+// CHECK:  OpMemberName %Mesh 2 "dummy"
+    uint firstSubmesh;
+    uint submeshCount;
+    uint dummy[2];
+};
+
+// CHECK:  OpName %SubMesh "SubMesh"
+struct SubMesh {
+// CHECK:  OpMemberName %SubMesh 0 "vertexCount"
+// CHECK:  OpMemberName %SubMesh 1 "vertexOffset"
+// CHECK:  OpMemberName %SubMesh 2 "primitiveCount"
+// CHECK:  OpMemberName %SubMesh 3 "indexOffset"
+// CHECK:  OpMemberName %SubMesh 4 "boundingBox"
+    uint vertexCount;
+    uint vertexOffset;
+    uint primitiveCount;
+    uint indexOffset;
+    float4 boundingBox[8];
+};
+
+// CHECK:  OpDecorate %userVertices DescriptorSet 0
+// CHECK:  OpDecorate %userVertices Binding 0
+// CHECK:  OpDecorate %userIndices DescriptorSet 0
+// CHECK:  OpDecorate %userIndices Binding 1
+// CHECK:  OpDecorate %meshes DescriptorSet 0
+// CHECK:  OpDecorate %meshes Binding 2
+// CHECK:  OpDecorate %submeshes DescriptorSet 0
+// CHECK:  OpDecorate %submeshes Binding 3
+// CHECK:  OpDecorate %UBO DescriptorSet 0
+// CHECK:  OpDecorate %UBO Binding 4
+
+// CHECK:  OpMemberDecorate %UserVertex 0 Offset 0
+// CHECK:  OpMemberDecorate %UserVertex 1 Offset 16
+// CHECK:  OpMemberDecorate %UserVertex 2 Offset 32
+// CHECK:  OpDecorate %_runtimearr_UserVertex ArrayStride 48
+// CHECK:  OpMemberDecorate %type_RWStructuredBuffer_UserVertex 0 Offset 0
+// CHECK:  OpDecorate %type_RWStructuredBuffer_UserVertex BufferBlock
+
+// CHECK:  OpDecorate %_runtimearr_uint ArrayStride 4
+// CHECK:  OpMemberDecorate %type_RWStructuredBuffer_uint 0 Offset 0
+// CHECK:  OpDecorate %type_RWStructuredBuffer_uint BufferBlock
+
+// CHECK:  OpMemberDecorate %Mesh 0 Offset 0
+// CHECK:  OpMemberDecorate %Mesh 1 Offset 4
+// CHECK:  OpMemberDecorate %Mesh 2 Offset 8
+// CHECK:  OpDecorate %_runtimearr_Mesh ArrayStride 16
+// CHECK:  OpMemberDecorate %type_RWStructuredBuffer_Mesh 0 Offset 0
+// CHECK:  OpDecorate %type_RWStructuredBuffer_Mesh BufferBlock
+
+// CHECK:  OpMemberDecorate %SubMesh 0 Offset 0
+// CHECK:  OpMemberDecorate %SubMesh 1 Offset 4
+// CHECK:  OpMemberDecorate %SubMesh 2 Offset 8
+// CHECK:  OpMemberDecorate %SubMesh 3 Offset 12
+// CHECK:  OpMemberDecorate %SubMesh 4 Offset 16
+// CHECK:  OpDecorate %_runtimearr_SubMesh ArrayStride 144
+// CHECK:  OpMemberDecorate %type_RWStructuredBuffer_SubMesh 0 Offset 0
+// CHECK:  OpDecorate %type_RWStructuredBuffer_SubMesh BufferBlock
+
+// CHECK:  OpMemberDecorate %type_UBO 0 Offset 0
+// CHECK:  OpMemberDecorate %type_UBO 0 MatrixStride 16
+// CHECK:  OpMemberDecorate %type_UBO 0 ColMajor
+// CHECK:  OpDecorate %type_UBO Block
+
+// CHECK:  %UserVertex = OpTypeStruct %v3float %v2float %v3float
+// CHECK:  %_runtimearr_UserVertex = OpTypeRuntimeArray %UserVertex
+// CHECK:  %type_RWStructuredBuffer_UserVertex = OpTypeStruct %_runtimearr_UserVertex
+// CHECK:  %_ptr_Uniform_type_RWStructuredBuffer_UserVertex = OpTypePointer Uniform %type_RWStructuredBuffer_UserVertex
+[[vk::binding(0, 0)]]
+RWStructuredBuffer<UserVertex> userVertices;
+
+// CHECK:  %_runtimearr_uint = OpTypeRuntimeArray %uint
+// CHECK:  %type_RWStructuredBuffer_uint = OpTypeStruct %_runtimearr_uint
+// CHECK:  %_ptr_Uniform_type_RWStructuredBuffer_uint = OpTypePointer Uniform %type_RWStructuredBuffer_uint
+[[vk::binding(1, 0)]]
+RWStructuredBuffer<uint> userIndices;
+
+// CHECK:  %_arr_uint_uint_2 = OpTypeArray %uint %uint_2
+// CHECK:  %Mesh = OpTypeStruct %uint %uint %_arr_uint_uint_2
+// CHECK:  %_runtimearr_Mesh = OpTypeRuntimeArray %Mesh
+// CHECK:  %type_RWStructuredBuffer_Mesh = OpTypeStruct %_runtimearr_Mesh
+// CHECK:  %_ptr_Uniform_type_RWStructuredBuffer_Mesh = OpTypePointer Uniform %type_RWStructuredBuffer_Mesh
+[[vk::binding(2, 0)]]
+RWStructuredBuffer<Mesh> meshes;
+
+// CHECK:  %uint_8 = OpConstant %uint 8
+// CHECK:  %v4float = OpTypeVector %float 4
+// CHECK:  %_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8
+// CHECK:  %SubMesh = OpTypeStruct %uint %uint %uint %uint %_arr_v4float_uint_8
+// CHECK:  %_runtimearr_SubMesh = OpTypeRuntimeArray %SubMesh
+// CHECK:  %type_RWStructuredBuffer_SubMesh = OpTypeStruct %_runtimearr_SubMesh
+// CHECK:  %_ptr_Uniform_type_RWStructuredBuffer_SubMesh = OpTypePointer Uniform %type_RWStructuredBuffer_SubMesh
+[[vk::binding(3, 0)]]
+RWStructuredBuffer<SubMesh> submeshes;
+
+// CHECK:  %mat4v4float = OpTypeMatrix %v4float 4
+// CHECK:  %type_UBO = OpTypeStruct %mat4v4float
+// CHECK:  %_ptr_Uniform_type_UBO = OpTypePointer Uniform %type_UBO
+[[vk::binding(4, 0)]]
+cbuffer UBO {
+    row_major float4x4 mvp;
+}
+
+struct PerVertex {
+    float4 position : SV_Position;
+    float2 texcoord : TEXCOORD;
+    float3 color : COLOR;
+};
+
+struct PerPrimitive {
+    float4 primcolor : PCOLOR;
+};
+
+struct SubMeshes {
+    uint submeshID[256] : SUBMESH;
+};
+
+static const uint vertsPerPrim = 3U;
+
+// CHECK:  %userVertices = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_UserVertex Uniform
+// CHECK:  %userIndices = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_uint Uniform
+// CHECK:  %meshes = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_Mesh Uniform
+// CHECK:  %submeshes = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_SubMesh Uniform
+// CHECK:  %UBO = OpVariable %_ptr_Uniform_type_UBO Uniform
+
+[outputtopology("triangle")]
+[numthreads(32, 1, 1)]
+void main(
+            out indices uint3 primIndices[128],
+            out vertices PerVertex verts[128],
+            out primitives PerPrimitive prims[128],
+            in payload SubMeshes taskmem,
+            in uint gid : SV_GroupID,
+            in uint tid : SV_GroupThreadID
+         )
+{
+    uint task = taskmem.submeshID[gid];
+// CHECK:  %submesh = OpVariable %_ptr_Function_SubMesh_0 Function
+// CHECK:  OpAccessChain %_ptr_Uniform_SubMesh %submeshes %int_0 [[task:%\d+]]
+// CHECK:  OpStore %submesh [[submeshVal:%\d+]]
+    SubMesh submesh = submeshes[task];
+// CHECK:  OpAccessChain %_ptr_Function_uint %submesh %int_0
+    uint numPackedVertices = submesh.vertexCount;
+// CHECK:  OpAccessChain %_ptr_Function_uint %submesh %int_2
+    uint numPackedPrimitives = submesh.primitiveCount;
+
+    SetMeshOutputCounts(numPackedVertices, numPackedPrimitives);
+
+    for (uint i = 0U; i < numPackedVertices; i += 32U) {
+        uint vid = i + tid;
+// CHECK:  OpAccessChain %_ptr_Function_uint %submesh %int_1
+        uint svid = vid + submesh.vertexOffset;
+        if (vid >= numPackedVertices) continue;
+// CHECK:  OpAccessChain %_ptr_Uniform_v2float %userVertices %int_0 [[svid_1:%\d+]] %int_1
+        verts[vid].texcoord = userVertices[svid].texcoord;
+// CHECK:  OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[svid_2:%\d+]] %int_2
+        verts[vid].color = userVertices[svid].color;
+// CHECK:  OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[svid_0:%\d+]] %int_0
+        float3 position = userVertices[svid].position;
+// CHECK:  OpAccessChain %_ptr_Uniform_mat4v4float %UBO %int_0 
+        verts[vid].position = mul(mvp, float4(position, 1.0));
+    }
+
+    GroupMemoryBarrier();
+
+    for (uint j = 0U; j < numPackedPrimitives; j += 32U) {
+        uint pid = j + tid;
+        uint didxoff = vertsPerPrim * pid;
+// CHECK:  OpAccessChain %_ptr_Function_uint %submesh %int_3
+        uint sidxoff = submesh.indexOffset + didxoff;
+        if (pid >= numPackedPrimitives) continue;
+// CHECK:  OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_0:%\d+]]
+// CHECK:  OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_1:%\d+]]
+// CHECK:  OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_2:%\d+]]
+        primIndices[pid] = uint3(userIndices[sidxoff], userIndices[sidxoff+1], userIndices[sidxoff+2]);
+// CHECK:  OpAccessChain %_ptr_Function_uint %submesh %int_1
+// CHECK:  OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[ind:%\d+]]
+        uint providx = submesh.vertexOffset + userIndices[sidxoff + vertsPerPrim - 1U];
+// CHECK:  OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[providx:%\d+]] %int_2
+        prims[pid].primcolor = float4(userVertices[providx].color, 1.0);
+    }
+}
+

+ 135 - 0
tools/clang/test/CodeGenSPIRV/meshshading.nv.fncall.amplification.hlsl

@@ -0,0 +1,135 @@
+// Run: %dxc -T as_6_5 -E main -fspv-target-env=vulkan1.1
+
+// CHECK:  OpCapability MeshShadingNV
+// CHECK:  OpExtension "SPV_NV_mesh_shader"
+// CHECK:  OpEntryPoint TaskNV %main "main"
+
+struct SubMesh {
+    uint vertexCount;
+    uint vertexOffset;
+    uint primitiveCount;
+    uint indexOffset;
+    float4 boundingBox[8];
+};
+
+struct Mesh {
+    uint firstSubmesh;
+    uint submeshCount;
+    uint dummy[2];
+};
+
+struct UserVertex {
+    float3 position;
+    float2 texcoord;
+    float3 color;
+};
+
+[[vk::binding(0, 0)]]
+RWStructuredBuffer<UserVertex> userVertices;
+
+[[vk::binding(1, 0)]]
+RWStructuredBuffer<uint> userIndices;
+
+[[vk::binding(2, 0)]]
+RWStructuredBuffer<Mesh> meshes;
+
+[[vk::binding(3, 0)]]
+RWStructuredBuffer<SubMesh> submeshes;
+
+[[vk::binding(4, 0)]]
+cbuffer UBO {
+    row_major float4x4 mvp;
+}
+
+groupshared uint passedSubmeshes;
+struct SubMeshes {
+    uint submeshID[256] : SUBMESH;
+};
+groupshared SubMeshes sharedSubMeshes;
+
+// CHECK:  %_arr_v4float_uint_8_0 = OpTypeArray %v4float %uint_8
+// CHECK:  %SubMesh_0 = OpTypeStruct %uint %uint %uint %uint %_arr_v4float_uint_8_0
+// CHECK:  %_ptr_Function_SubMesh_0 = OpTypePointer Function %SubMesh_0
+// CHECK:  [[funcType:%\d+]] = OpTypeFunction %bool %_ptr_Function_SubMesh_0
+
+bool TestSubmesh(SubMesh submesh) {
+    uint clip = 0x0U;
+
+    for (uint bbv = 0U ; bbv < 8U; bbv++) {
+        float4 pos= mul(mvp, submesh.boundingBox[bbv]);
+        if (pos.x <= pos.w) clip |= 0x1U;
+        if (pos.y <= 0.3333 * pos.w) clip |= 0x2U;
+        if (pos.z <= pos.w) clip |= 0x4U;
+        if (pos.x >= -pos.w) clip |= 0x8U;
+        if (pos.y >= -pos.w) clip |= 0x10U;
+        if (pos.z >= -pos.w) clip |= 0x20U;
+    }
+    return (clip == 0x3FU);
+}
+
+[numthreads(32, 1, 1)]
+void main(
+            in uint tid : SV_GroupThreadID,
+            in uint mid : SV_GroupID
+         )
+{
+    uint firstSubmesh = meshes[mid].firstSubmesh;
+    uint submeshCount = meshes[mid].submeshCount;
+    passedSubmeshes = 0U;
+    GroupMemoryBarrier();
+    for (uint i = 0U; i < submeshCount; i += 32U) {
+        uint smid = firstSubmesh + i + tid;
+        if (smid >= firstSubmesh + submeshCount) continue;
+
+// CHECK:  %submesh = OpVariable %_ptr_Function_SubMesh_0 Function
+// CHECK:  %passed = OpVariable %_ptr_Function_bool Function
+// CHECK:  %param_var_submesh = OpVariable %_ptr_Function_SubMesh_0 Function
+        SubMesh submesh = submeshes[smid];
+        bool passed = true;
+
+// CHECK:  [[submeshValue:%\d+]] = OpLoad %SubMesh_0 %submesh
+// CHECK:  OpStore %param_var_submesh [[submeshValue]]
+// CHECK:  [[rv:%\d+]] = OpFunctionCall %bool %TestSubmesh %param_var_submesh
+// CHECK:  [[cond:%\d+]] = OpLogicalNot %bool [[rv]]
+// CHECK:  OpSelectionMerge %if_merge_0 None
+// CHECK:  OpBranchConditional [[cond]] %if_true_0 %if_merge_0
+// CHECK:  %if_true_0 = OpLabel
+// CHECK:  OpStore %passed %false
+// CHECK:  OpBranch %if_merge_0
+// CHECK:  %if_merge_0 = OpLabel
+        if (!TestSubmesh(submesh)) passed = false;
+
+        if (passed) {
+            uint ballot = WaveActiveBallot(passed).x;
+            uint laneMaskLT = (1 << WaveGetLaneIndex()) - 1;
+            uint lowerThreads = ballot & laneMaskLT;
+            uint slot = passedSubmeshes + WavePrefixCountBits(passed);
+            sharedSubMeshes.submeshID[slot] = smid;
+            if (lowerThreads == 0U) {
+                passedSubmeshes += WaveActiveCountBits(passed);
+            }
+        }
+        GroupMemoryBarrier();
+    }
+    DispatchMesh(passedSubmeshes, 1, 1, sharedSubMeshes);
+}
+
+/* bool TestSubmesh(SubMesh submesh) { ... } */
+
+// CHECK:  %TestSubmesh = OpFunction %bool None [[funcType]]
+// CHECK:  %submesh_0 = OpFunctionParameter %_ptr_Function_SubMesh_0
+
+// CHECK:  %bb_entry_0 = OpLabel
+
+// CHECK:  %clip = OpVariable %_ptr_Function_uint Function
+// CHECK:  %bbv = OpVariable %_ptr_Function_uint Function
+// CHECK:  %pos = OpVariable %_ptr_Function_v4float Function
+
+// CHECK:  %for_check_0 = OpLabel
+// CHECK:  %for_body_0 = OpLabel
+// CHECK:  %for_merge_0 = OpLabel
+
+// CHECK:  [[clipValue:%\d+]] = OpLoad %uint %clip
+// CHECK:  [[retValue:%\d+]] = OpIEqual %bool [[clipValue]] %uint_63
+// CHECK:  OpReturnValue [[retValue]]
+// CHECK:  OpFunctionEnd

+ 19 - 0
tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.error.hlsl

@@ -0,0 +1,19 @@
+// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays
+
+// CHECK: error: ran into binding number conflict when assigning binding number 3 in set 0
+
+Texture2D    MyTextures[5] : register(t0); // Forced use of binding numbers 0, 1, 2, 3, 4.
+Texture2D    AnotherTexture : register(t3); // Error: Forced use of binding number 3.
+SamplerState MySampler;
+
+float4 main(float2 TexCoord : TexCoord) : SV_Target0 {
+  float4 result =
+    MyTextures[0].Sample(MySampler, TexCoord) +
+    MyTextures[1].Sample(MySampler, TexCoord) +
+    MyTextures[2].Sample(MySampler, TexCoord) +
+    MyTextures[3].Sample(MySampler, TexCoord) +
+    MyTextures[4].Sample(MySampler, TexCoord) +
+    AnotherTexture.Sample(MySampler, TexCoord);
+  return result;
+}
+

+ 36 - 0
tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example1-optimized.hlsl

@@ -0,0 +1,36 @@
+// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays -O3
+
+// CHECK: OpDecorate %AnotherTexture Binding 5
+// CHECK: OpDecorate %NextTexture Binding 6
+// CHECK: OpDecorate [[MyTextures0:%\d+]] Binding 0
+// CHECK: OpDecorate [[MyTextures1:%\d+]] Binding 1
+// CHECK: OpDecorate [[MyTextures2:%\d+]] Binding 2
+// CHECK: OpDecorate [[MyTextures3:%\d+]] Binding 3
+// CHECK: OpDecorate [[MyTextures4:%\d+]] Binding 4
+// CHECK: OpDecorate [[MySamplers0:%\d+]] Binding 7
+// CHECK: OpDecorate [[MySamplers1:%\d+]] Binding 8
+
+// CHECK: [[MyTextures0]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK: [[MyTextures1]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK: [[MyTextures2]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK: [[MyTextures3]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK: [[MyTextures4]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK: [[MySamplers0]] = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant
+// CHECK: [[MySamplers1]] = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant
+Texture2D    MyTextures[5] : register(t0);
+Texture2D    NextTexture;  // This is supposed to be t6.
+Texture2D    AnotherTexture : register(t5);
+SamplerState MySamplers[2];
+
+float4 main(float2 TexCoord : TexCoord) : SV_Target0
+{
+  float4 result =
+    MyTextures[0].Sample(MySamplers[0], TexCoord) +
+    MyTextures[1].Sample(MySamplers[0], TexCoord) +
+    MyTextures[2].Sample(MySamplers[0], TexCoord) +
+    MyTextures[3].Sample(MySamplers[1], TexCoord) +
+    MyTextures[4].Sample(MySamplers[1], TexCoord) +
+    AnotherTexture.Sample(MySamplers[1], TexCoord) +
+    NextTexture.Sample(MySamplers[1], TexCoord);
+  return result;
+}

+ 23 - 0
tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example1.hlsl

@@ -0,0 +1,23 @@
+// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays
+
+// CHECK: OpDecorate %MyTextures Binding 0
+// CHECK: OpDecorate %AnotherTexture Binding 5
+// CHECK: OpDecorate %NextTexture Binding 6
+// CHECK: OpDecorate %MySamplers Binding 7
+Texture2D    MyTextures[5] : register(t0);
+Texture2D    NextTexture;  // This is supposed to be t6.
+Texture2D    AnotherTexture : register(t5);
+SamplerState MySamplers[2];
+
+float4 main(float2 TexCoord : TexCoord) : SV_Target0
+{
+  float4 result =
+    MyTextures[0].Sample(MySamplers[0], TexCoord) +
+    MyTextures[1].Sample(MySamplers[0], TexCoord) +
+    MyTextures[2].Sample(MySamplers[0], TexCoord) +
+    MyTextures[3].Sample(MySamplers[1], TexCoord) +
+    MyTextures[4].Sample(MySamplers[1], TexCoord) +
+    AnotherTexture.Sample(MySamplers[1], TexCoord) +
+    NextTexture.Sample(MySamplers[1], TexCoord);
+  return result;
+}

+ 41 - 0
tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example2-optimized.hlsl

@@ -0,0 +1,41 @@
+// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays -O3
+
+// CHECK: OpDecorate %AnotherTexture Binding 3
+// CHECK: OpDecorate %MySampler Binding 2
+// CHECK: OpDecorate %MySampler2 Binding 9
+// CHECK: OpDecorate [[MyTextures0:%\d+]] Binding 4
+// CHECK: OpDecorate [[MyTextures1:%\d+]] Binding 5
+// CHECK: OpDecorate [[MyTextures2:%\d+]] Binding 6
+// CHECK: OpDecorate [[MyTextures3:%\d+]] Binding 7
+// CHECK: OpDecorate [[MyTextures4:%\d+]] Binding 8
+// CHECK: OpDecorate [[MyTextures20:%\d+]] Binding 0
+// CHECK: OpDecorate [[MyTextures21:%\d+]] Binding 1
+
+// CHECK:  [[MyTextures0:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK:  [[MyTextures1:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK:  [[MyTextures2:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK:  [[MyTextures3:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK:  [[MyTextures4:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK: [[MyTextures20:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+// CHECK: [[MyTextures21:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+
+Texture2D    MyTextures[5]; // five array elements cannot fit in [0-2] binding slots, so it should take slot [4-8].
+Texture2D    AnotherTexture : register(t3); // force binding number 3.
+Texture2D    MyTextures2[2]; // take binding slot 0 and 1.
+SamplerState MySampler; // take binding slot 2.
+SamplerState MySampler2; // binding 0 to 8 are taken. The next available binding is 9.
+
+float4 main(float2 TexCoord : TexCoord) : SV_Target0
+{
+  float4 result =
+    MyTextures[0].Sample(MySampler, TexCoord) +
+    MyTextures[1].Sample(MySampler, TexCoord) +
+    MyTextures[2].Sample(MySampler, TexCoord) +
+    MyTextures[3].Sample(MySampler, TexCoord) +
+    MyTextures[4].Sample(MySampler, TexCoord) +
+    MyTextures2[0].Sample(MySampler2, TexCoord) +
+    MyTextures2[1].Sample(MySampler2, TexCoord) +
+    AnotherTexture.Sample(MySampler, TexCoord);
+  return result;
+}
+

+ 29 - 0
tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example2.hlsl

@@ -0,0 +1,29 @@
+// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays
+
+
+// CHECK: OpDecorate %AnotherTexture Binding 3
+// CHECK: OpDecorate %MyTextures Binding 4
+// CHECK: OpDecorate %MyTextures2 Binding 0
+// CHECK: OpDecorate %MySampler Binding 2
+// CHECK: OpDecorate %MySampler2 Binding 9
+
+Texture2D    MyTextures[5]; // five array elements cannot fit in [0-2] binding slots, so it should take slot [4-8].
+Texture2D    AnotherTexture : register(t3); // force binding number 3.
+Texture2D    MyTextures2[2]; // take binding slot 0 and 1.
+SamplerState MySampler; // take binding slot 2.
+SamplerState MySampler2; // binding 0 to 8 are taken. The next available binding is 9.
+
+float4 main(float2 TexCoord : TexCoord) : SV_Target0
+{
+  float4 result =
+    MyTextures[0].Sample(MySampler, TexCoord) +
+    MyTextures[1].Sample(MySampler, TexCoord) +
+    MyTextures[2].Sample(MySampler, TexCoord) +
+    MyTextures[3].Sample(MySampler, TexCoord) +
+    MyTextures[4].Sample(MySampler, TexCoord) +
+    MyTextures2[0].Sample(MySampler2, TexCoord) +
+    MyTextures2[1].Sample(MySampler2, TexCoord) +
+    AnotherTexture.Sample(MySampler, TexCoord);
+  return result;
+}
+

+ 62 - 0
tools/clang/unittests/HLSL/DxilModuleTest.cpp

@@ -62,6 +62,10 @@ public:
   TEST_METHOD(Precise6)
   TEST_METHOD(Precise7)
 
+  TEST_METHOD(CSGetNumThreads)
+  TEST_METHOD(MSGetNumThreads)
+  TEST_METHOD(ASGetNumThreads)
+
   TEST_METHOD(SetValidatorVersion)
 
   void VerifyValidatorVersionFails(
@@ -435,6 +439,64 @@ TEST_F(DxilModuleTest, Precise7) {
   VERIFY_ARE_EQUAL(numChecks, 4);
 }
 
+TEST_F(DxilModuleTest, CSGetNumThreads) {
+  Compiler c(m_dllSupport);
+  c.Compile(
+    "[numthreads(8, 4, 2)]\n"
+    "void main() {\n"
+    "}\n"
+    ,
+    L"cs_6_0"
+  );
+
+  DxilModule &DM = c.GetDxilModule();
+  VERIFY_ARE_EQUAL(8, DM.GetNumThreads(0));
+  VERIFY_ARE_EQUAL(4, DM.GetNumThreads(1));
+  VERIFY_ARE_EQUAL(2, DM.GetNumThreads(2));
+}
+
+TEST_F(DxilModuleTest, MSGetNumThreads) {
+  Compiler c(m_dllSupport);
+  if (c.SkipDxil_Test(1,5)) return;
+  c.Compile(
+    "struct MeshPerVertex { float4 pos : SV_Position; };\n"
+    "[numthreads(8, 4, 2)]\n"
+    "[outputtopology(\"triangle\")]\n"
+    "void main(\n"
+    "          out indices uint3 primIndices[1]\n"
+    ") {\n"
+    "    SetMeshOutputCounts(0, 0);\n"
+    "}\n"
+    ,
+    L"ms_6_5"
+  );
+
+  DxilModule &DM = c.GetDxilModule();
+  VERIFY_ARE_EQUAL(8, DM.GetNumThreads(0));
+  VERIFY_ARE_EQUAL(4, DM.GetNumThreads(1));
+  VERIFY_ARE_EQUAL(2, DM.GetNumThreads(2));
+}
+
+TEST_F(DxilModuleTest, ASGetNumThreads) {
+  Compiler c(m_dllSupport);
+  if (c.SkipDxil_Test(1,5)) return;
+  c.Compile(
+    "struct Payload { uint i; };\n"
+    "[numthreads(8, 4, 2)]\n"
+    "void main() {\n"
+    "  Payload pld = {0};\n"
+    "    DispatchMesh(1, 1, 1, pld);\n"
+    "}\n"
+    ,
+    L"as_6_5"
+  );
+
+  DxilModule &DM = c.GetDxilModule();
+  VERIFY_ARE_EQUAL(8, DM.GetNumThreads(0));
+  VERIFY_ARE_EQUAL(4, DM.GetNumThreads(1));
+  VERIFY_ARE_EQUAL(2, DM.GetNumThreads(2));
+}
+
 void DxilModuleTest::VerifyValidatorVersionFails(
     LPCWSTR shaderModel, const std::vector<LPCWSTR> &arguments,
     const std::vector<LPCSTR> &expectedErrors) {

+ 24 - 0
tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp

@@ -1620,6 +1620,23 @@ TEST_F(FileTest, VulkanRegisterBinding1to1MappingAssociatedCounter) {
   runFileTest("vk.binding.cl.register.counter.hlsl", Expect::Failure);
 }
 
+// For testing the "-fspv-flatten-resource-arrays" command line option.
+TEST_F(FileTest, FlattenResourceArrayBindings1) {
+  runFileTest("vk.binding.cl.flatten-arrays.example1.hlsl");
+}
+TEST_F(FileTest, FlattenResourceArrayBindings1Optimized) {
+  runFileTest("vk.binding.cl.flatten-arrays.example1-optimized.hlsl");
+}
+TEST_F(FileTest, FlattenResourceArrayBindings2) {
+  runFileTest("vk.binding.cl.flatten-arrays.example2.hlsl");
+}
+TEST_F(FileTest, FlattenResourceArrayBindings2Optimized) {
+  runFileTest("vk.binding.cl.flatten-arrays.example2-optimized.hlsl");
+}
+TEST_F(FileTest, FlattenResourceArrayBindingsOverlapError) {
+  runFileTest("vk.binding.cl.flatten-arrays.error.hlsl", Expect::Failure);
+}
+
 // For testing the "-auto-binding-space" command line option which specifies the
 // "default space" for resources.
 TEST_F(FileTest, VulkanRegisterBindingDefaultSpaceImplicit) {
@@ -2044,6 +2061,9 @@ TEST_F(FileTest, MeshShadingNVMeshLine) {
 TEST_F(FileTest, MeshShadingNVMeshPoint) {
   runFileTest("meshshading.nv.point.mesh.hlsl");
 }
+TEST_F(FileTest, MeshShadingNVMeshBuffer) {
+  runFileTest("meshshading.nv.buffer.mesh.hlsl");
+}
 TEST_F(FileTest, MeshShadingNVMeshError1) {
   runFileTest("meshshading.nv.error1.mesh.hlsl", Expect::Failure);
 }
@@ -2089,6 +2109,10 @@ TEST_F(FileTest, MeshShadingNVMeshError14) {
 TEST_F(FileTest, MeshShadingNVAmplification) {
   runFileTest("meshshading.nv.amplification.hlsl");
 }
+TEST_F(FileTest, MeshShadingNVAmplificationFunCall) {
+  useVulkan1p1();
+  runFileTest("meshshading.nv.fncall.amplification.hlsl");
+}
 TEST_F(FileTest, MeshShadingNVAmplificationError1) {
   runFileTest("meshshading.nv.error1.amplification.hlsl", Expect::Failure);
 }