Prechádzať zdrojové kódy

Merge of branch 'master' of https://github.com/Microsoft/DirectXShaderCompiler into merge-master-hlsl-2021

Helena Kotas pred 4 rokmi
rodič
commit
af088513ab
100 zmenených súborov, 4921 pridaní a 688 odobraní
  1. 4 5
      .travis.yml
  2. 5 1
      CMakeLists.txt
  3. 3 3
      README.md
  4. 2 2
      appveyor.yml
  5. 3 4
      cmake/modules/FindD3D12.cmake
  6. 4 3
      docs/DXIL.rst
  7. 8 2
      docs/SPIR-V.rst
  8. 1 1
      external/SPIRV-Headers
  9. 1 1
      external/SPIRV-Tools
  10. 23 1
      include/dxc/DXIL/DxilConstants.h
  11. 9 2
      include/dxc/DXIL/DxilFunctionProps.h
  12. 21 0
      include/dxc/DXIL/DxilMetadataHelper.h
  13. 4 0
      include/dxc/DXIL/DxilModule.h
  14. 4 0
      include/dxc/DXIL/DxilResource.h
  15. 3 2
      include/dxc/DXIL/DxilShaderModel.h
  16. 52 0
      include/dxc/DXIL/DxilTypeSystem.h
  17. 3 0
      include/dxc/DXIL/DxilUtil.h
  18. 0 19
      include/dxc/HLSL/DxilConvergent.h
  19. 5 1
      include/dxc/HLSL/DxilExportMap.h
  20. 11 3
      include/dxc/HLSL/DxilValidation.h
  21. 9 0
      include/dxc/Support/ErrorCodes.h
  22. 4 0
      include/dxc/Support/FileIOHelper.h
  23. 7 3
      include/dxc/Support/HLSLOptions.h
  24. 12 4
      include/dxc/Support/HLSLOptions.td
  25. 2634 0
      include/dxc/Support/d3d12TokenizedProgramFormat.hpp
  26. 5 0
      include/dxc/Test/DxcTestUtils.h
  27. 19 1
      include/dxc/dxcapi.h
  28. 29 0
      include/dxc/dxcerrors.h
  29. 1 0
      include/llvm/ADT/StringRef.h
  30. 2 0
      include/llvm/IR/BasicBlock.h
  31. 1 0
      include/llvm/IR/DebugInfo.h
  32. 2 0
      include/llvm/Option/OptTable.h
  33. 7 6
      include/llvm/Support/Casting.h
  34. 7 1
      include/llvm/Support/ErrorHandling.h
  35. 184 21
      lib/DXIL/DxilMetadataHelper.cpp
  36. 45 13
      lib/DXIL/DxilModule.cpp
  37. 8 0
      lib/DXIL/DxilResource.cpp
  38. 3 1
      lib/DXIL/DxilResourceProperties.cpp
  39. 2 1
      lib/DXIL/DxilShaderFlags.cpp
  40. 94 63
      lib/DXIL/DxilShaderModel.cpp
  41. 133 5
      lib/DXIL/DxilTypeSystem.cpp
  42. 22 1
      lib/DXIL/DxilUtil.cpp
  43. 8 0
      lib/DxcSupport/FileIOHelper.cpp
  44. 26 10
      lib/DxcSupport/HLSLOptions.cpp
  45. 5 11
      lib/DxilContainer/DxilContainerAssembler.cpp
  46. 3 53
      lib/DxilPIXPasses/DxilAddPixelHitInstrumentation.cpp
  47. 2 48
      lib/DxilPIXPasses/DxilDebugInstrumentation.cpp
  48. 6 19
      lib/DxilPIXPasses/DxilOutputColorBecomesConstant.cpp
  49. 3 49
      lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp
  50. 333 138
      lib/DxilPIXPasses/DxilShaderAccessTracking.cpp
  51. 146 8
      lib/DxilPIXPasses/PixPassHelpers.cpp
  52. 5 0
      lib/DxilPIXPasses/PixPassHelpers.h
  53. 1 1
      lib/HLSL/DxilCondenseResources.cpp
  54. 3 1
      lib/HLSL/DxilContainerReflection.cpp
  55. 2 12
      lib/HLSL/DxilConvergent.cpp
  56. 1 1
      lib/HLSL/DxilGenerationPass.cpp
  57. 35 1
      lib/HLSL/DxilLinker.cpp
  58. 10 4
      lib/HLSL/DxilPreparePasses.cpp
  59. 92 12
      lib/HLSL/DxilValidation.cpp
  60. 3 3
      lib/HLSL/HLMatrixLowerPass.cpp
  61. 1 3
      lib/HLSL/HLModule.cpp
  62. 3 4
      lib/HLSL/HLOperationLower.cpp
  63. 60 6
      lib/HLSL/WaveSensitivityAnalysis.cpp
  64. 12 0
      lib/IR/BasicBlock.cpp
  65. 15 0
      lib/IR/DebugInfo.cpp
  66. 2 5
      lib/IR/DiagnosticInfo.cpp
  67. 0 2
      lib/MC/MCObjectStreamer.cpp
  68. 29 0
      lib/Option/OptTable.cpp
  69. 16 6
      lib/Support/ErrorHandling.cpp
  70. 1 1
      lib/Transforms/Scalar/LowerTypePasses.cpp
  71. 4 2
      lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
  72. 12 7
      lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
  73. 2 2
      lib/Transforms/Scalar/Scalarizer.cpp
  74. 5 0
      lib/Transforms/Utils/Local.cpp
  75. 2 2
      projects/dxilconv/include/Support/DXIncludes.h
  76. 1 1
      projects/dxilconv/lib/ShaderBinary/ShaderBinaryIncludes.h
  77. 17 1
      tools/clang/include/clang/AST/HlslTypes.h
  78. 5 0
      tools/clang/include/clang/Basic/Attr.td
  79. 33 0
      tools/clang/include/clang/Basic/Diagnostic.h
  80. 10 0
      tools/clang/include/clang/Basic/DiagnosticGroups.td
  81. 36 0
      tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
  82. 1 0
      tools/clang/include/clang/Basic/LangOptions.h
  83. 2 0
      tools/clang/include/clang/Frontend/CodeGenOptions.h
  84. 7 0
      tools/clang/include/clang/SPIRV/AstTypeProbe.h
  85. 1 0
      tools/clang/include/clang/SPIRV/FeatureManager.h
  86. 10 0
      tools/clang/include/clang/Sema/SemaHLSL.h
  87. 3 0
      tools/clang/lib/AST/ASTContextHLSL.cpp
  88. 20 0
      tools/clang/lib/AST/ASTDumper.cpp
  89. 17 0
      tools/clang/lib/AST/DeclPrinter.cpp
  90. 155 34
      tools/clang/lib/CodeGen/CGExprConstant.cpp
  91. 151 21
      tools/clang/lib/CodeGen/CGHLSLMS.cpp
  92. 92 0
      tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp
  93. 0 3
      tools/clang/lib/CodeGen/CGHLSLRuntime.h
  94. 1 3
      tools/clang/lib/CodeGen/CodeGenAction.cpp
  95. 0 6
      tools/clang/lib/CodeGen/CodeGenFunction.cpp
  96. 73 3
      tools/clang/lib/Parse/ParseDecl.cpp
  97. 16 3
      tools/clang/lib/SPIRV/AstTypeProbe.cpp
  98. 4 4
      tools/clang/lib/SPIRV/CapabilityVisitor.cpp
  99. 18 28
      tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
  100. 4 0
      tools/clang/lib/SPIRV/FeatureManager.cpp

+ 4 - 5
.travis.yml

@@ -41,11 +41,10 @@ matrix:
   # Allow address sanitizer bot to fail for now. TODO(Remove this).
   allow_failures:
     - os: linux
-      # All linux timing out with empty log currently.
-      # compiler: clang
-      # env:
-      #   - DXC_BUILD_TYPE=Debug
-      #   - BUILD_CONFIG=ASAN
+      compiler: clang
+      env:
+        - DXC_BUILD_TYPE=Debug
+        - BUILD_CONFIG=ASAN
 
 cache:
   apt: true

+ 5 - 1
CMakeLists.txt

@@ -100,7 +100,11 @@ option(HLSL_SUPPORT_QUERY_GIT_COMMIT_INFO "Supports querying Git commit info." O
 if ( HLSL_SUPPORT_QUERY_GIT_COMMIT_INFO )
   add_definitions(-DSUPPORT_QUERY_GIT_COMMIT_INFO)
 endif()
-# HLSL Chnage Ends
+# adjust link option to enable debugging from kernel mode; not compatible with incremental linking
+if(NOT CMAKE_VERSION VERSION_LESS "3.13" AND WIN32)
+  add_link_options(/DEBUGTYPE:CV,FIXUP,PDATA /INCREMENTAL:NO)
+endif()
+# HLSL Change Ends
 
 # HLSL Change Starts - set flag for Appveyor CI
 if ( "$ENV{CI}" AND "$ENV{APPVEYOR}" )

+ 3 - 3
README.md

@@ -1,6 +1,6 @@
 # DirectX Shader Compiler
 
-[![Build status](https://ci.appveyor.com/api/projects/status/oaf66n7w30xbrg38/branch/master?svg=true)](https://ci.appveyor.com/project/antiagainst/directxshadercompiler/branch/master)
+[![Build status](https://ci.appveyor.com/api/projects/status/6sx47j66g4dbyem9/branch/master?svg=true)](https://ci.appveyor.com/project/dnovillo/directxshadercompiler/branch/master)
 
 The DirectX Shader Compiler project includes a compiler and related tools used to compile High-Level Shader Language (HLSL) programs into DirectX Intermediate Language (DXIL) representation. Applications that make use of DirectX for graphics, games, and computation can use it to generate shader programs.
 
@@ -10,8 +10,8 @@ For more information, see the [Wiki](https://github.com/microsoft/DirectXShaderC
 You can download the latest successful build's artifacts (built by Appveyor) for the master branch:
 | Downloads |        |
 |-----------|--------|
-| Windows   | [⬇](https://ci.appveyor.com/api/projects/antiagainst/directxshadercompiler/artifacts/build%2FRelease%2Fdxc-artifacts.zip?branch=master&pr=false&job=image%3A%20Visual%20Studio%202017) |
-| Ubuntu    | [⬇](https://ci.appveyor.com/api/projects/antiagainst/directxshadercompiler/artifacts/build%2Fdxc-artifacts.tar.gz?branch=master&pr=false&job=image%3A%20Ubuntu) |
+| Windows   | [⬇](https://ci.appveyor.com/api/projects/dnovillo/directxshadercompiler/artifacts/build%2FRelease%2Fdxc-artifacts.zip?branch=master&pr=false&job=image%3A%20Visual%20Studio%202019) |
+| Ubuntu    | [⬇](https://ci.appveyor.com/api/projects/dnovillo/directxshadercompiler/artifacts/build%2Fdxc-artifacts.tar.gz?branch=master&pr=false&job=image%3A%20Ubuntu) |
 
 ## Features and Goals
 

+ 2 - 2
appveyor.yml

@@ -1,7 +1,7 @@
 version: 1.0.{build}
 
 image:
-  - Visual Studio 2017
+  - Visual Studio 2019
   - Ubuntu
 
 platform: x64
@@ -38,7 +38,7 @@ before_build:
 - cmd: call utils\hct\hctstart %HLSL_SRC_DIR% %HLSL_BLD_DIR%
 
 build_script:
-- cmd: call utils\hct\hctbuild -%PLATFORM% -%CONFIGURATION% -vs2017 -spirvtest
+- cmd: call utils\hct\hctbuild -%PLATFORM% -%CONFIGURATION% -show-cmake-log -spirvtest
 - sh: mkdir build && cd build
 - sh: cmake .. -GNinja $(cat ../utils/cmake-predefined-config-params) -DSPIRV_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_FLAGS=-Werror
 - sh: ninja

+ 3 - 4
cmake/modules/FindD3D12.cmake

@@ -1,8 +1,7 @@
-# Find the win10 SDK path.
+# Find the Win10 SDK path.
 if ("$ENV{WIN10_SDK_PATH}$ENV{WIN10_SDK_VERSION}" STREQUAL "" )
-  get_filename_component(WIN10_SDK_PATH "[HKEY_LOCAL_MACHINE\\SOFTWARE\\WOW6432Node\\Microsoft\\Microsoft SDKs\\Windows\\v10.0;InstallationFolder]" ABSOLUTE CACHE)
-  get_filename_component(TEMP_WIN10_SDK_VERSION "[HKEY_LOCAL_MACHINE\\SOFTWARE\\WOW6432Node\\Microsoft\\Microsoft SDKs\\Windows\\v10.0;ProductVersion]" ABSOLUTE CACHE)
-  get_filename_component(WIN10_SDK_VERSION ${TEMP_WIN10_SDK_VERSION} NAME)
+  get_filename_component(WIN10_SDK_PATH "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows Kits\\Installed Roots;KitsRoot10]" ABSOLUTE CACHE)
+  set (WIN10_SDK_VERSION ${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
 elseif(TRUE)
   set (WIN10_SDK_PATH $ENV{WIN10_SDK_PATH})
   set (WIN10_SDK_VERSION $ENV{WIN10_SDK_VERSION})

+ 4 - 3
docs/DXIL.rst

@@ -2969,7 +2969,7 @@ The set of validation rules that are known to hold for a DXIL program is identif
 ========================================= ========================================================================================================================================================================================================================================================================================================
 Rule Code                                 Description
 ========================================= ========================================================================================================================================================================================================================================================================================================
-BITCODE.VALID                             TODO - Module must be bitcode-valid
+BITCODE.VALID                             Module must be bitcode-valid
 CONTAINER.PARTINVALID                     DXIL Container must not contain unknown parts
 CONTAINER.PARTMATCHES                     DXIL Container Parts must match Module
 CONTAINER.PARTMISSING                     DXIL Container requires certain parts, corresponding to module
@@ -3096,7 +3096,7 @@ META.KNOWN                                Named metadata should be known
 META.MAXTESSFACTOR                        Hull Shader MaxTessFactor must be [%0..%1].  %2 specified.
 META.NOENTRYPROPSFORENTRY                 Entry point %0 must have entry properties.
 META.NOSEMANTICOVERLAP                    Semantics must not overlap
-META.REQUIRED                             TODO - Required metadata missing.
+META.REQUIRED                             Required metadata missing.
 META.SEMAKINDMATCHESNAME                  Semantic name must match system value, when defined.
 META.SEMAKINDVALID                        Semantic kind must be valid
 META.SEMANTICCOMPTYPE                     %0 must be %1.
@@ -3120,7 +3120,8 @@ META.TEXTURETYPE                          elements of typed buffers and textures
 META.USED                                 All metadata must be used by dxil.
 META.VALIDSAMPLERMODE                     Invalid sampler mode on sampler .
 META.VALUERANGE                           Metadata value must be within range.
-META.WELLFORMED                           TODO - Metadata must be well-formed in operand count and types.
+META.VERSIONSUPPORTED                     Version in metadata must be supported.
+META.WELLFORMED                           Metadata must be well-formed in operand count and types.
 SM.64BITRAWBUFFERLOADSTORE                i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3.
 SM.AMPLIFICATIONSHADERPAYLOADSIZE         For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes.
 SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes.

+ 8 - 2
docs/SPIR-V.rst

@@ -283,7 +283,7 @@ Supported extensions
 * SPV_KHR_shader_draw_parameters
 * SPV_EXT_descriptor_indexing
 * SPV_EXT_fragment_fully_covered
-* SPV_EXT_fragment_invocation_density
+* SPV_KHR_fragment_shading_rate
 * SPV_EXT_shader_stencil_support
 * SPV_AMD_shader_explicit_vertex_parameter
 * SPV_GOOGLE_hlsl_functionality1
@@ -1495,7 +1495,13 @@ some system-value (SV) semantic strings will be translated into SPIR-V
 |                           +-------------+----------------------------------------+-----------------------+-----------------------------+
 |                           | MSIn        | ``ViewIndex``                          | N/A                   | ``MultiView``               |
 +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
-| SV_ShadingRate            | PSIn        | ``FragSizeEXT``                        | N/A                   | ``FragmentDensityEXT``      |
+|                           | VSOut       | ``PrimitiveShadingRateKHR``            | N/A                   | ``FragmentShadingRate``     |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``PrimitiveShadingRateKHR``            | N/A                   | ``FragmentShadingRate``     |
+| SV_ShadingRate            +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``ShadingRateKHR``                     | N/A                   | ``FragmentShadingRate``     |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | MSOut       | ``PrimitiveShadingRateKHR``            | N/A                   | ``FragmentShadingRate``     |
 +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
 
 For entities (function parameters, function return values, struct fields) with

+ 1 - 1
external/SPIRV-Headers

@@ -1 +1 @@
-Subproject commit a3fdfe81465d57efc97cfd28ac6c8190fb31a6c8
+Subproject commit dafead1765f6c1a5f9f8a76387dcb2abe4e54acd

+ 1 - 1
external/SPIRV-Tools

@@ -1 +1 @@
-Subproject commit ef3290bbea35935ba8fd623970511ed9f045bbd7
+Subproject commit dc72924cb31cd9f3dbc3eb47e9d926cf641e3a07

+ 23 - 1
include/dxc/DXIL/DxilConstants.h

@@ -29,7 +29,7 @@ namespace DXIL {
   const unsigned kDxilMajor = 1;
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_dxil_version_minor()</py>*/
   // VALRULE-TEXT:BEGIN
-  const unsigned kDxilMinor = 6;
+  const unsigned kDxilMinor = 7;
   // VALRULE-TEXT:END
 
   inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) {
@@ -1489,6 +1489,28 @@ namespace DXIL {
     CandidateProceduralPrimitive = 1,
   };
 
+  enum class PayloadAccessQualifier : uint32_t {
+    NoAccess = 0,
+    Read = 1,
+    Write = 2,
+    ReadWrite = 3
+  };
+
+  enum class PayloadAccessShaderStage : uint32_t {
+    Caller = 0,
+    Closesthit = 1,
+    Miss = 2,
+    Anyhit = 3, 
+    Invalid = 0xffffffffu
+  }; 
+
+  // Allocate 4 bits per shader stage:
+  //     bits 0-1 for payload access qualifiers
+  //     bits 2-3 reserved for future use
+  const uint32_t PayloadAccessQualifierBitsPerStage = 4;
+  const uint32_t PayloadAccessQualifierValidMaskPerStage = 3;
+  const uint32_t PayloadAccessQualifierValidMask = 0x00003333;
+
   inline bool IsValidHitGroupType(HitGroupType type) {
     return (type >= HitGroupType::Triangle && type < HitGroupType::LastEntry);
   }

+ 9 - 2
include/dxc/DXIL/DxilFunctionProps.h

@@ -21,7 +21,9 @@ class Constant;
 namespace hlsl {
 struct DxilFunctionProps {
   DxilFunctionProps() {
-    memset(this, 0, sizeof(DxilFunctionProps));
+    memset(&ShaderProps, 0, sizeof(ShaderProps));
+    shaderKind = DXIL::ShaderKind::Invalid;
+    waveSize = 0;
   }
   union {
     // Compute shader.
@@ -83,7 +85,12 @@ struct DxilFunctionProps {
   } ShaderProps;
   DXIL::ShaderKind shaderKind;
   // WaveSize is currently allowed only on compute shaders, but could be supported on other shader types in the future
-  unsigned waveSize; 
+  unsigned waveSize;
+  // Save root signature for lib profile entry.
+  std::vector<uint8_t> serializedRootSignature;
+  void SetSerializedRootSignature(const uint8_t *pData, unsigned size) {
+    serializedRootSignature.assign(pData, pData+size);
+  }
 
   // TODO: Should we have an unmangled name here for ray tracing shaders?
   bool IsPS() const     { return shaderKind == DXIL::ShaderKind::Pixel; }

+ 21 - 0
include/dxc/DXIL/DxilMetadataHelper.h

@@ -32,6 +32,7 @@ class MDNode;
 class NamedMDNode;
 class GlobalVariable;
 class StringRef;
+class Type;
 }
 
 namespace hlsl {
@@ -48,6 +49,8 @@ class DxilSampler;
 class DxilTypeSystem;
 class DxilStructAnnotation;
 class DxilFieldAnnotation;
+class DxilPayloadAnnotation;
+class DxilPayloadFieldAnnotation;
 class DxilTemplateArgAnnotation;
 class DxilFunctionAnnotation;
 class DxilParameterAnnotation;
@@ -217,6 +220,10 @@ public:
   static const unsigned kDxilFieldAnnotationPreciseTag            = 8;
   static const unsigned kDxilFieldAnnotationCBUsedTag             = 9;
 
+  // DXR Payload Annotations
+  static const unsigned kDxilPayloadAnnotationStructTag           = 0;
+  static const unsigned kDxilPayloadFieldAnnotationAccessTag      = 0;
+
   // StructAnnotation extended property tags (DXIL 1.5+ only, appended)
   static const unsigned kDxilTemplateArgumentsTag                 = 0;  // Name for name-value list of extended struct properties
   // TemplateArgument tags
@@ -249,6 +256,9 @@ public:
   static const char kDxilValidatorVersionMDName[];
   // Validator version uses the same constants for fields as kDxilVersion*
 
+  // DXR Payload Annotations metadata.
+  static const char kDxilDxrPayloadAnnotationsMDName[];
+
   // Extended shader property tags.
   static const unsigned kDxilShaderFlagsTag     = 0;
   static const unsigned kDxilGSStateTag         = 1;
@@ -262,6 +272,7 @@ public:
   static const unsigned kDxilMSStateTag         = 9;
   static const unsigned kDxilASStateTag         = 10;
   static const unsigned kDxilWaveSizeTag        = 11;
+  static const unsigned kDxilEntryRootSigTag    = 12;
 
   // GSState.
   static const unsigned kDxilGSStateNumFields               = 5;
@@ -414,6 +425,16 @@ public:
   llvm::Metadata *EmitDxilTemplateArgAnnotation(const DxilTemplateArgAnnotation &annotation);
   void LoadDxilTemplateArgAnnotation(const llvm::MDOperand &MDO, DxilTemplateArgAnnotation &annotation);
 
+  // DXR Payload Annotations 
+  void EmitDxrPayloadAnnotations(DxilTypeSystem &TypeSystem);
+  llvm::Metadata *EmitDxrPayloadStructAnnotation(const DxilPayloadAnnotation& SA);
+  llvm::Metadata *EmitDxrPayloadFieldAnnotation(const DxilPayloadFieldAnnotation &FA, llvm::Type* fieldType);
+  void LoadDxrPayloadAnnotationNode(const llvm::MDTuple &MDT, DxilTypeSystem &TypeSystem);
+  void LoadDxrPayloadAnnotations(DxilTypeSystem &TypeSystem);
+  void LoadDxrPayloadFieldAnnoations(const llvm::MDOperand& MDO, DxilPayloadAnnotation& SA);
+  void LoadDxrPayloadFieldAnnoation(const llvm::MDOperand &MDO, DxilPayloadFieldAnnotation &FA);
+  void LoadDxrPayloadAccessQualifiers(const llvm::MDOperand &MDO, DxilPayloadFieldAnnotation &FA);
+
   // Function props.
   llvm::MDTuple *EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props,
                                        const llvm::Function *F);

+ 4 - 0
include/dxc/DXIL/DxilModule.h

@@ -166,6 +166,7 @@ public:
 
   // DXIL type system.
   DxilTypeSystem &GetTypeSystem();
+  const DxilTypeSystem &GetTypeSystem() const;
 
   /// Emit llvm.used array to make sure that optimizations do not remove unreferenced globals.
   void EmitLLVMUsed();
@@ -386,6 +387,9 @@ private:
   uint32_t m_IntermediateFlags;
   uint32_t m_AutoBindingSpace;
 
+  // properties inferred from the DXILTypeSystem
+  bool m_bHasPayloadQualifiers;
+
   std::unique_ptr<DxilSubobjects> m_pSubobjects;
 
   // m_bMetadataErrors is true if non-fatal metadata errors were encountered.

+ 4 - 0
include/dxc/DXIL/DxilResource.h

@@ -45,6 +45,9 @@ public:
   unsigned GetElementStride() const;
   void SetElementStride(unsigned ElemStride);
 
+  unsigned GetBaseAlignLog2() const;
+  void SetBaseAlignLog2(unsigned baseAlignLog2);
+
   DXIL::SamplerFeedbackType GetSamplerFeedbackType() const;
   void SetSamplerFeedbackType(DXIL::SamplerFeedbackType Value);
 
@@ -76,6 +79,7 @@ public:
 private:
   unsigned m_SampleCount;
   unsigned m_ElementStride; // in bytes
+  unsigned m_baseAlignLog2 = 0; // worst-case alignment
   CompType m_CompType;
   DXIL::SamplerFeedbackType m_SamplerFeedbackType;
   bool m_bGloballyCoherent;

+ 3 - 2
include/dxc/DXIL/DxilShaderModel.h

@@ -31,7 +31,7 @@ public:
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_highest_shader_model()</py>*/
   // VALRULE-TEXT:BEGIN
   static const unsigned kHighestMajor = 6;
-  static const unsigned kHighestMinor = 6;
+  static const unsigned kHighestMinor = 7;
   // VALRULE-TEXT:END
   static const unsigned kOfflineMinor = 0xF;
 
@@ -68,6 +68,7 @@ public:
   bool IsSM64Plus() const { return IsSMAtLeast(6, 4); }
   bool IsSM65Plus() const { return IsSMAtLeast(6, 5); }
   bool IsSM66Plus() const { return IsSMAtLeast(6, 6); }
+  bool IsSM67Plus() const { return IsSMAtLeast(6, 7); }
   // VALRULE-TEXT:END
   const char *GetName() const { return m_pszName; }
   const char *GetKindName() const;
@@ -97,7 +98,7 @@ private:
               bool m_bUAVs, bool m_bTypedUavs, unsigned m_UAVRegsLim);
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_num_shader_models()</py>*/
   // VALRULE-TEXT:BEGIN
-  static const unsigned kNumShaderModels = 74;
+  static const unsigned kNumShaderModels = 83;
   // VALRULE-TEXT:END
   static const ShaderModel ms_ShaderModels[kNumShaderModels];
 

+ 52 - 0
include/dxc/DXIL/DxilTypeSystem.h

@@ -12,6 +12,7 @@
 #pragma once
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/MapVector.h"
+#include "dxc/DXIL/DxilConstants.h"
 #include "dxc/DXIL/DxilCompType.h"
 #include "dxc/DXIL/DxilInterpolationMode.h"
 
@@ -140,6 +141,46 @@ private:
 };
 
 
+/// Use this class to represent type annotation for DXR payload field.
+class DxilPayloadFieldAnnotation {
+public:
+
+  static unsigned GetBitOffsetForShaderStage(DXIL::PayloadAccessShaderStage shaderStage);
+
+  DxilPayloadFieldAnnotation() = default;
+
+  bool HasCompType() const;
+  const CompType &GetCompType() const;
+  void SetCompType(CompType::Kind kind);
+
+  uint32_t GetPayloadFieldQualifierMask() const;
+  void SetPayloadFieldQualifierMask(uint32_t fieldBitmask);
+  void AddPayloadFieldQualifier(DXIL::PayloadAccessShaderStage shaderStage, DXIL::PayloadAccessQualifier qualifier);
+  DXIL::PayloadAccessQualifier GetPayloadFieldQualifier(DXIL::PayloadAccessShaderStage shaderStage) const;
+  bool HasAnnotations() const;
+
+private:
+  CompType m_CompType;
+  unsigned m_bitmask = 0;
+};
+
+/// Use this class to represent DXR payload structures.
+class DxilPayloadAnnotation {
+  friend class DxilTypeSystem;
+
+public:
+  unsigned GetNumFields() const;
+  DxilPayloadFieldAnnotation &GetFieldAnnotation(unsigned FieldIdx);
+  const DxilPayloadFieldAnnotation &GetFieldAnnotation(unsigned FieldIdx) const;
+  const llvm::StructType *GetStructType() const;
+  void SetStructType(const llvm::StructType *Ty);
+
+private:
+  const llvm::StructType *m_pStructType;
+  std::vector<DxilPayloadFieldAnnotation> m_FieldAnnotations;
+};
+
+
 enum class DxilParamInputQual {
   In,
   Out,
@@ -192,6 +233,7 @@ private:
 class DxilTypeSystem {
 public:
   using StructAnnotationMap = llvm::MapVector<const llvm::StructType *, std::unique_ptr<DxilStructAnnotation> >;
+  using PayloadAnnotationMap = llvm::MapVector<const llvm::StructType *, std::unique_ptr<DxilPayloadAnnotation> >;
   using FunctionAnnotationMap = llvm::MapVector<const llvm::Function *, std::unique_ptr<DxilFunctionAnnotation> >;
 
   DxilTypeSystem(llvm::Module *pModule);
@@ -202,6 +244,15 @@ public:
   void EraseStructAnnotation(const llvm::StructType *pStructType);
 
   StructAnnotationMap &GetStructAnnotationMap();
+  const StructAnnotationMap &GetStructAnnotationMap() const;
+
+  DxilPayloadAnnotation *AddPayloadAnnotation(const llvm::StructType *pStructType);
+  DxilPayloadAnnotation *GetPayloadAnnotation(const llvm::StructType *pStructType);
+  const DxilPayloadAnnotation *GetPayloadAnnotation(const llvm::StructType *pStructType) const;
+  void ErasePayloadAnnotation(const llvm::StructType *pStructType);
+
+  PayloadAnnotationMap &GetPayloadAnnotationMap();
+  const PayloadAnnotationMap &GetPayloadAnnotationMap() const;
 
   DxilFunctionAnnotation *AddFunctionAnnotation(const llvm::Function *pFunction);
   DxilFunctionAnnotation *GetFunctionAnnotation(const llvm::Function *pFunction);
@@ -227,6 +278,7 @@ public:
 private:
   llvm::Module *m_pModule;
   StructAnnotationMap m_StructAnnotations;
+  PayloadAnnotationMap m_PayloadAnnotations;
   FunctionAnnotationMap m_FunctionAnnotations;
 
   DXIL::LowPrecisionMode m_LowPrecisionMode;

+ 3 - 0
include/dxc/DXIL/DxilUtil.h

@@ -152,6 +152,9 @@ namespace dxilutil {
 
   void ReplaceRawBufferLoad64Bit(llvm::Function *F, llvm::Type *EltTy, hlsl::OP *hlslOP);
   void ReplaceRawBufferStore64Bit(llvm::Function *F, llvm::Type *ETy, hlsl::OP *hlslOP);
+
+  bool IsConvergentMarker(llvm::Value *V);
+  llvm::Value *GetConvergentSource(llvm::Value *V);
 }
 
 }

+ 0 - 19
include/dxc/HLSL/DxilConvergent.h

@@ -1,19 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//                                                                           //
-// DxilConvergent.h                                                          //
-// Copyright (C) Microsoft Corporation. All rights reserved.                 //
-// This file is distributed under the University of Illinois Open Source     //
-// License. See LICENSE.TXT for details.                                     //
-//                                                                           //
-///////////////////////////////////////////////////////////////////////////////
-#pragma once
-
-namespace llvm {
-  class Value;
-  class Function;
-}
-
-namespace hlsl {
-  bool IsConvergentMarker(llvm::Value *V);
-  llvm::Value *GetConvergentSource(llvm::Value *V);
-}

+ 5 - 1
include/dxc/HLSL/DxilExportMap.h

@@ -38,10 +38,13 @@ namespace dxilutil {
     typedef ExportMapByString::iterator iterator;
     typedef ExportMapByString::const_iterator const_iterator;
 
-    ExportMap() {}
+    ExportMap():m_ExportShadersOnly(false) {}
     void clear();
     bool empty() const;
 
+    void setExportShadersOnly(bool v) { m_ExportShadersOnly = v; }
+    bool isExportShadersOnly() const { return m_ExportShadersOnly; }
+
     // Iterate export map by string name
     iterator begin() { return m_ExportMap.begin(); }
     const_iterator begin() const { return m_ExportMap.begin(); }
@@ -100,6 +103,7 @@ namespace dxilutil {
     NameSet m_ExportNames;
     NameSet m_NameCollisions;
     NameSet m_UnusedExports;
+    bool    m_ExportShadersOnly;
   };
 }
 

+ 11 - 3
include/dxc/HLSL/DxilValidation.h

@@ -31,7 +31,7 @@ namespace hlsl {
 // Known validation rules
 enum class ValidationRule : unsigned {
   // Bitcode
-  BitcodeValid, // TODO - Module must be bitcode-valid
+  BitcodeValid, // Module must be bitcode-valid
 
   // Container
   ContainerPartInvalid, // DXIL Container must not contain unknown parts
@@ -162,7 +162,7 @@ enum class ValidationRule : unsigned {
   MetaMaxTessFactor, // Hull Shader MaxTessFactor must be [%0..%1].  %2 specified.
   MetaNoEntryPropsForEntry, // Entry point %0 must have entry properties.
   MetaNoSemanticOverlap, // Semantics must not overlap
-  MetaRequired, // TODO - Required metadata missing.
+  MetaRequired, // Required metadata missing.
   MetaSemaKindMatchesName, // Semantic name must match system value, when defined.
   MetaSemaKindValid, // Semantic kind must be valid
   MetaSemanticCompType, // %0 must be %1.
@@ -186,7 +186,8 @@ enum class ValidationRule : unsigned {
   MetaUsed, // All metadata must be used by dxil.
   MetaValidSamplerMode, // Invalid sampler mode on sampler .
   MetaValueRange, // Metadata value must be within range.
-  MetaWellFormed, // TODO - Metadata must be well-formed in operand count and types.
+  MetaVersionSupported, // Version in metadata must be supported.
+  MetaWellFormed, // Metadata must be well-formed in operand count and types.
 
   // Program flow
   FlowDeadLoop, // Loop must have break.
@@ -347,6 +348,13 @@ HRESULT ValidateDxilContainer(_In_reads_bytes_(ContainerSize) const void *pConta
                               _In_ uint32_t ContainerSize,
                               _In_ llvm::raw_ostream &DiagStream);
 
+// Full container validation, including ValidateDxilModule, with debug module
+HRESULT ValidateDxilContainer(_In_reads_bytes_(ContainerSize) const void *pContainer,
+                              _In_ uint32_t ContainerSize,
+                              const void *pOptDebugBitcode,
+                              uint32_t OptDebugBitcodeSize,
+                              _In_ llvm::raw_ostream &DiagStream);
+
 class PrintDiagnosticContext {
 private:
   llvm::DiagnosticPrinter &m_Printer;

+ 9 - 0
include/dxc/Support/ErrorCodes.h

@@ -107,3 +107,12 @@
 
 // 0X80AA001A - Error in extension mechanism.
 #define DXC_E_EXTENSION_ERROR                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001A))
+
+// 0X80AA001B - LLVM Fatal Error
+#define DXC_E_LLVM_FATAL_ERROR                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001B))
+
+// 0X80AA001C - LLVM Unreachable code
+#define DXC_E_LLVM_UNREACHABLE                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001C))
+
+// 0X80AA001D - LLVM Cast Failure
+#define DXC_E_LLVM_CAST_ERROR                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001D))

+ 4 - 0
include/dxc/Support/FileIOHelper.h

@@ -190,6 +190,10 @@ HRESULT DxcCreateBlobWithEncodingFromPinned(
     _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) throw();
 
+HRESULT DxcCreateBlobFromPinned(
+    _In_bytecount_(size) LPCVOID pText, UINT32 size,
+    _COM_Outptr_ IDxcBlob **pBlob) throw();
+
 HRESULT
 DxcCreateBlobWithEncodingFromStream(
     IStream *pStream, bool newInstanceAlways, UINT32 codePage,

+ 7 - 3
include/dxc/Support/HLSLOptions.h

@@ -141,6 +141,7 @@ public:
   bool DebugNameForBinary = false; // OPT_Zsb
   bool DebugNameForSource = false; // OPT_Zss
   bool DumpBin = false;        // OPT_dumpbin
+  bool Link = false;        // OPT_link
   bool WarningAsError = false; // OPT__SLASH_WX
   bool IEEEStrict = false;     // OPT_Gis
   bool IgnoreLineDirectives = false; // OPT_ignore_line_directives
@@ -172,9 +173,9 @@ public:
   bool RecompileFromBinary = false; // OPT _Recompile (Recompiling the DXBC binary file not .hlsl file)
   bool StripDebug = false; // OPT Qstrip_debug
   bool EmbedDebug = false; // OPT Qembed_debug
-  bool SourceInDebugModule = false; // OPT Qsource_in_debug_module
+  bool SourceInDebugModule = false; // OPT Zs
   bool SourceOnlyDebug = false; // OPT Qsource_only_debug
-  bool FullDebug = false; // OPT Qfull_debug
+  bool PdbInPrivate = false; // OPT Qpdb_in_private
   bool StripRootSignature = false; // OPT_Qstrip_rootsignature
   bool StripPrivate = false; // OPT_Qstrip_priv
   bool StripReflection = false; // OPT_Qstrip_reflect
@@ -201,6 +202,8 @@ public:
   std::map<std::string, std::string> DxcOptimizationSelects; // OPT_opt_select
 
   bool PrintAfterAll; // OPT_print_after_all
+  bool EnablePayloadQualifiers = false; // OPT_enable_payload_qualifiers
+  bool HandleExceptions = false; // OPT_disable_exception_handling
 
   // Rewriter Options
   RewriterOpts RWOpt;
@@ -211,7 +214,8 @@ public:
   bool IsLibraryProfile();
 
   // Helpers to clarify interpretation of flags for behavior in implementation
-  bool IsDebugInfoEnabled();    // Zi
+  bool GenerateFullDebugInfo(); // Zi
+  bool GeneratePDB();           // Zi or Zs
   bool EmbedDebugInfo();        // Qembed_debug
   bool EmbedPDBName();          // Zi or Fd
   bool DebugFileIsDirectory();  // Fd ends in '\\'

+ 12 - 4
include/dxc/Support/HLSLOptions.td

@@ -279,6 +279,12 @@ def disable_lifetime_markers : Flag<["-", "/"], "disable-lifetime-markers">, Gro
   HelpText<"Disable generation of lifetime markers where they would be otherwise (6.6+)">;
 def enable_templates: Flag<["-", "/"], "enable-templates">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
   HelpText<"Enable template support for HLSL.">;
+def enable_payload_qualifiers : Flag<["-", "/"], "enable-payload-qualifiers">, Group<hlslcomp_Group>, Flags<[CoreOption, RewriteOption, DriverOption]>,
+  HelpText<"Enables support for payload access qualifiers for raytracing payloads in SM 6.6.">;
+def disable_payload_qualifiers : Flag<["-", "/"], "disable-payload-qualifiers">, Group<hlslcomp_Group>, Flags<[CoreOption, RewriteOption, DriverOption]>,
+  HelpText<"Disables support for payload access qualifiers for raytracing payloads in SM 6.7.">;
+def disable_exception_handling : Flag<["-", "/"], "disable-exception-handling">, Group<hlslcomp_Group>, Flags<[DriverOption, HelpHidden]>,
+  HelpText<"Disable dxc handling of exceptions">;
 
 // Used with API only
 def skip_serialization : Flag<["-", "/"], "skip-serialization">, Group<hlslcore_Group>, Flags<[CoreOption, HelpHidden]>,
@@ -340,7 +346,7 @@ def Oconfig : CommaJoined<["-"], "Oconfig=">, Group<spirv_Group>, Flags<[CoreOpt
 def target_profile : JoinedOrSeparate<["-", "/"], "T">, Flags<[CoreOption]>, Group<hlslcomp_Group>, MetaVarName<"<profile>">,
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_target_profiles()</py>*/
   // VALRULE-TEXT:BEGIN
-  HelpText<"Set target profile. \n\t<profile>: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, ps_6_5, ps_6_6, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, vs_6_5, vs_6_6, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, gs_6_5, gs_6_6, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, hs_6_5, hs_6_6, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, ds_6_5, ds_6_6, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, \n\t\t lib_6_1, lib_6_2, lib_6_3, lib_6_4, lib_6_5, lib_6_6, \n\t\t ms_6_5, ms_6_6, \n\t\t as_6_5, as_6_6, \n\t\t ">;
+  HelpText<"Set target profile. \n\t<profile>: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, ps_6_5, ps_6_6, ps_6_7, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, vs_6_5, vs_6_6, vs_6_7, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, gs_6_5, gs_6_6, gs_6_7, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, hs_6_5, hs_6_6, hs_6_7, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, ds_6_5, ds_6_6, ds_6_7, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, cs_6_7, \n\t\t lib_6_1, lib_6_2, lib_6_3, lib_6_4, lib_6_5, lib_6_6, lib_6_7, \n\t\t ms_6_5, ms_6_6, ms_6_7, \n\t\t as_6_5, as_6_6, as_6_7, \n\t\t ">;
   // VALRULE-TEXT:END
 def entrypoint :  JoinedOrSeparate<["-", "/"], "E">, Flags<[CoreOption, RewriteOption]>, Group<hlslcomp_Group>,
   HelpText<"Entry point name">;
@@ -403,6 +409,8 @@ def P : Separate<["-", "/"], "P">, Flags<[CoreOption, DriverOption]>, Group<hlsl
 
 def dumpbin : Flag<["-", "/"], "dumpbin">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Load a binary file rather than compiling">;
+def link : Flag<["-", "/"], "link">, Flags<[DriverOption]>, Group<hlslutil_Group>,
+  HelpText<"Link list of libraries provided in <inputs> argument separated by ';'">;
 def Qstrip_reflect : Flag<["-", "/"], "Qstrip_reflect">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Strip reflection data from shader bytecode  (must be used with /Fo <file>)">;
 def Qstrip_debug : Flag<["-", "/"], "Qstrip_debug">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>,
@@ -413,10 +421,10 @@ def Qstrip_priv : Flag<["-", "/"], "Qstrip_priv">, Flags<[DriverOption]>, Group<
   HelpText<"Strip private data from shader bytecode  (must be used with /Fo <file>)">;
 def Qsource_in_debug_module : Flag<["-", "/"], "Qsource_in_debug_module">, Flags<[CoreOption, HelpHidden]>, Group<hlslutil_Group>,
   HelpText<"Generate old PDB format.">;
-def Qsource_only_debug : Flag<["-", "/"], "Qsource_only_debug">, Flags<[CoreOption, HelpHidden]>, Group<hlslutil_Group>,
+def Zs : Flag<["-", "/"], "Zs">, Flags<[CoreOption]>, Group<hlslutil_Group>,
   HelpText<"Generate small PDB with just sources and compile options.">;
-def Qfull_debug : Flag<["-", "/"], "Qfull_debug">, Flags<[CoreOption, HelpHidden]>, Group<hlslutil_Group>,
-  HelpText<"Generate full debug info for PDB.">;
+def Qpdb_in_private : Flag<["-", "/"], "Qpdb_in_private">, Flags<[CoreOption, HelpHidden]>, Group<hlslutil_Group>,
+  HelpText<"Store PDB in private user data.">;
 
 def Qstrip_rootsignature : Flag<["-", "/"], "Qstrip_rootsignature">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>, HelpText<"Strip root signature data from shader bytecode  (must be used with /Fo <file>)">;
 def setrootsignature     : JoinedOrSeparate<["-", "/"], "setrootsignature">,     MetaVarName<"<file>">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>, HelpText<"Attach root signature to shader bytecode">;

+ 2634 - 0
include/dxc/Support/d3d12TokenizedProgramFormat.hpp

@@ -0,0 +1,2634 @@
+#pragma once
+
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// d3d12TokenizedProgramFormat.hpp                                           //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides declarations for the DirectX Tokenized Program Format.           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+// ----------------------------------------------------------------------------
+//
+// High Level Goals
+//
+// - Serve as the runtime/DDI representation for all D3D11 tokenized code,
+//   for all classes of programs, including pixel program, vertex program,
+//   geometry program, etc.
+//
+// - Any information that HLSL needs to give to drivers is encoded in
+//   this token format in some form.
+//
+// - Enable common tools and source code for managing all tokenizable
+//   program formats.
+//
+// - Support extensible token definitions, allowing full customizations for
+//   specific program classes, while maintaining general conventions for all
+//   program models.
+//
+// - Binary backwards compatible with D3D10.  Any token name that was originally
+//   defined with "D3D10" in it is unchanged; D3D11 only adds new tokens.
+//
+// ----------------------------------------------------------------------------
+//
+// Low Level Feature Summary
+//
+// - DWORD based tokens always, for simplicity
+// - Opcode token is generally a single DWORD, though there is a bit indicating
+//   if extended information (extra DWORD(s)) are present
+// - Operand tokens are a completely self contained, extensible format,
+//   with scalar and 4-vector data types as first class citizens, but
+//   allowance for extension to n-component vectors.
+// - Initial operand token identifies register type, register file
+//   structure/dimensionality and mode of indexing for each dimension,
+//   and choice of component selection mechanism (i.e. mask vs. swizzle etc).
+// - Optional additional extended operand tokens can define things like
+//   modifiers (which are not needed by default).
+// - Operand's immediate index value(s), if needed, appear as subsequent DWORD
+//   values, and if relative addressing is specified, an additional completely
+//   self contained operand definition appears nested in the token sequence.
+//
+// ----------------------------------------------------------------------------
+
+#include <winapifamily.h>
+
+#pragma region Application Family
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_GAMES)
+
+// ----------------------------------------------------------------------------
+// Version Token (VerTok)
+//
+// [03:00] minor version number (0-15)
+// [07:04] major version number (0-15)
+// [15:08] not covered by any of the masks defined below
+// [31:16] D3D10_SB_TOKENIZED_PROGRAM_TYPE
+//
+// ----------------------------------------------------------------------------
+
+typedef enum D3D10_SB_TOKENIZED_PROGRAM_TYPE
+{
+    D3D10_SB_PIXEL_SHADER       = 0,
+    D3D10_SB_VERTEX_SHADER      = 1,
+    D3D10_SB_GEOMETRY_SHADER    = 2,
+    
+    // D3D11 Shaders
+    D3D11_SB_HULL_SHADER        = 3,
+    D3D11_SB_DOMAIN_SHADER      = 4,
+    D3D11_SB_COMPUTE_SHADER     = 5,
+
+    // Subset of D3D12 Shaders where this field is referenced by runtime
+    // Entries from 6-12 are unique to state objects 
+    // (e.g. library, callable and raytracing shaders)
+    D3D12_SB_MESH_SHADER        = 13,
+    D3D12_SB_AMPLIFICATION_SHADER = 14,
+
+    D3D11_SB_RESERVED0          = 0xFFF0
+} D3D10_SB_TOKENIZED_PROGRAM_TYPE;
+
+#define D3D10_SB_TOKENIZED_PROGRAM_TYPE_MASK  0xffff0000
+#define D3D10_SB_TOKENIZED_PROGRAM_TYPE_SHIFT 16
+
+// DECODER MACRO: Retrieve program type from version token
+#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_TYPE(VerTok) ((D3D10_SB_TOKENIZED_PROGRAM_TYPE)(((VerTok)&D3D10_SB_TOKENIZED_PROGRAM_TYPE_MASK)>>D3D10_SB_TOKENIZED_PROGRAM_TYPE_SHIFT))
+
+#define D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_MASK  0x000000f0
+#define D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_SHIFT 4
+#define D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION_MASK  0x0000000f
+
+// DECODER MACRO: Retrieve major version # from version token
+#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION(VerTok) (((VerTok)&D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_MASK)>>D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_SHIFT)
+// DECODER MACRO: Retrieve minor version # from version token
+#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION(VerTok) ((VerTok)&D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION_MASK)
+
+// ENCODER MACRO: Create complete VerTok
+#define ENCODE_D3D10_SB_TOKENIZED_PROGRAM_VERSION_TOKEN(ProgType,MajorVer,MinorVer) ((((ProgType)<<D3D10_SB_TOKENIZED_PROGRAM_TYPE_SHIFT)&D3D10_SB_TOKENIZED_PROGRAM_TYPE_MASK)|\
+                                                                               ((((MajorVer)<<D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_SHIFT)&D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_MASK))|\
+                                                                               ((MinorVer)&D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION_MASK))
+
+// ----------------------------------------------------------------------------
+// Length Token (LenTok)
+//
+// Always follows VerTok
+//
+// [31:00] Unsigned integer count of number of
+//              DWORDs in program code, including version
+//              and length tokens.  So the minimum value
+//              is 0x00000002 (if an empty program is ever
+//              valid).
+//
+// ----------------------------------------------------------------------------
+
+// DECODER MACRO: Retrieve program length
+#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_LENGTH(LenTok) (LenTok)
+// ENCODER MACRO: Create complete LenTok
+#define ENCODE_D3D10_SB_TOKENIZED_PROGRAM_LENGTH(Length) (Length)
+#define MAX_D3D10_SB_TOKENIZED_PROGRAM_LENGTH (0xffffffff)
+
+// ----------------------------------------------------------------------------
+// Opcode Format (OpcodeToken0)
+//
+// [10:00] D3D10_SB_OPCODE_TYPE
+// if( [10:00] == D3D10_SB_OPCODE_CUSTOMDATA )
+// {
+//    Token starts a custom-data block.  See "Custom-Data Block Format".
+// }
+// else // standard opcode token
+// {
+//    [23:11] Opcode-Specific Controls
+//    [30:24] Instruction length in DWORDs including the opcode token.
+//    [31]    0 normally. 1 if extended opcode definition, meaning next DWORD
+//            contains extended opcode token.
+// }
+//
+// ----------------------------------------------------------------------------
+
+typedef enum D3D10_SB_OPCODE_TYPE {
+    D3D10_SB_OPCODE_ADD          ,
+    D3D10_SB_OPCODE_AND          ,
+    D3D10_SB_OPCODE_BREAK        ,
+    D3D10_SB_OPCODE_BREAKC       ,
+    D3D10_SB_OPCODE_CALL         ,
+    D3D10_SB_OPCODE_CALLC        ,
+    D3D10_SB_OPCODE_CASE         ,
+    D3D10_SB_OPCODE_CONTINUE     ,
+    D3D10_SB_OPCODE_CONTINUEC    ,
+    D3D10_SB_OPCODE_CUT          ,
+    D3D10_SB_OPCODE_DEFAULT      ,
+    D3D10_SB_OPCODE_DERIV_RTX    ,
+    D3D10_SB_OPCODE_DERIV_RTY    ,
+    D3D10_SB_OPCODE_DISCARD      ,
+    D3D10_SB_OPCODE_DIV          ,
+    D3D10_SB_OPCODE_DP2          ,
+    D3D10_SB_OPCODE_DP3          ,
+    D3D10_SB_OPCODE_DP4          ,
+    D3D10_SB_OPCODE_ELSE         ,
+    D3D10_SB_OPCODE_EMIT         ,
+    D3D10_SB_OPCODE_EMITTHENCUT  ,
+    D3D10_SB_OPCODE_ENDIF        ,
+    D3D10_SB_OPCODE_ENDLOOP      ,
+    D3D10_SB_OPCODE_ENDSWITCH    ,
+    D3D10_SB_OPCODE_EQ           ,
+    D3D10_SB_OPCODE_EXP          ,
+    D3D10_SB_OPCODE_FRC          ,
+    D3D10_SB_OPCODE_FTOI         ,
+    D3D10_SB_OPCODE_FTOU         ,
+    D3D10_SB_OPCODE_GE           ,
+    D3D10_SB_OPCODE_IADD         ,
+    D3D10_SB_OPCODE_IF           ,
+    D3D10_SB_OPCODE_IEQ          ,
+    D3D10_SB_OPCODE_IGE          ,
+    D3D10_SB_OPCODE_ILT          ,
+    D3D10_SB_OPCODE_IMAD         ,
+    D3D10_SB_OPCODE_IMAX         ,
+    D3D10_SB_OPCODE_IMIN         ,
+    D3D10_SB_OPCODE_IMUL         ,
+    D3D10_SB_OPCODE_INE          ,
+    D3D10_SB_OPCODE_INEG         ,
+    D3D10_SB_OPCODE_ISHL         ,
+    D3D10_SB_OPCODE_ISHR         ,
+    D3D10_SB_OPCODE_ITOF         ,
+    D3D10_SB_OPCODE_LABEL        ,
+    D3D10_SB_OPCODE_LD           ,
+    D3D10_SB_OPCODE_LD_MS        ,
+    D3D10_SB_OPCODE_LOG          ,
+    D3D10_SB_OPCODE_LOOP         ,
+    D3D10_SB_OPCODE_LT           ,
+    D3D10_SB_OPCODE_MAD          ,
+    D3D10_SB_OPCODE_MIN          ,
+    D3D10_SB_OPCODE_MAX          ,
+    D3D10_SB_OPCODE_CUSTOMDATA   ,
+    D3D10_SB_OPCODE_MOV          ,
+    D3D10_SB_OPCODE_MOVC         ,
+    D3D10_SB_OPCODE_MUL          ,
+    D3D10_SB_OPCODE_NE           ,
+    D3D10_SB_OPCODE_NOP          ,
+    D3D10_SB_OPCODE_NOT          ,
+    D3D10_SB_OPCODE_OR           ,
+    D3D10_SB_OPCODE_RESINFO      ,
+    D3D10_SB_OPCODE_RET          ,
+    D3D10_SB_OPCODE_RETC         ,
+    D3D10_SB_OPCODE_ROUND_NE     ,
+    D3D10_SB_OPCODE_ROUND_NI     ,
+    D3D10_SB_OPCODE_ROUND_PI     ,
+    D3D10_SB_OPCODE_ROUND_Z      ,
+    D3D10_SB_OPCODE_RSQ          ,
+    D3D10_SB_OPCODE_SAMPLE       ,
+    D3D10_SB_OPCODE_SAMPLE_C     ,
+    D3D10_SB_OPCODE_SAMPLE_C_LZ  ,
+    D3D10_SB_OPCODE_SAMPLE_L     ,
+    D3D10_SB_OPCODE_SAMPLE_D     ,
+    D3D10_SB_OPCODE_SAMPLE_B     ,
+    D3D10_SB_OPCODE_SQRT         ,
+    D3D10_SB_OPCODE_SWITCH       ,
+    D3D10_SB_OPCODE_SINCOS       ,
+    D3D10_SB_OPCODE_UDIV         ,
+    D3D10_SB_OPCODE_ULT          ,
+    D3D10_SB_OPCODE_UGE          ,
+    D3D10_SB_OPCODE_UMUL         ,
+    D3D10_SB_OPCODE_UMAD         ,
+    D3D10_SB_OPCODE_UMAX         ,
+    D3D10_SB_OPCODE_UMIN         ,
+    D3D10_SB_OPCODE_USHR         ,
+    D3D10_SB_OPCODE_UTOF         ,
+    D3D10_SB_OPCODE_XOR          ,
+    D3D10_SB_OPCODE_DCL_RESOURCE                     , // DCL* opcodes have
+    D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER              , // custom operand formats.
+    D3D10_SB_OPCODE_DCL_SAMPLER                      ,
+    D3D10_SB_OPCODE_DCL_INDEX_RANGE                  ,
+    D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY ,
+    D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE           ,
+    D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT      ,
+    D3D10_SB_OPCODE_DCL_INPUT                        ,
+    D3D10_SB_OPCODE_DCL_INPUT_SGV                    ,
+    D3D10_SB_OPCODE_DCL_INPUT_SIV                    ,
+    D3D10_SB_OPCODE_DCL_INPUT_PS                     ,
+    D3D10_SB_OPCODE_DCL_INPUT_PS_SGV                 ,
+    D3D10_SB_OPCODE_DCL_INPUT_PS_SIV                 ,
+    D3D10_SB_OPCODE_DCL_OUTPUT                       ,
+    D3D10_SB_OPCODE_DCL_OUTPUT_SGV                   ,
+    D3D10_SB_OPCODE_DCL_OUTPUT_SIV                   ,
+    D3D10_SB_OPCODE_DCL_TEMPS                        ,
+    D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP               ,
+    D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS                 ,
+
+// -----------------------------------------------
+
+    // This marks the end of D3D10.0 opcodes
+    D3D10_SB_OPCODE_RESERVED0,
+    
+// ---------- DX 10.1 op codes---------------------
+
+    D3D10_1_SB_OPCODE_LOD,
+    D3D10_1_SB_OPCODE_GATHER4,
+    D3D10_1_SB_OPCODE_SAMPLE_POS,
+    D3D10_1_SB_OPCODE_SAMPLE_INFO,
+
+// -----------------------------------------------
+
+    // This marks the end of D3D10.1 opcodes
+    D3D10_1_SB_OPCODE_RESERVED1,
+
+// ---------- DX 11 op codes---------------------
+    D3D11_SB_OPCODE_HS_DECLS                         , // token marks beginning of HS sub-shader
+    D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE           , // token marks beginning of HS sub-shader
+    D3D11_SB_OPCODE_HS_FORK_PHASE                    , // token marks beginning of HS sub-shader
+    D3D11_SB_OPCODE_HS_JOIN_PHASE                    , // token marks beginning of HS sub-shader
+
+    D3D11_SB_OPCODE_EMIT_STREAM                      ,
+    D3D11_SB_OPCODE_CUT_STREAM                       ,
+    D3D11_SB_OPCODE_EMITTHENCUT_STREAM               ,
+    D3D11_SB_OPCODE_INTERFACE_CALL                   ,
+
+    D3D11_SB_OPCODE_BUFINFO                          ,
+    D3D11_SB_OPCODE_DERIV_RTX_COARSE                 ,
+    D3D11_SB_OPCODE_DERIV_RTX_FINE                   ,
+    D3D11_SB_OPCODE_DERIV_RTY_COARSE                 ,
+    D3D11_SB_OPCODE_DERIV_RTY_FINE                   ,
+    D3D11_SB_OPCODE_GATHER4_C                        ,
+    D3D11_SB_OPCODE_GATHER4_PO                       ,
+    D3D11_SB_OPCODE_GATHER4_PO_C                     ,
+    D3D11_SB_OPCODE_RCP                              ,
+    D3D11_SB_OPCODE_F32TOF16                         ,
+    D3D11_SB_OPCODE_F16TOF32                         ,
+    D3D11_SB_OPCODE_UADDC                            ,
+    D3D11_SB_OPCODE_USUBB                            ,
+    D3D11_SB_OPCODE_COUNTBITS                        ,
+    D3D11_SB_OPCODE_FIRSTBIT_HI                      ,
+    D3D11_SB_OPCODE_FIRSTBIT_LO                      ,
+    D3D11_SB_OPCODE_FIRSTBIT_SHI                     ,
+    D3D11_SB_OPCODE_UBFE                             ,
+    D3D11_SB_OPCODE_IBFE                             ,
+    D3D11_SB_OPCODE_BFI                              ,
+    D3D11_SB_OPCODE_BFREV                            ,
+    D3D11_SB_OPCODE_SWAPC                            ,
+
+    D3D11_SB_OPCODE_DCL_STREAM                       ,
+    D3D11_SB_OPCODE_DCL_FUNCTION_BODY                ,
+    D3D11_SB_OPCODE_DCL_FUNCTION_TABLE               ,
+    D3D11_SB_OPCODE_DCL_INTERFACE                    ,
+    
+    D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT    ,
+    D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT   ,
+    D3D11_SB_OPCODE_DCL_TESS_DOMAIN                  ,
+    D3D11_SB_OPCODE_DCL_TESS_PARTITIONING            ,
+    D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE        ,
+    D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR            ,
+    D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT ,
+    D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT ,
+
+    D3D11_SB_OPCODE_DCL_THREAD_GROUP                 ,
+    D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED  ,
+    D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW    ,
+    D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED,
+    D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW,
+    D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED,
+    D3D11_SB_OPCODE_DCL_RESOURCE_RAW                 ,
+    D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED          ,
+    D3D11_SB_OPCODE_LD_UAV_TYPED                     ,
+    D3D11_SB_OPCODE_STORE_UAV_TYPED                  ,
+    D3D11_SB_OPCODE_LD_RAW                           ,
+    D3D11_SB_OPCODE_STORE_RAW                        ,
+    D3D11_SB_OPCODE_LD_STRUCTURED                    ,
+    D3D11_SB_OPCODE_STORE_STRUCTURED                 ,
+    D3D11_SB_OPCODE_ATOMIC_AND                       ,
+    D3D11_SB_OPCODE_ATOMIC_OR                        ,
+    D3D11_SB_OPCODE_ATOMIC_XOR                       ,
+    D3D11_SB_OPCODE_ATOMIC_CMP_STORE                 ,
+    D3D11_SB_OPCODE_ATOMIC_IADD                      ,
+    D3D11_SB_OPCODE_ATOMIC_IMAX                      ,
+    D3D11_SB_OPCODE_ATOMIC_IMIN                      ,
+    D3D11_SB_OPCODE_ATOMIC_UMAX                      ,
+    D3D11_SB_OPCODE_ATOMIC_UMIN                      ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_ALLOC                 ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_CONSUME               ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_IADD                  ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_AND                   ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_OR                    ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_XOR                   ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_EXCH                  ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH              ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_IMAX                  ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_IMIN                  ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_UMAX                  ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_UMIN                  ,   
+    D3D11_SB_OPCODE_SYNC                             ,
+    
+    D3D11_SB_OPCODE_DADD                             ,
+    D3D11_SB_OPCODE_DMAX                             ,
+    D3D11_SB_OPCODE_DMIN                             ,
+    D3D11_SB_OPCODE_DMUL                             ,
+    D3D11_SB_OPCODE_DEQ                              ,
+    D3D11_SB_OPCODE_DGE                              ,
+    D3D11_SB_OPCODE_DLT                              ,
+    D3D11_SB_OPCODE_DNE                              ,
+    D3D11_SB_OPCODE_DMOV                             ,
+    D3D11_SB_OPCODE_DMOVC                            ,
+    D3D11_SB_OPCODE_DTOF                             ,
+    D3D11_SB_OPCODE_FTOD                             ,
+
+    D3D11_SB_OPCODE_EVAL_SNAPPED                     ,
+    D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX                ,
+    D3D11_SB_OPCODE_EVAL_CENTROID                    ,
+    
+    D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT            ,
+
+    D3D11_SB_OPCODE_ABORT                            ,
+    D3D11_SB_OPCODE_DEBUG_BREAK                      ,
+
+// -----------------------------------------------
+
+    // This marks the end of D3D11.0 opcodes
+    D3D11_SB_OPCODE_RESERVED0,
+
+    D3D11_1_SB_OPCODE_DDIV,
+    D3D11_1_SB_OPCODE_DFMA,
+    D3D11_1_SB_OPCODE_DRCP,
+
+    D3D11_1_SB_OPCODE_MSAD,
+
+    D3D11_1_SB_OPCODE_DTOI,
+    D3D11_1_SB_OPCODE_DTOU,
+    D3D11_1_SB_OPCODE_ITOD,
+    D3D11_1_SB_OPCODE_UTOD,
+
+// -----------------------------------------------
+
+    // This marks the end of D3D11.1 opcodes
+    D3D11_1_SB_OPCODE_RESERVED0,
+
+    D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK,
+
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK,
+
+    D3DWDDM1_3_SB_OPCODE_CHECK_ACCESS_FULLY_MAPPED,
+
+// -----------------------------------------------
+
+    // This marks the end of WDDM 1.3 opcodes
+    D3DWDDM1_3_SB_OPCODE_RESERVED0,
+
+    D3D10_SB_NUM_OPCODES                                     // Should be the last entry
+} D3D10_SB_OPCODE_TYPE;
+
+#define D3D10_SB_OPCODE_TYPE_MASK 0x00007ff
+// DECODER MACRO: Retrieve program opcode
+#define DECODE_D3D10_SB_OPCODE_TYPE(OpcodeToken0) ((D3D10_SB_OPCODE_TYPE)((OpcodeToken0)&D3D10_SB_OPCODE_TYPE_MASK))
+// ENCODER MACRO: Create the opcode-type portion of OpcodeToken0
+#define ENCODE_D3D10_SB_OPCODE_TYPE(OpcodeName) ((OpcodeName)&D3D10_SB_OPCODE_TYPE_MASK)
+
+#define D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_MASK 0x7f000000
+#define D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_SHIFT 24
+// DECODER MACRO: Retrieve instruction length
+// in # of DWORDs including the opcode token(s).
+// The range is 1-127.
+#define DECODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(OpcodeToken0) (((OpcodeToken0)&D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_MASK)>> D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_SHIFT)
+
+// ENCODER MACRO: Store instruction length
+// portion of OpcodeToken0, in # of DWORDs
+// including the opcode token(s).
+// Valid range is 1-127.
+#define ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(Length) (((Length)<<D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_SHIFT)&D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_MASK)
+#define MAX_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH 127
+
+#define D3D10_SB_INSTRUCTION_SATURATE_MASK 0x00002000
+// DECODER MACRO: Check OpcodeToken0 to see if an instruction
+// is to saturate the result [0..1]
+// This flag is indicated by one of the bits in the
+// opcode specific control range.
+#define DECODE_IS_D3D10_SB_INSTRUCTION_SATURATE_ENABLED(OpcodeToken0) ((OpcodeToken0)&D3D10_SB_INSTRUCTION_SATURATE_MASK)
+// ENCODER MACRO: Encode in OpcodeToken0 if instruction is to saturate the result.
+#define ENCODE_D3D10_SB_INSTRUCTION_SATURATE(bSat) (((bSat)!=0)?D3D10_SB_INSTRUCTION_SATURATE_MASK:0)
+
+// Boolean test for conditional instructions such as if (if_z or if_nz)
+// This is part of the opcode specific control range.
+typedef enum D3D10_SB_INSTRUCTION_TEST_BOOLEAN
+{
+    D3D10_SB_INSTRUCTION_TEST_ZERO       = 0,
+    D3D10_SB_INSTRUCTION_TEST_NONZERO    = 1
+} D3D10_SB_INSTRUCTION_TEST_BOOLEAN;
+#define D3D10_SB_INSTRUCTION_TEST_BOOLEAN_MASK  0x00040000
+#define D3D10_SB_INSTRUCTION_TEST_BOOLEAN_SHIFT 18
+
+// DECODER MACRO: For an OpcodeToken0 that requires either a
+// zero or non-zero test, determine which test was chosen.
+#define DECODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(OpcodeToken0) ((D3D10_SB_INSTRUCTION_TEST_BOOLEAN)(((OpcodeToken0)&D3D10_SB_INSTRUCTION_TEST_BOOLEAN_MASK)>>D3D10_SB_INSTRUCTION_TEST_BOOLEAN_SHIFT))
+// ENCODER MACRO: Store "zero" or "nonzero" in the opcode
+// specific control range of OpcodeToken0
+#define ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(Boolean) (((Boolean)<<D3D10_SB_INSTRUCTION_TEST_BOOLEAN_SHIFT)&D3D10_SB_INSTRUCTION_TEST_BOOLEAN_MASK)
+
+// Precise value mask (bits 19-22)
+// This is part of the opcode specific control range.
+// It's 1 bit per channel of the output; for instructions with multiple
+// output operands, it applies to that component in each operand. This
+// uses the components defined in D3D10_SB_COMPONENT_NAME.
+#define D3D11_SB_INSTRUCTION_PRECISE_VALUES_MASK  0x00780000
+#define D3D11_SB_INSTRUCTION_PRECISE_VALUES_SHIFT 19
+
+// DECODER MACRO: this macro extracts from OpcodeToken0 the 4 component
+// (xyzw) mask, as a field of D3D10_SB_4_COMPONENT_[X|Y|Z|W] flags.
+#define DECODE_D3D11_SB_INSTRUCTION_PRECISE_VALUES(OpcodeToken0) ((((OpcodeToken0)&D3D11_SB_INSTRUCTION_PRECISE_VALUES_MASK)>>D3D11_SB_INSTRUCTION_PRECISE_VALUES_SHIFT))
+// ENCODER MACRO: Given a set of
+// D3D10_SB_OPERAND_4_COMPONENT_[X|Y|Z|W] values
+// or'd together, encode them in OpcodeToken0.
+#define ENCODE_D3D11_SB_INSTRUCTION_PRECISE_VALUES(ComponentMask) (((ComponentMask)<<D3D11_SB_INSTRUCTION_PRECISE_VALUES_SHIFT)&D3D11_SB_INSTRUCTION_PRECISE_VALUES_MASK)
+
+// resinfo instruction return type
+typedef enum D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE
+{
+    D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT      = 0,
+    D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT   = 1,
+    D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT       = 2
+} D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE;
+
+#define D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_MASK  0x00001800
+#define D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_SHIFT 11
+
+// DECODER MACRO: For an OpcodeToken0 for the resinfo instruction, 
+// determine the return type.
+#define DECODE_D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE(OpcodeToken0) ((D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE)(((OpcodeToken0)&D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_MASK)>>D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_SHIFT))
+// ENCODER MACRO: Encode the return type for the resinfo instruction
+// in the opcode specific control range of OpcodeToken0
+#define ENCODE_D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE(ReturnType) (((ReturnType)<<D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_SHIFT)&D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_MASK)
+
+// sync instruction flags
+#define D3D11_SB_SYNC_THREADS_IN_GROUP              0x00000800
+#define D3D11_SB_SYNC_THREAD_GROUP_SHARED_MEMORY    0x00001000
+#define D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP  0x00002000
+#define D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL  0x00004000
+#define D3D11_SB_SYNC_FLAGS_MASK                    0x00007800
+
+// DECODER MACRO: Retrieve flags for sync instruction from OpcodeToken0.
+#define DECODE_D3D11_SB_SYNC_FLAGS(OperandToken0) ((OperandToken0)&D3D11_SB_SYNC_FLAGS_MASK)
+
+// ENCODER MACRO: Given a set of sync instruction flags, encode them in OpcodeToken0.
+#define ENCODE_D3D11_SB_SYNC_FLAGS(Flags) ((Flags)&D3D11_SB_SYNC_FLAGS_MASK)
+
+#define D3D10_SB_OPCODE_EXTENDED_MASK 0x80000000
+#define D3D10_SB_OPCODE_EXTENDED_SHIFT 31
+// DECODER MACRO: Determine if the opcode is extended
+// by an additional opcode token.  Currently there are no
+// extended opcodes.
+#define DECODE_IS_D3D10_SB_OPCODE_EXTENDED(OpcodeToken0) (((OpcodeToken0)&D3D10_SB_OPCODE_EXTENDED_MASK)>> D3D10_SB_OPCODE_EXTENDED_SHIFT)
+// ENCODER MACRO: Store in OpcodeToken0 whether the opcode is extended
+// by an additional opcode token.  
+#define ENCODE_D3D10_SB_OPCODE_EXTENDED(bExtended) (((bExtended)!=0)?D3D10_SB_OPCODE_EXTENDED_MASK:0)
+
+// ----------------------------------------------------------------------------
+// Extended Opcode Format (OpcodeToken1)
+//
+// If bit31 of an opcode token is set, the
+// opcode has an additional extended opcode token DWORD
+// directly following OpcodeToken0.  Other tokens
+// expected for the opcode, such as the operand
+// token(s) always follow
+// OpcodeToken0 AND OpcodeToken1..n (extended
+// opcode tokens, if present).
+//
+// [05:00] D3D10_SB_EXTENDED_OPCODE_TYPE
+// [30:06] if([05:00] == D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS)
+//         {
+//              This custom opcode contains controls for SAMPLE.
+//              [08:06] Ignored, 0.
+//              [12:09] U texel immediate offset (4 bit 2's comp) (0 default)
+//              [16:13] V texel immediate offset (4 bit 2's comp) (0 default)
+//              [20:17] W texel immediate offset (4 bit 2's comp) (0 default)
+//              [30:14] Ignored, 0.
+//         }
+//         else if( [05:00] == D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM )
+//         {
+//              [10:06] D3D10_SB_RESOURCE_DIMENSION
+//              [22:11] When dimension is D3D11_SB_RESOURCE_DIMENSION_STRUCTURED_BUFFER this holds the buffer stride, otherwise 0
+//              [30:23] Ignored, 0.
+//         }
+//         else if( [05:00] == D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE )
+//         {
+//              [09:06] D3D10_SB_RESOURCE_RETURN_TYPE for component X
+//              [13:10] D3D10_SB_RESOURCE_RETURN_TYPE for component Y
+//              [17:14] D3D10_SB_RESOURCE_RETURN_TYPE for component Z
+//              [21:18] D3D10_SB_RESOURCE_RETURN_TYPE for component W
+//              [30:22] Ignored, 0.
+//         }
+//         else
+//         {
+//              [30:04] Ignored, 0.
+//         }
+// [31]    0 normally. 1 there is another extended opcode.  Any number
+//         of extended opcode tokens can be chained.  It is possible that some extended
+//         opcode tokens could include multiple DWORDS - that is defined
+//         on a case by case basis.
+//
+// ----------------------------------------------------------------------------
+typedef enum D3D10_SB_EXTENDED_OPCODE_TYPE // value stored in bits [05:00] of an extended opcode token
+{
+    D3D10_SB_EXTENDED_OPCODE_EMPTY           = 0,
+    D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS = 1, // U/V/W texel immediate offsets for SAMPLE
+    D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM = 2, // resource dimension, plus stride for structured buffers
+    D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE = 3, // resource return type for components X, Y, Z, W
+} D3D10_SB_EXTENDED_OPCODE_TYPE;
+#define D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES 3
+
+#define D3D10_SB_EXTENDED_OPCODE_TYPE_MASK 0x0000003f
+
+// DECODER MACRO: Given an extended opcode
+// token (OpcodeToken1), figure out what type
+// of token it is (from D3D10_SB_EXTENDED_OPCODE_TYPE enum)
+// to be able to interpret the rest of the token's contents.
+#define DECODE_D3D10_SB_EXTENDED_OPCODE_TYPE(OpcodeToken1) ((D3D10_SB_EXTENDED_OPCODE_TYPE)((OpcodeToken1)&D3D10_SB_EXTENDED_OPCODE_TYPE_MASK))
+
+// ENCODER MACRO: Store extended opcode token
+// type in OpcodeToken1.
+#define ENCODE_D3D10_SB_EXTENDED_OPCODE_TYPE(ExtOpcodeType) ((ExtOpcodeType)&D3D10_SB_EXTENDED_OPCODE_TYPE_MASK)
+
+typedef enum D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD
+{
+    D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_U        = 0,
+    D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_V        = 1,
+    D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_W        = 2,
+} D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD;
+#define D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD_MASK (3)
+#define D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord) (9+4*((Coord)&D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD_MASK))
+#define D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_MASK(Coord) (0x0000000f<<D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord))
+
+// DECODER MACRO: Given an extended opcode token
+// (OpcodeToken1), and extended token type ==
+// D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS, determine the immediate
+// texel address offset for u/v/w (D3D10_SB_ADDRESS_OFFSET_COORD)
+// This macro returns a (signed) integer, by sign extending the
+// decoded 4 bit 2's complement immediate value.
+#define DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(Coord,OpcodeToken1) ((((OpcodeToken1)&D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_MASK(Coord))>>(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord))))
+
+// ENCODER MACRO: Store the immediate texel address offset
+// for U or V or W Coord (D3D10_SB_ADDRESS_OFFSET_COORD) in an extended
+// opcode token (OpcodeToken1) that has extended opcode
+// type == D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS (opcode type encoded separately)
+// A 2's complement number is expected as input, from which the LSB 4 bits are extracted.
+#define ENCODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(Coord,ImmediateOffset) (((ImmediateOffset)<<D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord))&D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_MASK(Coord))
+
+#define D3D11_SB_EXTENDED_RESOURCE_DIMENSION_MASK  0x000007C0
+#define D3D11_SB_EXTENDED_RESOURCE_DIMENSION_SHIFT 6
+
+// DECODER MACRO: Given an extended resource declaration token,
+// (D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM), determine the resource dimension
+// (D3D10_SB_RESOURCE_DIMENSION enum)
+#define DECODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION(OpcodeTokenN) ((D3D10_SB_RESOURCE_DIMENSION)(((OpcodeTokenN)&D3D11_SB_EXTENDED_RESOURCE_DIMENSION_MASK)>>D3D11_SB_EXTENDED_RESOURCE_DIMENSION_SHIFT))
+
+// ENCODER MACRO: Store resource dimension
+// (D3D10_SB_RESOURCE_DIMENSION enum) into a
+// an extended resource declaration token (D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM)
+#define ENCODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION(ResourceDim) (((ResourceDim)<<D3D11_SB_EXTENDED_RESOURCE_DIMENSION_SHIFT)&D3D11_SB_EXTENDED_RESOURCE_DIMENSION_MASK)
+
+#define D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_MASK  0x007FF800
+#define D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_SHIFT 11
+
+// DECODER MACRO: Given an extended resource declaration token for a structured buffer,
+// (D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM), determine the structure stride
+// (12-bit unsigned integer)
+#define DECODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE(OpcodeTokenN) (((OpcodeTokenN)&D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_MASK)>>D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_SHIFT)
+
+// ENCODER MACRO: Store resource dimension structure stride
+// (12-bit unsigned integer) into a
+// an extended resource declaration token (D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM)
+#define ENCODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE(Stride) (((Stride)<<D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_SHIFT)&D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_MASK)
+
+#define D3D10_SB_RESOURCE_RETURN_TYPE_MASK    0x0000000f
+#define D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS 0x00000004
+#define D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE_SHIFT 6
+
+// DECODER MACRO: Get the resource return type for component (0-3) from
+// an extended resource declaration token (D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE).
+// Component is parenthesized so that an expression argument (e.g. i + 1)
+// is scaled as a whole when the bit offset of the field is computed.
+#define DECODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(OpcodeTokenN, Component) \
+    ((D3D10_SB_RESOURCE_RETURN_TYPE)(((OpcodeTokenN) >> \
+    ((Component) * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS + D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE_SHIFT))&D3D10_SB_RESOURCE_RETURN_TYPE_MASK))
+
+// ENCODER MACRO: Generate a resource return type for a component in an extended
+// resource declaration token (D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE).
+// ReturnType is masked to its 4 bit field before being shifted into the
+// position selected by Component (parenthesized for the same reason as above).
+#define ENCODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(ReturnType, Component) \
+    (((ReturnType)&D3D10_SB_RESOURCE_RETURN_TYPE_MASK) << ((Component) * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS + D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE_SHIFT))
+
+// ----------------------------------------------------------------------------
+// Custom-Data Block Format
+//
+// DWORD 0 (CustomDataDescTok):
+// [10:00] == D3D10_SB_OPCODE_CUSTOMDATA
+// [31:11] == D3D10_SB_CUSTOMDATA_CLASS
+//
+// DWORD 1: 
+//          32-bit unsigned integer count of number
+//          of DWORDs in custom-data block,
+//          including DWORD 0 and DWORD 1.
+//          So the minimum value is 0x00000002,
+//          meaning empty custom-data.
+//
+// Layout of custom-data contents, for the various meta-data classes,
+// not defined in this file.
+//
+// ----------------------------------------------------------------------------
+
+typedef enum D3D10_SB_CUSTOMDATA_CLASS
+{
+    D3D10_SB_CUSTOMDATA_COMMENT = 0,
+    D3D10_SB_CUSTOMDATA_DEBUGINFO,
+    D3D10_SB_CUSTOMDATA_OPAQUE,
+    D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER,
+    D3D11_SB_CUSTOMDATA_SHADER_MESSAGE,
+    D3D11_SB_CUSTOMDATA_SHADER_CLIP_PLANE_CONSTANT_MAPPINGS_FOR_DX9,
+} D3D10_SB_CUSTOMDATA_CLASS;
+
+#define D3D10_SB_CUSTOMDATA_CLASS_MASK 0xfffff800
+#define D3D10_SB_CUSTOMDATA_CLASS_SHIFT 11
+// DECODER MACRO: Find out what class of custom-data is present.
+// The contents of the custom-data block are defined
+// for each class of custom-data.
+#define DECODE_D3D10_SB_CUSTOMDATA_CLASS(CustomDataDescTok) ((D3D10_SB_CUSTOMDATA_CLASS)(((CustomDataDescTok)&D3D10_SB_CUSTOMDATA_CLASS_MASK)>>D3D10_SB_CUSTOMDATA_CLASS_SHIFT))
+// ENCODER MACRO: Create complete CustomDataDescTok
+#define ENCODE_D3D10_SB_CUSTOMDATA_CLASS(CustomDataClass) (ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CUSTOMDATA)|(((CustomDataClass)<<D3D10_SB_CUSTOMDATA_CLASS_SHIFT)&D3D10_SB_CUSTOMDATA_CLASS_MASK))
+
+// ----------------------------------------------------------------------------
+// Instruction Operand Format (OperandToken0)
+//
+// [01:00] D3D10_SB_OPERAND_NUM_COMPONENTS
+// [11:02] Component Selection
+//         if([01:00] == D3D10_SB_OPERAND_0_COMPONENT)
+//              [11:02] = Ignored, 0
+//         else if([01:00] == D3D10_SB_OPERAND_1_COMPONENT
+//              [11:02] = Ignored, 0
+//         else if([01:00] == D3D10_SB_OPERAND_4_COMPONENT
+//         {
+//              [03:02] = D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE
+//              if([03:02] == D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE)
+//              {
+//                  [07:04] = D3D10_SB_OPERAND_4_COMPONENT_MASK
+//                  [11:08] = Ignored, 0
+//              }
+//              else if([03:02] == D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE)
+//              {
+//                  [11:04] = D3D10_SB_4_COMPONENT_SWIZZLE
+//              }
+//              else if([03:02] == D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE)
+//              {
+//                  [05:04] = D3D10_SB_4_COMPONENT_NAME
+//                  [11:06] = Ignored, 0
+//              }
+//         }
+//         else if([01:00] == D3D10_SB_OPERAND_N_COMPONENT)
+//         {
+//              Currently not defined.
+//         }
+// [19:12] D3D10_SB_OPERAND_TYPE
+// [21:20] D3D10_SB_OPERAND_INDEX_DIMENSION:
+//            Number of dimensions in the register
+//            file (NOT the # of dimensions in the
+//            individual register or memory
+//            resource being referenced).
+// [24:22] if( [21:20] >= D3D10_SB_OPERAND_INDEX_1D )
+//             D3D10_SB_OPERAND_INDEX_REPRESENTATION for first operand index
+//         else
+//             Ignored, 0
+// [27:25] if( [21:20] >= D3D10_SB_OPERAND_INDEX_2D )
+//             D3D10_SB_OPERAND_INDEX_REPRESENTATION for second operand index
+//         else
+//             Ignored, 0
+// [30:28] if( [21:20] == D3D10_SB_OPERAND_INDEX_3D )
+//             D3D10_SB_OPERAND_INDEX_REPRESENTATION for third operand index
+//         else
+//             Ignored, 0
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.
+//
+// ----------------------------------------------------------------------------
+
+// Number of components in data vector referred to by operand.
+typedef enum D3D10_SB_OPERAND_NUM_COMPONENTS
+{
+    D3D10_SB_OPERAND_0_COMPONENT = 0,
+    D3D10_SB_OPERAND_1_COMPONENT = 1,
+    D3D10_SB_OPERAND_4_COMPONENT = 2,
+    D3D10_SB_OPERAND_N_COMPONENT = 3 // unused for now
+} D3D10_SB_OPERAND_NUM_COMPONENTS;
+#define D3D10_SB_OPERAND_NUM_COMPONENTS_MASK 0x00000003
+
+// DECODER MACRO: Extract from OperandToken0 how many components
+// the data vector referred to by the operand contains.
+// (D3D10_SB_OPERAND_NUM_COMPONENTS enum)
+#define DECODE_D3D10_SB_OPERAND_NUM_COMPONENTS(OperandToken0) ((D3D10_SB_OPERAND_NUM_COMPONENTS)((OperandToken0)&D3D10_SB_OPERAND_NUM_COMPONENTS_MASK))
+
+// ENCODER MACRO: Define in OperandToken0 how many components
+// the data vector referred to by the operand contains.
+// (D3D10_SB_OPERAND_NUM_COMPONENTS enum).
+#define ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(NumComp) ((NumComp)&D3D10_SB_OPERAND_NUM_COMPONENTS_MASK)
+
+typedef enum D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE
+{
+    D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE    = 0,  // mask 4 components
+    D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE = 1,  // swizzle 4 components
+    D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE = 2, // select 1 of 4 components
+} D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE;
+
+#define D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_MASK  0x0000000c
+#define D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_SHIFT 2
+
+// DECODER MACRO: For an operand representing 4component data,
+// extract from OperandToken0 the method for selecting data from
+// the 4 components (D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE).
+#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(OperandToken0) ((D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE)(((OperandToken0)&D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_MASK)>>D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_SHIFT))
+
+// ENCODER MACRO: For an operand representing 4component data,
+// encode in OperandToken0 a value from D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE
+#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(SelectionMode) (((SelectionMode)<<D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_SHIFT)&D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_MASK)
+
+typedef enum D3D10_SB_4_COMPONENT_NAME
+{
+    D3D10_SB_4_COMPONENT_X = 0,
+    D3D10_SB_4_COMPONENT_Y = 1,
+    D3D10_SB_4_COMPONENT_Z = 2,
+    D3D10_SB_4_COMPONENT_W = 3,
+    D3D10_SB_4_COMPONENT_R = 0,
+    D3D10_SB_4_COMPONENT_G = 1,
+    D3D10_SB_4_COMPONENT_B = 2,
+    D3D10_SB_4_COMPONENT_A = 3
+} D3D10_SB_4_COMPONENT_NAME;
+#define D3D10_SB_4_COMPONENT_NAME_MASK 3
+
+// MACROS FOR USE IN D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE:
+
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK   0x000000f0
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT  4
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_X      0x00000010
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_Y      0x00000020
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_Z      0x00000040
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_W      0x00000080
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_R      D3D10_SB_OPERAND_4_COMPONENT_MASK_X
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_G      D3D10_SB_OPERAND_4_COMPONENT_MASK_Y
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_B      D3D10_SB_OPERAND_4_COMPONENT_MASK_Z
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_A      D3D10_SB_OPERAND_4_COMPONENT_MASK_W
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_ALL    D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK
+
+// DECODER MACRO: When 4 component selection mode is
+// D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE, this macro
+// extracts from OperandToken0 the 4 component (xyzw) mask,
+// as a field of D3D10_SB_OPERAND_4_COMPONENT_MASK_[X|Y|Z|W] flags.
+// Alternatively, the D3D10_SB_OPERAND_4_COMPONENT_MASK_[X|Y|Z|W] masks
+// can be tested on OperandToken0 directly, without this macro.
+#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(OperandToken0) ((OperandToken0)&D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK)
+
+// ENCODER MACRO: Given a set of
+// D3D10_SB_OPERAND_4_COMPONENT_MASK_[X|Y|Z|W] values
+// or'd together, encode them in OperandToken0.
+#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(ComponentMask) ((ComponentMask)&D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK)
+
+// ENCODER/DECODER MACRO: Given a D3D10_SB_4_COMPONENT_NAME,
+// generate the 4-component mask for it.
+// This can be used in loops that build masks or read masks.
+// Alternatively, the D3D10_SB_OPERAND_4_COMPONENT_MASK_[X|Y|Z|W] masks
+// can be used directly, without this macro.
+// ComponentName is parenthesized so that expression arguments (e.g. c & 3)
+// are evaluated before being added to the shift.
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK(ComponentName) ((1<<(D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT+(ComponentName)))&D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK)
+
+// MACROS FOR USE IN D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE:
+
+#define D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MASK 0x00000ff0
+#define D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SHIFT 4
+
+// DECODER MACRO: When 4 component selection mode is
+// D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE, this macro
+// extracts from OperandToken0 the 4 component swizzle,
+// as a field of D3D10_SB_OPERAND_4_COMPONENT_MASK_[X|Y|Z|W] flags.
+#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(OperandToken0) ((OperandToken0)&D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MASK)
+
+// DECODER MACRO: Pass a D3D10_SB_4_COMPONENT_NAME as "DestComp" in following
+// macro to extract, from OperandToken0 or from a decoded swizzle,
+// the swizzle source component (D3D10_SB_4_COMPONENT_NAME enum):
+#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE(OperandToken0,DestComp) ((D3D10_SB_4_COMPONENT_NAME)(((OperandToken0)>>(D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SHIFT+2*((DestComp)&D3D10_SB_4_COMPONENT_NAME_MASK)))&D3D10_SB_4_COMPONENT_NAME_MASK))
+
+// ENCODER MACRO: Generate a 4 component swizzle given
+// 4 D3D10_SB_4_COMPONENT_NAME source values for dest
+// components x, y, z, w respectively.
+#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(XSrc,YSrc,ZSrc,WSrc) ((((XSrc)&D3D10_SB_4_COMPONENT_NAME_MASK)|     \
+                                                                     (((YSrc)&D3D10_SB_4_COMPONENT_NAME_MASK)<<2)| \
+                                                                     (((ZSrc)&D3D10_SB_4_COMPONENT_NAME_MASK)<<4)| \
+                                                                     (((WSrc)&D3D10_SB_4_COMPONENT_NAME_MASK)<<6)  \
+                                                                      )<<D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SHIFT)
+
+// ENCODER/DECODER MACROS: Various common swizzle patterns
+// (noswizzle and replicate of each channels)
+#define D3D10_SB_OPERAND_4_COMPONENT_NOSWIZZLE   ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_X,\
+                                                                                   D3D10_SB_4_COMPONENT_Y,\
+                                                                                   D3D10_SB_4_COMPONENT_Z,\
+                                                                                   D3D10_SB_4_COMPONENT_W)
+
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEX  ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_X,\
+                                                                                   D3D10_SB_4_COMPONENT_X,\
+                                                                                   D3D10_SB_4_COMPONENT_X,\
+                                                                                   D3D10_SB_4_COMPONENT_X)
+
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEY  ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_Y,\
+                                                                                   D3D10_SB_4_COMPONENT_Y,\
+                                                                                   D3D10_SB_4_COMPONENT_Y,\
+                                                                                   D3D10_SB_4_COMPONENT_Y)
+
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEZ  ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_Z,\
+                                                                                   D3D10_SB_4_COMPONENT_Z,\
+                                                                                   D3D10_SB_4_COMPONENT_Z,\
+                                                                                   D3D10_SB_4_COMPONENT_Z)
+
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEW  ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_W,\
+                                                                                   D3D10_SB_4_COMPONENT_W,\
+                                                                                   D3D10_SB_4_COMPONENT_W,\
+                                                                                   D3D10_SB_4_COMPONENT_W)
+
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATERED    D3D10_SB_OPERAND_4_COMPONENT_REPLICATEX
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEGREEN  D3D10_SB_OPERAND_4_COMPONENT_REPLICATEY
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEBLUE   D3D10_SB_OPERAND_4_COMPONENT_REPLICATEZ
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEALPHA  D3D10_SB_OPERAND_4_COMPONENT_REPLICATEW
+
+// MACROS FOR USE IN D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE:
+#define D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MASK   0x00000030
+#define D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_SHIFT  4
+
+// DECODER MACRO: When 4 component selection mode is
+// D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE, this macro
+// extracts from OperandToken0 a D3D10_SB_4_COMPONENT_NAME
+// which picks one of the 4 components.
+#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(OperandToken0) ((D3D10_SB_4_COMPONENT_NAME)(((OperandToken0)&D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MASK)>>D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_SHIFT))
+
+// ENCODER MACRO: Given a D3D10_SB_4_COMPONENT_NAME selecting
+// a single component for D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE,
+// encode it into OperandToken0
+#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(SelectedComp) (((SelectedComp)<<D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_SHIFT)&D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MASK)
+
+// MACROS FOR DETERMINING OPERAND TYPE:
+
+typedef enum D3D10_SB_OPERAND_TYPE
+{
+    D3D10_SB_OPERAND_TYPE_TEMP           = 0,  // Temporary Register File
+    D3D10_SB_OPERAND_TYPE_INPUT          = 1,  // General Input Register File
+    D3D10_SB_OPERAND_TYPE_OUTPUT         = 2,  // General Output Register File
+    D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP = 3,  // Temporary Register File (indexable)
+    D3D10_SB_OPERAND_TYPE_IMMEDIATE32    = 4,  // 32bit/component immediate value(s)
+                                          // If for example, operand token bits
+                                          // [01:00]==D3D10_SB_OPERAND_4_COMPONENT,
+                                          // this means that the operand type:
+                                          // D3D10_SB_OPERAND_TYPE_IMMEDIATE32
+                                          // results in 4 additional 32bit
+                                          // DWORDS present for the operand.
+    D3D10_SB_OPERAND_TYPE_IMMEDIATE64    = 5,  // 64bit/comp.imm.val(s)HI:LO
+    D3D10_SB_OPERAND_TYPE_SAMPLER        = 6,  // Reference to sampler state
+    D3D10_SB_OPERAND_TYPE_RESOURCE       = 7,  // Reference to memory resource (e.g. texture)
+    D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER= 8,  // Reference to constant buffer
+    D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER= 9,  // Reference to immediate constant buffer
+    D3D10_SB_OPERAND_TYPE_LABEL          = 10, // Label
+    D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID = 11, // Input primitive ID
+    D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH   = 12, // Output Depth
+    D3D10_SB_OPERAND_TYPE_NULL           = 13, // Null register, used to discard results of operations
+                                               // Below Are operands new in DX 10.1
+    D3D10_SB_OPERAND_TYPE_RASTERIZER     = 14, // DX10.1 Rasterizer register, used to denote the depth/stencil and render target resources
+    D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15, // DX10.1 PS output MSAA coverage mask (scalar)
+                                               // Below Are operands new in DX 11
+    D3D11_SB_OPERAND_TYPE_STREAM         = 16, // Reference to GS stream output resource
+    D3D11_SB_OPERAND_TYPE_FUNCTION_BODY  = 17, // Reference to a function definition
+    D3D11_SB_OPERAND_TYPE_FUNCTION_TABLE = 18, // Reference to a set of functions used by a class
+    D3D11_SB_OPERAND_TYPE_INTERFACE      = 19, // Reference to an interface
+    D3D11_SB_OPERAND_TYPE_FUNCTION_INPUT = 20, // Reference to an input parameter to a function
+    D3D11_SB_OPERAND_TYPE_FUNCTION_OUTPUT = 21, // Reference to an output parameter to a function
+    D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID = 22, // HS Control Point phase input saying which output control point ID this is
+    D3D11_SB_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID = 23, // HS Fork Phase input instance ID
+    D3D11_SB_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID = 24, // HS Join Phase input instance ID
+    D3D11_SB_OPERAND_TYPE_INPUT_CONTROL_POINT = 25, // HS Fork+Join, DS phase input control points (array of them)
+    D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT = 26, // HS Fork+Join phase output control points (array of them)
+    D3D11_SB_OPERAND_TYPE_INPUT_PATCH_CONSTANT = 27, // DS+HSJoin Input Patch Constants (array of them)
+    D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT = 28, // DS Input Domain point
+    D3D11_SB_OPERAND_TYPE_THIS_POINTER       = 29, // Reference to an interface this pointer
+    D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW = 30, // Reference to UAV u#
+    D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY = 31, // Reference to Thread Group Shared Memory g#
+    D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID = 32, // Compute Shader Thread ID
+    D3D11_SB_OPERAND_TYPE_INPUT_THREAD_GROUP_ID = 33, // Compute Shader Thread Group ID
+    D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP = 34, // Compute Shader Thread ID In Thread Group
+    D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK = 35, // Pixel shader coverage mask input
+    D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED = 36, // Compute Shader Thread ID In Group Flattened to a 1D value.
+    D3D11_SB_OPERAND_TYPE_INPUT_GS_INSTANCE_ID = 37, // Input GS instance ID
+    D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL = 38, // Output Depth, forced to be greater than or equal than current depth
+    D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL    = 39, // Output Depth, forced to be less than or equal to current depth
+    D3D11_SB_OPERAND_TYPE_CYCLE_COUNTER = 40, // Cycle counter
+    D3D11_SB_OPERAND_TYPE_OUTPUT_STENCIL_REF = 41, // DX11 PS output stencil reference (scalar)
+    D3D11_SB_OPERAND_TYPE_INNER_COVERAGE = 42, // DX11 PS input inner coverage (scalar)
+} D3D10_SB_OPERAND_TYPE;
+
+#define D3D10_SB_OPERAND_TYPE_MASK   0x000ff000
+#define D3D10_SB_OPERAND_TYPE_SHIFT  12
+
+// DECODER MACRO: Determine operand type from OperandToken0.
+#define DECODE_D3D10_SB_OPERAND_TYPE(OperandToken0) ((D3D10_SB_OPERAND_TYPE)(((OperandToken0)&D3D10_SB_OPERAND_TYPE_MASK)>>D3D10_SB_OPERAND_TYPE_SHIFT))
+
+// ENCODER MACRO: Store operand type in OperandToken0.
+#define ENCODE_D3D10_SB_OPERAND_TYPE(OperandType) (((OperandType)<<D3D10_SB_OPERAND_TYPE_SHIFT)&D3D10_SB_OPERAND_TYPE_MASK)
+
+typedef enum D3D10_SB_OPERAND_INDEX_DIMENSION
+{
+    D3D10_SB_OPERAND_INDEX_0D = 0, // e.g. Position
+    D3D10_SB_OPERAND_INDEX_1D = 1, // Most common.  e.g. Temp registers.
+    D3D10_SB_OPERAND_INDEX_2D = 2, // e.g. Geometry Program Input registers.
+    D3D10_SB_OPERAND_INDEX_3D = 3, // 3D rarely if ever used.
+} D3D10_SB_OPERAND_INDEX_DIMENSION;
+#define D3D10_SB_OPERAND_INDEX_DIMENSION_MASK  0x00300000
+#define D3D10_SB_OPERAND_INDEX_DIMENSION_SHIFT 20
+
+// DECODER MACRO: Determine operand index dimension from OperandToken0.
+#define DECODE_D3D10_SB_OPERAND_INDEX_DIMENSION(OperandToken0) ((D3D10_SB_OPERAND_INDEX_DIMENSION)(((OperandToken0)&D3D10_SB_OPERAND_INDEX_DIMENSION_MASK)>>D3D10_SB_OPERAND_INDEX_DIMENSION_SHIFT))
+
+// ENCODER MACRO: Store operand index dimension
+// (D3D10_SB_OPERAND_INDEX_DIMENSION enum) in OperandToken0.
+#define ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(OperandIndexDim) (((OperandIndexDim)<<D3D10_SB_OPERAND_INDEX_DIMENSION_SHIFT)&D3D10_SB_OPERAND_INDEX_DIMENSION_MASK)
+
+typedef enum D3D10_SB_OPERAND_INDEX_REPRESENTATION
+{
+    D3D10_SB_OPERAND_INDEX_IMMEDIATE32               = 0, // Extra DWORD
+    D3D10_SB_OPERAND_INDEX_IMMEDIATE64               = 1, // 2 Extra DWORDs
+                                                     //   (HI32:LO32)
+    D3D10_SB_OPERAND_INDEX_RELATIVE                  = 2, // Extra operand
+    D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, // Extra DWORD followed by
+                                                     //   extra operand
+    D3D10_SB_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4, // 2 Extra DWORDS
+                                                     //   (HI32:LO32) followed
+                                                     //   by extra operand
+} D3D10_SB_OPERAND_INDEX_REPRESENTATION;
+// Bit position of the index representation field for index dimension Dim
+// (0, 1 or 2): the fields start at bit 22 and are 3 bits apart.
+#define D3D10_SB_OPERAND_INDEX_REPRESENTATION_SHIFT(Dim) (22+3*((Dim)&3))
+// Each field is 3 bits wide ([24:22], [27:25], [30:28]); all 3 bits are needed
+// because D3D10_SB_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE has the value 4.
+#define D3D10_SB_OPERAND_INDEX_REPRESENTATION_MASK(Dim) (0x7<<D3D10_SB_OPERAND_INDEX_REPRESENTATION_SHIFT(Dim))
+
+// DECODER MACRO: Determine from OperandToken0 what representation
+// an operand index is provided as (D3D10_SB_OPERAND_INDEX_REPRESENTATION enum),
+// for index dimension [0], [1] or [2], depending on D3D10_SB_OPERAND_INDEX_DIMENSION.
+#define DECODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(Dim,OperandToken0) ((D3D10_SB_OPERAND_INDEX_REPRESENTATION)(((OperandToken0)&D3D10_SB_OPERAND_INDEX_REPRESENTATION_MASK(Dim))>>D3D10_SB_OPERAND_INDEX_REPRESENTATION_SHIFT(Dim)))
+
+// ENCODER MACRO: Store in OperandToken0 what representation
+// an operand index is provided as (D3D10_SB_OPERAND_INDEX_REPRESENTATION enum),
+// for index dimension [0], [1] or [2], depending on D3D10_SB_OPERAND_INDEX_DIMENSION.
+#define ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(Dim,IndexRepresentation) (((IndexRepresentation)<<D3D10_SB_OPERAND_INDEX_REPRESENTATION_SHIFT(Dim))&D3D10_SB_OPERAND_INDEX_REPRESENTATION_MASK(Dim))
+
+#define D3D10_SB_OPERAND_EXTENDED_MASK  0x80000000
+#define D3D10_SB_OPERAND_EXTENDED_SHIFT 31
+
+// DECODER MACRO: Determine if the operand is extended
+// by an additional opcode token.
+#define DECODE_IS_D3D10_SB_OPERAND_EXTENDED(OperandToken0) (((OperandToken0)&D3D10_SB_OPERAND_EXTENDED_MASK)>>D3D10_SB_OPERAND_EXTENDED_SHIFT)
+
+// ENCODER MACRO: Store in OperandToken0 whether the operand is extended
+// by an additional operand token.
+#define ENCODE_D3D10_SB_OPERAND_EXTENDED(bExtended) (((bExtended)!=0)?D3D10_SB_OPERAND_EXTENDED_MASK:0)
+
+// ----------------------------------------------------------------------------
+// Extended Instruction Operand Format (OperandToken1)
+//
+// If bit31 of an operand token is set, the
+// operand has additional data in a second DWORD
+// directly following OperandToken0.  Other tokens
+// expected for the operand, such as immmediate
+// values or relative address operands (full
+// operands in themselves) always follow
+// OperandToken0 AND OperandToken1..n (extended
+// operand tokens, if present).
+//
+// [05:00] D3D10_SB_EXTENDED_OPERAND_TYPE
+// [16:06] if([05:00] == D3D10_SB_EXTENDED_OPERAND_MODIFIER)
+//         {
+//              [13:06] D3D10_SB_OPERAND_MODIFIER
+//              [16:14] Min Precision: D3D11_SB_OPERAND_MIN_PRECISION
+//              [17:17] Non-uniform: D3D12_SB_OPERAND_NON_UNIFORM
+//         }
+//         else
+//         {
+//              [17:06] Ignored, 0.
+//         }
+// [30:18] Ignored, 0.
+// [31]    0 normally. 1 if second order extended operand definition,
+//         meaning next DWORD contains yet ANOTHER extended operand
+//         description. Currently no second order extensions defined.
+//         This would be useful if a particular extended operand does
+//         not have enough space to store the required information in
+//         a single token and so is extended further.
+//
+// ----------------------------------------------------------------------------
+
+// Kinds of extended operand token (OperandToken1).  The kind is stored in
+// bits [05:00] of the token.
+typedef enum D3D10_SB_EXTENDED_OPERAND_TYPE
+{
+    // Might be used if this enum is full and a further extended opcode
+    // is needed.
+    D3D10_SB_EXTENDED_OPERAND_EMPTY    = 0,
+    // Token carries operand modifier information.
+    D3D10_SB_EXTENDED_OPERAND_MODIFIER = 1
+} D3D10_SB_EXTENDED_OPERAND_TYPE;
+
+// Selects bits [05:00] of OperandToken1.
+#define D3D10_SB_EXTENDED_OPERAND_TYPE_MASK 0x0000003F
+
+// DECODER MACRO: Given an extended operand token (OperandToken1), return
+// its kind as a D3D10_SB_EXTENDED_OPERAND_TYPE so that the remainder of
+// the token can be interpreted.
+#define DECODE_D3D10_SB_EXTENDED_OPERAND_TYPE(Token) \
+    ((D3D10_SB_EXTENDED_OPERAND_TYPE)((Token) & D3D10_SB_EXTENDED_OPERAND_TYPE_MASK))
+
+// ENCODER MACRO: Produce the OperandToken1 bits that store the given
+// extended operand token type.
+#define ENCODE_D3D10_SB_EXTENDED_OPERAND_TYPE(Type) \
+    ((Type) & D3D10_SB_EXTENDED_OPERAND_TYPE_MASK)
+
+// Source operand modifiers, stored in bits [13:06] of a
+// D3D10_SB_EXTENDED_OPERAND_MODIFIER extended token.
+typedef enum D3D10_SB_OPERAND_MODIFIER
+{
+    // Nop.  This is the implied default if the extended operand is not
+    // present for an operand for which source modifiers are meaningful.
+    D3D10_SB_OPERAND_MODIFIER_NONE   = 0,
+    D3D10_SB_OPERAND_MODIFIER_NEG    = 1, // Negate
+    D3D10_SB_OPERAND_MODIFIER_ABS    = 2, // Absolute value, abs()
+    D3D10_SB_OPERAND_MODIFIER_ABSNEG = 3  // -abs()
+} D3D10_SB_OPERAND_MODIFIER;
+
+#define D3D10_SB_OPERAND_MODIFIER_SHIFT 6
+#define D3D10_SB_OPERAND_MODIFIER_MASK  0x00003FC0
+
+// DECODER MACRO: Given a D3D10_SB_EXTENDED_OPERAND_MODIFIER extended token
+// (OperandToken1), return the source modifier it carries
+// (D3D10_SB_OPERAND_MODIFIER enum).
+#define DECODE_D3D10_SB_OPERAND_MODIFIER(Token) \
+    ((D3D10_SB_OPERAND_MODIFIER)(((Token) & D3D10_SB_OPERAND_MODIFIER_MASK) >> \
+                                 D3D10_SB_OPERAND_MODIFIER_SHIFT))
+
+// ENCODER MACRO: Build a complete source modifier extended token
+// (OperandToken1) from a D3D10_SB_OPERAND_MODIFIER value.  The extended
+// operand type field is set to D3D10_SB_EXTENDED_OPERAND_MODIFIER and the
+// "double extended" bit is cleared.
+#define ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(Modifier) \
+    ((((Modifier) << D3D10_SB_OPERAND_MODIFIER_SHIFT) & D3D10_SB_OPERAND_MODIFIER_MASK) | \
+     ENCODE_D3D10_SB_EXTENDED_OPERAND_TYPE(D3D10_SB_EXTENDED_OPERAND_MODIFIER) | \
+     ENCODE_D3D10_SB_OPERAND_DOUBLE_EXTENDED(0))
+
+// Minimum precision specifier for source/dest operands, carried in bits
+// [16:14] of the extended operand token.  Implementations are free to
+// execute at higher precision than the minimum - details spec'ed elsewhere.
+// This is part of the opcode specific control range.
+typedef enum D3D11_SB_OPERAND_MIN_PRECISION
+{
+    D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT   = 0, // Default precision for the shader model
+    D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_16  = 1, // Min 16 bit/component float
+    D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_2_8 = 2, // Min 10(2.8)bit/comp. float
+    D3D11_SB_OPERAND_MIN_PRECISION_SINT_16   = 4, // Min 16 bit/comp. signed integer
+    D3D11_SB_OPERAND_MIN_PRECISION_UINT_16   = 5  // Min 16 bit/comp. unsigned integer
+} D3D11_SB_OPERAND_MIN_PRECISION;
+
+#define D3D11_SB_OPERAND_MIN_PRECISION_SHIFT 14
+#define D3D11_SB_OPERAND_MIN_PRECISION_MASK  0x0001C000
+
+// DECODER MACRO: Read the minimum execution precision out of an
+// OperandToken1 that can specify one.
+#define DECODE_D3D11_SB_OPERAND_MIN_PRECISION(Token) \
+    ((D3D11_SB_OPERAND_MIN_PRECISION)(((Token) & D3D11_SB_OPERAND_MIN_PRECISION_MASK) >> \
+                                      D3D11_SB_OPERAND_MIN_PRECISION_SHIFT))
+
+// ENCODER MACRO: Place a minimum execution precision into the bits of the
+// extended operand token, OperandToken1.
+#define ENCODE_D3D11_SB_OPERAND_MIN_PRECISION(Precision) \
+    (((Precision) << D3D11_SB_OPERAND_MIN_PRECISION_SHIFT) & D3D11_SB_OPERAND_MIN_PRECISION_MASK)
+
+
+// Non-uniform extended operand modifier: a single flag at bit 17 of
+// OperandToken1.
+#define D3D12_SB_OPERAND_NON_UNIFORM_SHIFT 17
+#define D3D12_SB_OPERAND_NON_UNIFORM_MASK  0x00020000
+
+// DECODER MACRO: Read the non-uniform flag from an OperandToken1 that can
+// specify a non-uniform operand.
+#define DECODE_D3D12_SB_OPERAND_NON_UNIFORM(Token) \
+    (((Token) & D3D12_SB_OPERAND_NON_UNIFORM_MASK) >> D3D12_SB_OPERAND_NON_UNIFORM_SHIFT)
+
+// ENCODER MACRO: Place the non-uniform state into the bits of the extended
+// operand token, OperandToken1.
+#define ENCODE_D3D12_SB_OPERAND_NON_UNIFORM(IsNonUniform) \
+    (((IsNonUniform) << D3D12_SB_OPERAND_NON_UNIFORM_SHIFT) & D3D12_SB_OPERAND_NON_UNIFORM_MASK)
+
+
+// Bit 31 of OperandToken1 marks an operand that is extended a second time.
+#define D3D10_SB_OPERAND_DOUBLE_EXTENDED_SHIFT 31
+#define D3D10_SB_OPERAND_DOUBLE_EXTENDED_MASK  0x80000000
+
+// DECODER MACRO: Tell whether an extended operand token (OperandToken1) is
+// itself followed by yet another extension token (OperandToken2).
+// Currently there are no secondary extended operand tokens.
+#define DECODE_IS_D3D10_SB_OPERAND_DOUBLE_EXTENDED(Token) \
+    (((Token) & D3D10_SB_OPERAND_DOUBLE_EXTENDED_MASK) >> D3D10_SB_OPERAND_DOUBLE_EXTENDED_SHIFT)
+
+// ENCODER MACRO: Produce the OperandToken1 bit recording whether the
+// operand is extended by an additional operand token.  Any nonzero
+// argument sets the bit.  Currently there are no secondary extended
+// operand tokens.
+#define ENCODE_D3D10_SB_OPERAND_DOUBLE_EXTENDED(IsExtended) \
+    (((IsExtended) != 0) ? D3D10_SB_OPERAND_DOUBLE_EXTENDED_MASK : 0)
+
+// ----------------------------------------------------------------------------
+// Name Token (NameToken) (used in declaration statements)
+//
+// [15:00] D3D10_SB_NAME enumeration
+// [31:16] Reserved, 0
+//
+// ----------------------------------------------------------------------------
+// Selects bits [15:00] of a NameToken, which hold the D3D10_SB_NAME value.
+#define D3D10_SB_NAME_MASK  0x0000FFFF
+
+// DECODER MACRO: Extract the D3D10_SB_NAME from a NameToken.
+#define DECODE_D3D10_SB_NAME(Token) \
+    ((D3D10_SB_NAME)((Token) & D3D10_SB_NAME_MASK))
+
+// ENCODER MACRO: Build a complete NameToken from a D3D10_SB_NAME.
+#define ENCODE_D3D10_SB_NAME(NameValue) \
+    ((NameValue) & D3D10_SB_NAME_MASK)
+
+//---------------------------------------------------------------------
+// Declaration Statements
+//
+// Declarations start with a standard opcode token,
+// having opcode type being D3D10_SB_OPCODE_DCL*.
+// Each particular declaration type has custom
+// operand token(s), described below.
+//---------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Global Flags Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS
+// [11:11] Refactoring allowed if bit set.
+// [12:12] Enable double precision float ops.
+// [13:13] Force early depth-stencil test.
+// [14:14] Enable RAW and structured buffers in non-CS 4.x shaders.
+// [15:15] Skip optimizations of shader IL when translating to native code
+// [16:16] Enable minimum-precision data types
+// [17:17] Enable 11.1 double-precision floating-point instruction extensions
+// [18:18] Enable 11.1 non-double instruction extensions
+// [19:19] All resources bound.
+// [23:20] Reserved for future flags.
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by no operands.
+//
+// ----------------------------------------------------------------------------
+// Individual global flag bits, positioned as they appear in OpcodeToken0 of
+// the global flags declaration.
+#define D3D10_SB_GLOBAL_FLAG_REFACTORING_ALLOWED               (1 << 11)
+#define D3D11_SB_GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS (1 << 12)
+#define D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL         (1 << 13)
+#define D3D11_SB_GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS (1 << 14)
+#define D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION               (1 << 15)
+#define D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION        (1 << 16)
+#define D3D11_1_SB_GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS        (1 << 17)
+#define D3D11_1_SB_GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS        (1 << 18)
+#define D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND               (1 << 19)
+
+// Selects bits [23:11] of OpcodeToken0: every flag position, whether
+// currently assigned or not.
+#define D3D10_SB_GLOBAL_FLAGS_MASK  0x00FFF800
+
+// DECODER MACRO: Extract the global flags from OpcodeToken0.  The flags
+// are returned in place (not shifted down).
+#define DECODE_D3D10_SB_GLOBAL_FLAGS(Token) \
+    ((Token) & D3D10_SB_GLOBAL_FLAGS_MASK)
+
+// ENCODER MACRO: Keep only the global flag bits of Flags, ready to be
+// combined into OpcodeToken0.
+#define ENCODE_D3D10_SB_GLOBAL_FLAGS(FlagBits) \
+    ((FlagBits) & D3D10_SB_GLOBAL_FLAGS_MASK)
+
+// ----------------------------------------------------------------------------
+// Resource Declaration (non multisampled)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_RESOURCE
+// [15:11] D3D10_SB_RESOURCE_DIMENSION
+// [23:16] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Models 4.0 through 5.0:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+//
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (t<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of resources in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the t# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (t<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of resource within space (may be dynamically indexed)
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+// The resource dimension occupies bits [15:11] of OpcodeToken0.
+#define D3D10_SB_RESOURCE_DIMENSION_SHIFT 11
+#define D3D10_SB_RESOURCE_DIMENSION_MASK  0x0000F800
+
+// DECODER MACRO: Given a resource declaration token (OpcodeToken0),
+// return the resource dimension (D3D10_SB_RESOURCE_DIMENSION enum).
+#define DECODE_D3D10_SB_RESOURCE_DIMENSION(Token) \
+    ((D3D10_SB_RESOURCE_DIMENSION)(((Token) & D3D10_SB_RESOURCE_DIMENSION_MASK) >> \
+                                   D3D10_SB_RESOURCE_DIMENSION_SHIFT))
+
+// ENCODER MACRO: Place a resource dimension
+// (D3D10_SB_RESOURCE_DIMENSION enum) into the bits of a resource
+// declaration token (OpcodeToken0).
+#define ENCODE_D3D10_SB_RESOURCE_DIMENSION(Dimension) \
+    (((Dimension) << D3D10_SB_RESOURCE_DIMENSION_SHIFT) & D3D10_SB_RESOURCE_DIMENSION_MASK)
+
+// ----------------------------------------------------------------------------
+// Resource Declaration (multisampled)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_RESOURCE (same opcode as non-multisampled case)
+// [15:11] D3D10_SB_RESOURCE_DIMENSION (must be TEXTURE2DMS or TEXTURE2DMSARRAY)
+// [22:16] Sample count 1...127.  0 is currently disallowed, though
+//         in future versions 0 could mean "configurable" sample count
+// [23:23] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Models 4.0 through 5.0:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+//
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (t<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of resources in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the t# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (t<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of resource within space (may be dynamically indexed)
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+
+// Use the same macros for encoding/decoding the resource dimension as the non-MSAA declaration.
+
+// The sample count occupies bits [22:16] of OpcodeToken0.
+#define D3D10_SB_RESOURCE_SAMPLE_COUNT_MASK  0x007F0000
+#define D3D10_SB_RESOURCE_SAMPLE_COUNT_SHIFT 16
+
+// DECODER MACRO: Given a resource declaration token,
+// (OpcodeToken0), determine the resource sample count (1..127)
+#define DECODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(OpcodeToken0) ((UINT)(((OpcodeToken0)&D3D10_SB_RESOURCE_SAMPLE_COUNT_MASK)>>D3D10_SB_RESOURCE_SAMPLE_COUNT_SHIFT))
+
+// ENCODER MACRO: Store resource sample count up to 127 into
+// a resource declaration token (OpcodeToken0).  Values above 127 are
+// clamped to 127.
+// SampleCount is parenthesized at every use so that expression arguments
+// (e.g. `x & 0xff` or `a ? b : c`) are compared and shifted as a whole.
+// Note that SampleCount is evaluated more than once, so it must not have
+// side effects.
+#define ENCODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(SampleCount) ((((SampleCount) > 127 ? 127 : (SampleCount))<<D3D10_SB_RESOURCE_SAMPLE_COUNT_SHIFT)&D3D10_SB_RESOURCE_SAMPLE_COUNT_MASK)
+
+// ----------------------------------------------------------------------------
+// Resource Return Type Token (ResourceReturnTypeToken) (used in resource
+// declaration statements)
+//
+// [03:00] D3D10_SB_RESOURCE_RETURN_TYPE for component X
+// [07:04] D3D10_SB_RESOURCE_RETURN_TYPE for component Y
+// [11:08] D3D10_SB_RESOURCE_RETURN_TYPE for component Z
+// [15:12] D3D10_SB_RESOURCE_RETURN_TYPE for component W
+// [31:16] Reserved, 0
+//
+// ----------------------------------------------------------------------------
+// DECODER MACRO: Get the resource return type for component (0-3) from
+// ResourceReturnTypeToken.  Each component's field is
+// D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS wide, with component 0 (X) in the
+// lowest bits.  Component is parenthesized so that expression arguments
+// such as `i + 1` are scaled as a whole.
+#define DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(ResourceReturnTypeToken, Component) \
+    ((D3D10_SB_RESOURCE_RETURN_TYPE)(((ResourceReturnTypeToken) >> \
+    ((Component) * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS))&D3D10_SB_RESOURCE_RETURN_TYPE_MASK))
+
+// ENCODER MACRO: Generate the ResourceReturnTypeToken bits holding
+// ReturnType for the given component (0-3).  Results for the individual
+// components can be OR'ed together to form the full token.  Component is
+// parenthesized so that expression arguments such as `i + 1` are scaled
+// as a whole.
+#define ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnType, Component) \
+    (((ReturnType)&D3D10_SB_RESOURCE_RETURN_TYPE_MASK) << ((Component) * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS))
+
+// ----------------------------------------------------------------------------
+// Sampler Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_SAMPLER
+// [14:11] D3D10_SB_SAMPLER_MODE
+// [23:15] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand on Shader Models 4.0 through 5.0:
+// (1) Operand starting with OperandToken0, defining which sampler
+//     (D3D10_SB_OPERAND_TYPE_SAMPLER) register # is being declared.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     s# register (D3D10_SB_OPERAND_TYPE_SAMPLER) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (s<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of samplers in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the s# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (s<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of sampler within space (may be dynamically indexed)
+// (2) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+// Sampler modes, stored in bits [14:11] of the sampler declaration's
+// OpcodeToken0.
+typedef enum D3D10_SB_SAMPLER_MODE
+{
+    D3D10_SB_SAMPLER_MODE_DEFAULT    = 0,
+    D3D10_SB_SAMPLER_MODE_COMPARISON = 1,
+    D3D10_SB_SAMPLER_MODE_MONO       = 2
+} D3D10_SB_SAMPLER_MODE;
+
+#define D3D10_SB_SAMPLER_MODE_SHIFT 11
+#define D3D10_SB_SAMPLER_MODE_MASK  0x00007800
+
+// DECODER MACRO: Given a sampler declaration token (OpcodeToken0), return
+// the sampler mode (D3D10_SB_SAMPLER_MODE enum).
+#define DECODE_D3D10_SB_SAMPLER_MODE(Token) \
+    ((D3D10_SB_SAMPLER_MODE)(((Token) & D3D10_SB_SAMPLER_MODE_MASK) >> \
+                             D3D10_SB_SAMPLER_MODE_SHIFT))
+
+// ENCODER MACRO: Place a sampler mode (D3D10_SB_SAMPLER_MODE enum) into
+// the bits of a sampler declaration token (OpcodeToken0).
+#define ENCODE_D3D10_SB_SAMPLER_MODE(Mode) \
+    (((Mode) << D3D10_SB_SAMPLER_MODE_SHIFT) & D3D10_SB_SAMPLER_MODE_MASK)
+
+// ----------------------------------------------------------------------------
+// Input Register Declaration (see separate declarations for Pixel Shaders)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared, 
+//     including writemask.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Input Register Declaration w/System Interpreted Value
+// (see separate declarations for Pixel Shaders)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT_SIV
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared,
+//     including writemask.  For Geometry Shaders, the input is 
+//     v[vertex][attribute], and this declaration is only for which register 
+//     on the attribute axis is being declared.  The vertex axis value must 
+//     be equal to the # of vertices in the current input primitive for the GS
+//     (i.e. 6 for triangle + adjacency).
+// (2) a System Interpreted Value Name (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Input Register Declaration w/System Generated Value
+// (available for all shaders incl. Pixel Shader, no interpolation mode needed)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT_SGV
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared,
+//     including writemask.
+// (2) a System Generated Value Name (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Pixel Shader Input Register Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT_PS
+// [14:11] D3D10_SB_INTERPOLATION_MODE
+// [23:15] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared,
+//     including writemask.
+//
+// ----------------------------------------------------------------------------
+// The interpolation mode occupies bits [14:11] of OpcodeToken0.
+#define D3D10_SB_INPUT_INTERPOLATION_MODE_SHIFT 11
+#define D3D10_SB_INPUT_INTERPOLATION_MODE_MASK  0x00007800
+
+// DECODER MACRO: Return the interpolation mode
+// (D3D10_SB_INTERPOLATION_MODE) declared for the input register.
+#define DECODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Token) \
+    ((D3D10_SB_INTERPOLATION_MODE)(((Token) & D3D10_SB_INPUT_INTERPOLATION_MODE_MASK) >> \
+                                   D3D10_SB_INPUT_INTERPOLATION_MODE_SHIFT))
+
+// ENCODER MACRO: Place the interpolation mode for a register into the
+// bits of OpcodeToken0.
+#define ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Mode) \
+    (((Mode) << D3D10_SB_INPUT_INTERPOLATION_MODE_SHIFT) & D3D10_SB_INPUT_INTERPOLATION_MODE_MASK)
+
+// ----------------------------------------------------------------------------
+// Pixel Shader Input Register Declaration w/System Interpreted Value
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT_PS_SIV
+// [14:11] D3D10_SB_INTERPOLATION_MODE
+// [23:15] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared.
+// (2) a System Interpreted Value Name (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Pixel Shader Input Register Declaration w/System Generated Value
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT_PS_SGV
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared.
+// (2) a System Generated Value Name (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Output Register Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_OUTPUT
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand:
+// (1) Operand, starting with OperandToken0, defining which
+//     o# register (D3D10_SB_OPERAND_TYPE_OUTPUT) is being declared,
+//     including writemask.
+//     (in Pixel Shader, output can also be one of 
+//     D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH,
+//     D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL, or
+//     D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL )
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Output Register Declaration w/System Interpreted Value
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_OUTPUT_SIV
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     o# register (D3D10_SB_OPERAND_TYPE_OUTPUT) is being declared,
+//     including writemask.
+// (2) a System Interpreted Name token (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Output Register Declaration w/System Generated Value
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_OUTPUT_SGV
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     o# register (D3D10_SB_OPERAND_TYPE_OUTPUT) is being declared,
+//     including writemask.
+// (2) a System Generated Name token (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+
+// ----------------------------------------------------------------------------
+// Input or Output Register Indexing Range Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INDEX_RANGE
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     input (v#) or output (o#) register is having its array indexing range
+//     declared, including writemask.  For Geometry Shader inputs, 
+//     it is assumed that the vertex axis is always fully indexable,
+//     and 0 must be specified as the vertex# in this declaration, so that 
+//     only a range of attributes has its index range defined.
+//     
+// (2) a DWORD representing the count of registers starting from the one
+//     indicated in (1).
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Temp Register Declaration r0...r(n-1) 
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_TEMPS
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand:
+// (1) DWORD (unsigned int) indicating how many temps are being declared.  
+//     i.e. 5 means r0...r4 are declared.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Indexable Temp Register (x#[size]) Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 3 DWORDs:
+// (1) Register index (defines which x# register is declared)
+// (2) Number of registers in this register bank
+// (3) Number of components in the array (1-4). 1 means .x, 2 means .xy etc.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Constant Buffer Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER
+// [11]    D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN
+// [23:12] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand on Shader Model 4.0 through 5.0:
+// (1) Operand, starting with OperandToken0, defining which CB slot (cb#[size])
+//     is being declared. (operand type: D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER)
+//     The indexing dimension for the register must be 
+//     D3D10_SB_OPERAND_INDEX_DIMENSION_2D, where the first index specifies
+//     which cb#[] is being declared, and the second (array) index specifies the size 
+//     of the buffer, as a count of 32-bit*4 elements.  (As opposed to when the 
+//     cb#[] is used in shader instructions, and the array index represents which 
+//     location in the constant buffer is being referenced.)
+//     If the size is specified as 0, the CB size is not known (any size CB
+//     can be bound to the slot).
+//
+// The order of constant buffer declarations in a shader indicates their
+// relative priority from highest to lowest (hint to driver).
+// 
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) Operand, starting with OperandToken0, defining which CB range (ID and bounds)
+//     is being declared. (operand type: D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER)
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (cb<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of constant buffers in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the cb#[] is used in shader instructions: (cb<id>[<idx>][<loc>])
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of constant buffer within space (may be dynamically indexed)
+//       3 <loc>: location of vector within constant buffer being referenced,
+//          which may also be dynamically indexed, with no access pattern flag required.
+// (2) a DWORD indicating the size of the constant buffer as a count of 16-byte vectors.
+//     Each vector is 32-bit*4 elements == 128-bits == 16 bytes.
+//     If the size is specified as 0, the CB size is not known (any size CB
+//     can be bound to the slot).
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+
+// How a constant buffer is accessed; stored in bit 11 of the constant
+// buffer declaration's OpcodeToken0.
+typedef enum D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN
+{
+    D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED = 0,
+    D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED   = 1
+} D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN;
+
+#define D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_SHIFT 11
+#define D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_MASK  0x00000800
+
+// DECODER MACRO: Return the access pattern declared for a Constant Buffer,
+// i.e. whether it is going to be dynamically indexed or not.
+#define DECODE_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(Token) \
+    ((D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN)(((Token) & D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_MASK) >> \
+                                               D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_SHIFT))
+
+// ENCODER MACRO: Place the access pattern for the Constant Buffer into the
+// bits of OpcodeToken0.
+// NOTE: the repeated "D3D10_SB_" in this macro's name is part of its
+// established spelling.
+#define ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(Pattern) \
+    (((Pattern) << D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_SHIFT) & D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_MASK)
+
+// ----------------------------------------------------------------------------
+// Immediate Constant Buffer Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_CUSTOMDATA
+// [31:11] == D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER
+//
+// OpcodeToken0 is followed by:
+// (1) DWORD indicating length of declaration, including OpcodeToken0.
+//     This length must = 2(for OpcodeToken0 and 1) + a multiple of 4 
+//                                                    (# of immediate constants)
+// (2) Sequence of 4-tuples of DWORDs defining the Immediate Constant Buffer.
+//     The number of 4-tuples is (length above - 2) / 4
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Shader Message Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_CUSTOMDATA
+// [31:11] == D3D11_SB_CUSTOMDATA_SHADER_MESSAGE
+//
+// OpcodeToken0 is followed by:
+// (1) DWORD indicating length of declaration, including OpcodeToken0.
+// (2) DWORD (D3D11_SB_SHADER_MESSAGE_ID) indicating shader message or error.
+// (3) D3D11_SB_SHADER_MESSAGE_FORMAT indicating the convention for formatting the message.
+// (4) DWORD indicating the number of characters in the string without the terminator.
+// (5) DWORD indicating the number of operands.
+// (6) DWORD indicating length of operands.
+// (7) Encoded operands.
+// (8) String with trailing zero, padded to a multiple of DWORDs.
+//     The string is in the given format and the operands given should
+//     be used for argument substitutions when formatting.
+// ----------------------------------------------------------------------------
+
+typedef enum D3D11_SB_SHADER_MESSAGE_ID // Values for DWORD (2) of the Shader Message Declaration: message vs. error.
+{
+    D3D11_SB_SHADER_MESSAGE_ID_MESSAGE = 0x00200102,
+    D3D11_SB_SHADER_MESSAGE_ID_ERROR = 0x00200103
+} D3D11_SB_SHADER_MESSAGE_ID;
+
+typedef enum D3D11_SB_SHADER_MESSAGE_FORMAT // Values for item (3) of the Shader Message Declaration; enumerators are implicitly 0 and 1.
+{
+    // Plain text string with no formatting; the operands are ignored.
+    D3D11_SB_SHADER_MESSAGE_FORMAT_ANSI_TEXT,
+    // Format string that follows C/C++ printf conventions.
+    D3D11_SB_SHADER_MESSAGE_FORMAT_ANSI_PRINTF,
+} D3D11_SB_SHADER_MESSAGE_FORMAT;
+
+// ----------------------------------------------------------------------------
+// Shader Clip Plane Constant Mappings for DX9 hardware
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_CUSTOMDATA
+// [31:11] == D3D11_SB_CUSTOMDATA_SHADER_CLIP_PLANE_CONSTANT_MAPPINGS_FOR_DX9
+//
+// OpcodeToken0 is followed by:
+// (1) DWORD indicating length of declaration, including OpcodeToken0.
+// (2) DWORD indicating number of constant mappings (up to 6 mappings).
+// (3+) Constant mapping tables in following format.
+//
+// struct _Clip_Plane_Constant_Mapping
+// {
+//     WORD ConstantBufferIndex;  // cb[n]
+//     WORD StartConstantElement; // starting index of cb[n][m]
+//     WORD ConstantElementCount; // number of elements cb[n][m] ~ cb[n][m+l]
+//     WORD Reserved;             //
+// };
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Geometry Shader Input Primitive Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE
+// [16:11] D3D10_SB_PRIMITIVE [not D3D10_SB_PRIMITIVE_TOPOLOGY]
+// [23:17] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+
+#define D3D10_SB_GS_INPUT_PRIMITIVE_MASK  0x0001f800 // bits [16:11]
+#define D3D10_SB_GS_INPUT_PRIMITIVE_SHIFT 11
+
+// DECODER MACRO: Given a GS input primitive declaration token
+// (OpcodeToken0), determine the input primitive
+// (D3D10_SB_PRIMITIVE enum)
+#define DECODE_D3D10_SB_GS_INPUT_PRIMITIVE(OpcodeToken0) ((D3D10_SB_PRIMITIVE)(((OpcodeToken0)&D3D10_SB_GS_INPUT_PRIMITIVE_MASK)>>D3D10_SB_GS_INPUT_PRIMITIVE_SHIFT))
+
+// ENCODER MACRO: Store an input primitive
+// (D3D10_SB_PRIMITIVE enum) into
+// a GS input primitive declaration token (OpcodeToken0)
+#define ENCODE_D3D10_SB_GS_INPUT_PRIMITIVE(Prim) (((Prim)<<D3D10_SB_GS_INPUT_PRIMITIVE_SHIFT)&D3D10_SB_GS_INPUT_PRIMITIVE_MASK)
+
+// ----------------------------------------------------------------------------
+// Geometry Shader Output Topology Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY
+// [16:11] D3D10_SB_PRIMITIVE_TOPOLOGY
+// [23:17] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+
+#define D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_MASK  0x0001f800 // bits [16:11]
+#define D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_SHIFT 11
+
+// DECODER MACRO: Given a GS output primitive topology declaration token
+// (OpcodeToken0), determine the primitive topology
+// (D3D10_SB_PRIMITIVE_TOPOLOGY enum)
+#define DECODE_D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY(OpcodeToken0) ((D3D10_SB_PRIMITIVE_TOPOLOGY)(((OpcodeToken0)&D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_MASK)>>D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_SHIFT))
+
+// ENCODER MACRO: Store a primitive topology
+// (D3D10_SB_PRIMITIVE_TOPOLOGY enum) into
+// a GS output primitive topology declaration token (OpcodeToken0)
+#define ENCODE_D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY(PrimTopology) (((PrimTopology)<<D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_SHIFT)&D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_MASK)
+
+// ----------------------------------------------------------------------------
+// Geometry Shader Maximum Output Vertex Count Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by a DWORD representing the
+// maximum number of vertices that could be output
+// by the Geometry Shader.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Geometry Shader Instance Count Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by a UINT32 representing the
+// number of instances of the geometry shader program to execute.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: HS/DS Input Control Point Count
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT
+// [16:11] Control point count 
+// [23:17] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+#define D3D11_SB_INPUT_CONTROL_POINT_COUNT_MASK  0x0001f800 // bits [16:11]
+#define D3D11_SB_INPUT_CONTROL_POINT_COUNT_SHIFT 11
+
+// DECODER MACRO: Given an input control point count declaration token
+// (OpcodeToken0), extract the control point count as a UINT
+#define DECODE_D3D11_SB_INPUT_CONTROL_POINT_COUNT(OpcodeToken0) ((UINT)(((OpcodeToken0)&D3D11_SB_INPUT_CONTROL_POINT_COUNT_MASK)>>D3D11_SB_INPUT_CONTROL_POINT_COUNT_SHIFT))
+
+// ENCODER MACRO: Store an input control point count into a declaration token (bits outside the mask are discarded)
+#define ENCODE_D3D11_SB_INPUT_CONTROL_POINT_COUNT(Count) (((Count)<<D3D11_SB_INPUT_CONTROL_POINT_COUNT_SHIFT)&D3D11_SB_INPUT_CONTROL_POINT_COUNT_MASK)
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: HS Output Control Point Count
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT
+// [16:11] Control point count 
+// [23:17] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+#define D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_MASK  0x0001f800 // bits [16:11]
+#define D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_SHIFT 11
+
+// DECODER MACRO: Given an output control point count declaration token
+// (OpcodeToken0), extract the control point count as a UINT
+#define DECODE_D3D11_SB_OUTPUT_CONTROL_POINT_COUNT(OpcodeToken0) ((UINT)(((OpcodeToken0)&D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_MASK)>>D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_SHIFT))
+
+// ENCODER MACRO: Store an output control point count into a declaration token (bits outside the mask are discarded)
+#define ENCODE_D3D11_SB_OUTPUT_CONTROL_POINT_COUNT(Count) (((Count)<<D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_SHIFT)&D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_MASK)
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: Tessellator Domain
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_TESS_DOMAIN
+// [12:11] Domain
+// [23:13] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+typedef enum D3D11_SB_TESSELLATOR_DOMAIN // Value of the Domain field, bits [12:11] of OpcodeToken0.
+{
+    D3D11_SB_TESSELLATOR_DOMAIN_UNDEFINED = 0,
+    D3D11_SB_TESSELLATOR_DOMAIN_ISOLINE   = 1,
+    D3D11_SB_TESSELLATOR_DOMAIN_TRI       = 2,
+    D3D11_SB_TESSELLATOR_DOMAIN_QUAD      = 3
+} D3D11_SB_TESSELLATOR_DOMAIN;
+
+#define D3D11_SB_TESS_DOMAIN_MASK  0x00001800 // bits [12:11]
+#define D3D11_SB_TESS_DOMAIN_SHIFT 11
+
+// DECODER MACRO: Given a tessellator domain declaration token
+// (OpcodeToken0), determine the domain
+// (D3D11_SB_TESSELLATOR_DOMAIN enum)
+#define DECODE_D3D11_SB_TESS_DOMAIN(OpcodeToken0) ((D3D11_SB_TESSELLATOR_DOMAIN)(((OpcodeToken0)&D3D11_SB_TESS_DOMAIN_MASK)>>D3D11_SB_TESS_DOMAIN_SHIFT))
+
+// ENCODER MACRO: Store a tessellator domain
+// (D3D11_SB_TESSELLATOR_DOMAIN enum) into
+// a tessellator domain declaration token (OpcodeToken0)
+#define ENCODE_D3D11_SB_TESS_DOMAIN(Domain) (((Domain)<<D3D11_SB_TESS_DOMAIN_SHIFT)&D3D11_SB_TESS_DOMAIN_MASK)
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: Tessellator Partitioning
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_TESS_PARTITIONING
+// [13:11] Partitioning
+// [23:14] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+typedef enum D3D11_SB_TESSELLATOR_PARTITIONING // Value of the Partitioning field, bits [13:11] of OpcodeToken0.
+{
+    D3D11_SB_TESSELLATOR_PARTITIONING_UNDEFINED       = 0,
+    D3D11_SB_TESSELLATOR_PARTITIONING_INTEGER         = 1,
+    D3D11_SB_TESSELLATOR_PARTITIONING_POW2            = 2,
+    D3D11_SB_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD  = 3,
+    D3D11_SB_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4
+} D3D11_SB_TESSELLATOR_PARTITIONING;
+
+#define D3D11_SB_TESS_PARTITIONING_MASK  0x00003800 // bits [13:11]
+#define D3D11_SB_TESS_PARTITIONING_SHIFT 11
+
+// DECODER MACRO: Given a tessellator partitioning declaration token
+// (OpcodeToken0), determine the partitioning
+// (D3D11_SB_TESSELLATOR_PARTITIONING enum)
+#define DECODE_D3D11_SB_TESS_PARTITIONING(OpcodeToken0) ((D3D11_SB_TESSELLATOR_PARTITIONING)(((OpcodeToken0)&D3D11_SB_TESS_PARTITIONING_MASK)>>D3D11_SB_TESS_PARTITIONING_SHIFT))
+
+// ENCODER MACRO: Store a tessellator partitioning
+// (D3D11_SB_TESSELLATOR_PARTITIONING enum) into
+// a tessellator partitioning declaration token (OpcodeToken0)
+#define ENCODE_D3D11_SB_TESS_PARTITIONING(Partitioning) (((Partitioning)<<D3D11_SB_TESS_PARTITIONING_SHIFT)&D3D11_SB_TESS_PARTITIONING_MASK)
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: Tessellator Output Primitive
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE
+// [13:11] Output Primitive
+// [23:14] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+typedef enum D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE // Value of the Output Primitive field, bits [13:11] of OpcodeToken0.
+{
+    D3D11_SB_TESSELLATOR_OUTPUT_UNDEFINED     = 0,
+    D3D11_SB_TESSELLATOR_OUTPUT_POINT         = 1,
+    D3D11_SB_TESSELLATOR_OUTPUT_LINE          = 2,
+    D3D11_SB_TESSELLATOR_OUTPUT_TRIANGLE_CW   = 3,
+    D3D11_SB_TESSELLATOR_OUTPUT_TRIANGLE_CCW  = 4
+} D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE;
+
+#define D3D11_SB_TESS_OUTPUT_PRIMITIVE_MASK  0x00003800 // bits [13:11]
+#define D3D11_SB_TESS_OUTPUT_PRIMITIVE_SHIFT 11
+
+// DECODER MACRO: Given a tessellator output primitive declaration token
+// (OpcodeToken0), determine the output primitive
+// (D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE enum)
+#define DECODE_D3D11_SB_TESS_OUTPUT_PRIMITIVE(OpcodeToken0) ((D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE)(((OpcodeToken0)&D3D11_SB_TESS_OUTPUT_PRIMITIVE_MASK)>>D3D11_SB_TESS_OUTPUT_PRIMITIVE_SHIFT))
+
+// ENCODER MACRO: Store a tessellator output primitive
+// (D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE enum) into
+// a tessellator output primitive declaration token (OpcodeToken0)
+#define ENCODE_D3D11_SB_TESS_OUTPUT_PRIMITIVE(OutputPrimitive) (((OutputPrimitive)<<D3D11_SB_TESS_OUTPUT_PRIMITIVE_SHIFT)&D3D11_SB_TESS_OUTPUT_PRIMITIVE_MASK)
+
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: Hull Shader Max Tessfactor
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by a float32 representing the
+// maximum TessFactor.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: Hull Shader Fork Phase Instance Count
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by a UINT32 representing the
+// number of instances of the current fork phase program to execute.
+//
+// ----------------------------------------------------------------------------
+
+typedef enum D3D10_SB_INTERPOLATION_MODE // Interpolation mode values; 0 means undefined.
+{
+    D3D10_SB_INTERPOLATION_UNDEFINED = 0,
+    D3D10_SB_INTERPOLATION_CONSTANT = 1,
+    D3D10_SB_INTERPOLATION_LINEAR = 2,
+    D3D10_SB_INTERPOLATION_LINEAR_CENTROID = 3,
+    D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE = 4,
+    D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
+    D3D10_SB_INTERPOLATION_LINEAR_SAMPLE = 6, // added in DX10.1
+    D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7, // added in DX10.1
+} D3D10_SB_INTERPOLATION_MODE;
+
+// Keep PRIMITIVE_TOPOLOGY values in sync with earlier DX versions (HW consumes values directly).
+typedef enum D3D10_SB_PRIMITIVE_TOPOLOGY
+{
+    D3D10_SB_PRIMITIVE_TOPOLOGY_UNDEFINED = 0,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST = 1,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_LINELIST = 2,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP = 3,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5,
+    // 6 is reserved for legacy triangle fans
+    // Adjacency values should be equal to (0x8 | non-adjacency), e.g. LINELIST_ADJ == (0x8 | LINELIST) == 10:
+    D3D10_SB_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13,
+} D3D10_SB_PRIMITIVE_TOPOLOGY;
+
+typedef enum D3D10_SB_PRIMITIVE // Primitive type, as opposed to D3D10_SB_PRIMITIVE_TOPOLOGY.
+{
+    D3D10_SB_PRIMITIVE_UNDEFINED = 0,
+    D3D10_SB_PRIMITIVE_POINT = 1,
+    D3D10_SB_PRIMITIVE_LINE = 2,
+    D3D10_SB_PRIMITIVE_TRIANGLE = 3,
+    // Adjacency values should be equal to (0x4 | non-adjacency), e.g. LINE_ADJ == (0x4 | LINE) == 6:
+    D3D10_SB_PRIMITIVE_LINE_ADJ = 6,
+    D3D10_SB_PRIMITIVE_TRIANGLE_ADJ = 7,
+    D3D11_SB_PRIMITIVE_1_CONTROL_POINT_PATCH = 8, // patches: an N-control-point patch has value N + 7
+    D3D11_SB_PRIMITIVE_2_CONTROL_POINT_PATCH = 9,
+    D3D11_SB_PRIMITIVE_3_CONTROL_POINT_PATCH = 10,
+    D3D11_SB_PRIMITIVE_4_CONTROL_POINT_PATCH = 11,
+    D3D11_SB_PRIMITIVE_5_CONTROL_POINT_PATCH = 12,
+    D3D11_SB_PRIMITIVE_6_CONTROL_POINT_PATCH = 13,
+    D3D11_SB_PRIMITIVE_7_CONTROL_POINT_PATCH = 14,
+    D3D11_SB_PRIMITIVE_8_CONTROL_POINT_PATCH = 15,
+    D3D11_SB_PRIMITIVE_9_CONTROL_POINT_PATCH = 16,
+    D3D11_SB_PRIMITIVE_10_CONTROL_POINT_PATCH = 17,
+    D3D11_SB_PRIMITIVE_11_CONTROL_POINT_PATCH = 18,
+    D3D11_SB_PRIMITIVE_12_CONTROL_POINT_PATCH = 19,
+    D3D11_SB_PRIMITIVE_13_CONTROL_POINT_PATCH = 20,
+    D3D11_SB_PRIMITIVE_14_CONTROL_POINT_PATCH = 21,
+    D3D11_SB_PRIMITIVE_15_CONTROL_POINT_PATCH = 22,
+    D3D11_SB_PRIMITIVE_16_CONTROL_POINT_PATCH = 23,
+    D3D11_SB_PRIMITIVE_17_CONTROL_POINT_PATCH = 24,
+    D3D11_SB_PRIMITIVE_18_CONTROL_POINT_PATCH = 25,
+    D3D11_SB_PRIMITIVE_19_CONTROL_POINT_PATCH = 26,
+    D3D11_SB_PRIMITIVE_20_CONTROL_POINT_PATCH = 27,
+    D3D11_SB_PRIMITIVE_21_CONTROL_POINT_PATCH = 28,
+    D3D11_SB_PRIMITIVE_22_CONTROL_POINT_PATCH = 29,
+    D3D11_SB_PRIMITIVE_23_CONTROL_POINT_PATCH = 30,
+    D3D11_SB_PRIMITIVE_24_CONTROL_POINT_PATCH = 31,
+    D3D11_SB_PRIMITIVE_25_CONTROL_POINT_PATCH = 32,
+    D3D11_SB_PRIMITIVE_26_CONTROL_POINT_PATCH = 33,
+    D3D11_SB_PRIMITIVE_27_CONTROL_POINT_PATCH = 34,
+    D3D11_SB_PRIMITIVE_28_CONTROL_POINT_PATCH = 35,
+    D3D11_SB_PRIMITIVE_29_CONTROL_POINT_PATCH = 36,
+    D3D11_SB_PRIMITIVE_30_CONTROL_POINT_PATCH = 37,
+    D3D11_SB_PRIMITIVE_31_CONTROL_POINT_PATCH = 38,
+    D3D11_SB_PRIMITIVE_32_CONTROL_POINT_PATCH = 39,
+} D3D10_SB_PRIMITIVE;
+
+typedef enum D3D10_SB_COMPONENT_MASK // One bit per component; R/G/B/A carry the same values as X/Y/Z/W.
+{
+    D3D10_SB_COMPONENT_MASK_X = 1,
+    D3D10_SB_COMPONENT_MASK_Y = 2,
+    D3D10_SB_COMPONENT_MASK_Z = 4,
+    D3D10_SB_COMPONENT_MASK_W = 8,
+    D3D10_SB_COMPONENT_MASK_R = 1, // same value as MASK_X
+    D3D10_SB_COMPONENT_MASK_G = 2, // same value as MASK_Y
+    D3D10_SB_COMPONENT_MASK_B = 4, // same value as MASK_Z
+    D3D10_SB_COMPONENT_MASK_A = 8, // same value as MASK_W
+    D3D10_SB_COMPONENT_MASK_ALL = 15, // X | Y | Z | W
+} D3D10_SB_COMPONENT_MASK;
+
+typedef enum D3D10_SB_NAME // Contiguous values 0..25; entries keep the prefix of the D3D version that added them.
+{
+    D3D10_SB_NAME_UNDEFINED = 0,
+    D3D10_SB_NAME_POSITION = 1,
+    D3D10_SB_NAME_CLIP_DISTANCE = 2,
+    D3D10_SB_NAME_CULL_DISTANCE = 3,
+    D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+    D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX = 5,
+    D3D10_SB_NAME_VERTEX_ID = 6,
+    D3D10_SB_NAME_PRIMITIVE_ID = 7,
+    D3D10_SB_NAME_INSTANCE_ID = 8,
+    D3D10_SB_NAME_IS_FRONT_FACE = 9,
+    D3D10_SB_NAME_SAMPLE_INDEX = 10,
+    // The following were added for D3D11 (final tessellation factor names)
+    D3D11_SB_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11, 
+    D3D11_SB_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12, 
+    D3D11_SB_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13, 
+    D3D11_SB_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14, 
+    D3D11_SB_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15, 
+    D3D11_SB_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16, 
+    D3D11_SB_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17, 
+    D3D11_SB_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18, 
+    D3D11_SB_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19, 
+    D3D11_SB_NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20, 
+    D3D11_SB_NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21,
+    D3D11_SB_NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22,
+    // The following were added for D3D12
+    D3D12_SB_NAME_BARYCENTRICS = 23,
+    D3D12_SB_NAME_SHADINGRATE = 24,
+    D3D12_SB_NAME_CULLPRIMITIVE = 25,
+} D3D10_SB_NAME;
+
+typedef enum D3D10_SB_RESOURCE_DIMENSION // Resource dimension values; 0 means unknown.
+{
+    D3D10_SB_RESOURCE_DIMENSION_UNKNOWN = 0,
+    D3D10_SB_RESOURCE_DIMENSION_BUFFER = 1,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D = 2,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D = 3,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS = 4,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D = 5,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE = 6,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY = 7,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY = 8,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
+    D3D11_SB_RESOURCE_DIMENSION_RAW_BUFFER = 11, // D3D11-prefixed entry
+    D3D11_SB_RESOURCE_DIMENSION_STRUCTURED_BUFFER = 12, // D3D11-prefixed entry
+} D3D10_SB_RESOURCE_DIMENSION;
+
+typedef enum D3D10_SB_RESOURCE_RETURN_TYPE // Values start at 1; no enumerator is defined for 0.
+{
+    D3D10_SB_RETURN_TYPE_UNORM = 1,
+    D3D10_SB_RETURN_TYPE_SNORM = 2,
+    D3D10_SB_RETURN_TYPE_SINT = 3,
+    D3D10_SB_RETURN_TYPE_UINT = 4,
+    D3D10_SB_RETURN_TYPE_FLOAT = 5,
+    D3D10_SB_RETURN_TYPE_MIXED = 6,
+    D3D11_SB_RETURN_TYPE_DOUBLE = 7, // D3D11-prefixed entry
+    D3D11_SB_RETURN_TYPE_CONTINUED = 8, // D3D11-prefixed entry
+    D3D11_SB_RETURN_TYPE_UNUSED = 9, // D3D11-prefixed entry
+} D3D10_SB_RESOURCE_RETURN_TYPE;
+
+typedef enum D3D10_SB_REGISTER_COMPONENT_TYPE // Register component types (all 32-bit); 0 means unknown.
+{
+    D3D10_SB_REGISTER_COMPONENT_UNKNOWN = 0,
+    D3D10_SB_REGISTER_COMPONENT_UINT32 = 1,
+    D3D10_SB_REGISTER_COMPONENT_SINT32 = 2,
+    D3D10_SB_REGISTER_COMPONENT_FLOAT32 = 3
+} D3D10_SB_REGISTER_COMPONENT_TYPE;
+
+typedef enum D3D10_SB_INSTRUCTION_RETURN_TYPE // Decoded value of the return-type field, bits [12:11] of OpcodeToken0.
+{
+    D3D10_SB_INSTRUCTION_RETURN_FLOAT      = 0,
+    D3D10_SB_INSTRUCTION_RETURN_UINT       = 1
+} D3D10_SB_INSTRUCTION_RETURN_TYPE;
+
+#define D3D10_SB_INSTRUCTION_RETURN_TYPE_MASK  0x00001800 // bits [12:11]
+#define D3D10_SB_INSTRUCTION_RETURN_TYPE_SHIFT 11
+
+// DECODER MACRO: For an OpcodeToken0 that carries a return type,
+// extract the return type (D3D10_SB_INSTRUCTION_RETURN_TYPE enum).
+#define DECODE_D3D10_SB_INSTRUCTION_RETURN_TYPE(OpcodeToken0) ((D3D10_SB_INSTRUCTION_RETURN_TYPE)(((OpcodeToken0)&D3D10_SB_INSTRUCTION_RETURN_TYPE_MASK)>>D3D10_SB_INSTRUCTION_RETURN_TYPE_SHIFT))
+// ENCODER MACRO: Encode the return type for instructions
+// in the opcode specific control range of OpcodeToken0
+#define ENCODE_D3D10_SB_INSTRUCTION_RETURN_TYPE(ReturnType) (((ReturnType)<<D3D10_SB_INSTRUCTION_RETURN_TYPE_SHIFT)&D3D10_SB_INSTRUCTION_RETURN_TYPE_MASK)
+
+// ----------------------------------------------------------------------------
+// Interface function body Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_FUNCTION_BODY
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  If it is extended, then
+//         it contains the actual instruction length in DWORDs, since
+//         it may not fit into 7 bits if enough operands are defined.
+//
+// OpcodeToken0 is followed by a DWORD that represents the function body
+// identifier.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Interface function table Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_FUNCTION_TABLE
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  If it is extended, then
+//         it contains the actual instruction length in DWORDs, since
+//         it may not fit into 7 bits if enough functions are defined.
+//
+// OpcodeToken0 is followed by a DWORD that represents the function table
+// identifier and another DWORD (TableLength) that gives the number of
+// functions in the table.
+//
+// This is followed by TableLength DWORDs which are function body indices.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Interface Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INTERFACE
+// [11]    1 if the interface is indexed dynamically, 0 otherwise.
+// [23:12] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  If it is extended, then
+//         it contains the actual instruction length in DWORDs, since
+//         it may not fit into 7 bits if enough types are used.
+//
+// OpcodeToken0 is followed by a DWORD that represents the interface
+// identifier. Next is a DWORD that gives the expected function table
+// length. Then another DWORD (OpcodeToken3) with the following layout:
+//
+// [15:00] TableLength, the number of types that implement this interface
+// [31:16] ArrayLength, the number of interfaces that are defined in this array.
+//
+// This is followed by TableLength DWORDs which are function table
+// identifiers, representing possible tables for a given interface.
+//
+// ----------------------------------------------------------------------------
+
+#define D3D11_SB_INTERFACE_INDEXED_BIT_MASK  0x00000800 // bit 11 of OpcodeToken0
+#define D3D11_SB_INTERFACE_INDEXED_BIT_SHIFT 11
+
+#define D3D11_SB_INTERFACE_TABLE_LENGTH_MASK  0x0000ffff // bits [15:00]
+#define D3D11_SB_INTERFACE_TABLE_LENGTH_SHIFT 0
+
+#define D3D11_SB_INTERFACE_ARRAY_LENGTH_MASK  0xffff0000 // bits [31:16]
+#define D3D11_SB_INTERFACE_ARRAY_LENGTH_SHIFT 16
+
+// get/set the indexed bit for an interface definition (decode yields true/false)
+#define DECODE_D3D11_SB_INTERFACE_INDEXED_BIT(OpcodeToken0) ((((OpcodeToken0)&D3D11_SB_INTERFACE_INDEXED_BIT_MASK)>>D3D11_SB_INTERFACE_INDEXED_BIT_SHIFT) ? true : false)
+#define ENCODE_D3D11_SB_INTERFACE_INDEXED_BIT(IndexedBit) (((IndexedBit)<<D3D11_SB_INTERFACE_INDEXED_BIT_SHIFT)&D3D11_SB_INTERFACE_INDEXED_BIT_MASK)
+
+// get/set the table length for an interface definition; the bit range matches the TableLength/ArrayLength DWORD (OpcodeToken3), although the parameter is named OpcodeToken0
+#define DECODE_D3D11_SB_INTERFACE_TABLE_LENGTH(OpcodeToken0) ((UINT)(((OpcodeToken0)&D3D11_SB_INTERFACE_TABLE_LENGTH_MASK)>>D3D11_SB_INTERFACE_TABLE_LENGTH_SHIFT))
+#define ENCODE_D3D11_SB_INTERFACE_TABLE_LENGTH(TableLength) (((TableLength)<<D3D11_SB_INTERFACE_TABLE_LENGTH_SHIFT)&D3D11_SB_INTERFACE_TABLE_LENGTH_MASK)
+
+// get/set the array length for an interface definition; the bit range matches the TableLength/ArrayLength DWORD (OpcodeToken3), although the parameter is named OpcodeToken0
+#define DECODE_D3D11_SB_INTERFACE_ARRAY_LENGTH(OpcodeToken0) ((UINT)(((OpcodeToken0)&D3D11_SB_INTERFACE_ARRAY_LENGTH_MASK)>>D3D11_SB_INTERFACE_ARRAY_LENGTH_SHIFT))
+#define ENCODE_D3D11_SB_INTERFACE_ARRAY_LENGTH(ArrayLength) (((ArrayLength)<<D3D11_SB_INTERFACE_ARRAY_LENGTH_SHIFT)&D3D11_SB_INTERFACE_ARRAY_LENGTH_MASK)
+
+// ----------------------------------------------------------------------------
+// Interface call
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_INTERFACE_CALL
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  If it is extended, then
+//         it contains the actual instruction length in DWORDs, since
+//         it may not fit into 7 bits if enough types are used.
+//
+// OpcodeToken0 is followed by a DWORD that gives the function index to
+// call in the function table specified for the given interface. 
+// Next is the interface operand.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Thread Group Declaration (Compute Shader)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_THREAD_GROUP
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  If it is extended, then
+//         it contains the actual instruction length in DWORDs, since
+//         it may not fit into 7 bits if enough types are used.
+//
+// OpcodeToken0 is followed by 3 DWORDs, the Thread Group dimensions as UINT32:
+// x, y, z
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Typed Unordered Access View Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED
+// [15:11] D3D10_SB_RESOURCE_DIMENSION
+// [16:16] D3D11_SB_GLOBALLY_COHERENT_ACCESS or 0 (LOCALLY_COHERENT)
+// [17:17] D3D11_SB_RASTERIZER_ORDERED_ACCESS or 0
+// [23:18] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Models 4.0 through 5.0:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared.
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+//
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (u<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of UAV's in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the u# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (u<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of uav within space (may be dynamically indexed)
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+// UAV access scope flags
+#define D3D11_SB_GLOBALLY_COHERENT_ACCESS 0x00010000
+#define D3D11_SB_ACCESS_COHERENCY_MASK    0x00010000
+
+// DECODER MACRO: Retrieve flags for sync instruction from OpcodeToken0.
+#define DECODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(OperandToken0) ((OperandToken0)&D3D11_SB_ACCESS_COHERENCY_MASK)
+
+// ENCODER MACRO: Given a set of sync instruciton flags, encode them in OpcodeToken0.
+#define ENCODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(Flags) ((Flags)&D3D11_SB_ACCESS_COHERENCY_MASK)
+
+// Additional UAV access flags
+#define D3D11_SB_RASTERIZER_ORDERED_ACCESS 0x00020000
+
+// Resource flags mask.  Use to retrieve all resource flags, including the order preserving counter.
+#define D3D11_SB_RESOURCE_FLAGS_MASK    (D3D11_SB_GLOBALLY_COHERENT_ACCESS|D3D11_SB_RASTERIZER_ORDERED_ACCESS|D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER)
+
+// DECODER MACRO: Retrieve UAV access flags from OpcodeToken0.
+#define DECODE_D3D11_SB_RESOURCE_FLAGS(OperandToken0) ((OperandToken0)&D3D11_SB_RESOURCE_FLAGS_MASK)
+
+// ENCODER MACRO: Given UAV access flags, encode them in OpcodeToken0.
+#define ENCODE_D3D11_SB_RESOURCE_FLAGS(Flags) ((Flags)&D3D11_SB_RESOURCE_FLAGS_MASK)
+
+// ----------------------------------------------------------------------------
+// Raw Unordered Access View Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW
+// [15:11] Ignored, 0
+// [16:16] D3D11_SB_GLOBALLY_COHERENT_ACCESS or 0 (LOCALLY_COHERENT)
+// [17:17] D3D11_SB_RASTERIZER_ORDERED_ACCESS or 0
+// [23:18] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand on Shader Models 4.0 through 5.0:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (u<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of UAV's in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the u# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (u<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of uav within space (may be dynamically indexed)
+// (2) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Structured Unordered Access View Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED
+// [15:11] Ignored, 0
+// [16:16] D3D11_SB_GLOBALLY_COHERENT_ACCESS or 0 (LOCALLY_COHERENT)
+// [17:17] D3D11_SB_RASTERIZER_ORDERED_ACCESS or 0
+// [22:18] Ignored, 0
+// [23:23] D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER or 0
+//
+//            The presence of this flag means that if a UAV is bound to the
+//            corresponding slot, it must have been created with 
+//            D3D11_BUFFER_UAV_FLAG_COUNTER at the API.  Also, the shader
+//            can contain either imm_atomic_alloc or _consume instructions
+//            operating on the given UAV.
+// 
+//            If this flag is not present, the shader can still contain
+//            either imm_atomic_alloc or imm_atomic_consume instructions for
+//            this UAV.  But if such instructions are present in this case,
+//            and a UAV is bound to the corresponding slot, it must have been created
+//            with the D3D11_BUFFER_UAV_FLAG_APPEND flag at the API.
+//            Append buffers have a counter as well, but values returned 
+//            to the shader are only valid for the lifetime of the shader 
+//            invocation.
+//
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is 
+//     being declared.
+// (2) a DWORD indicating UINT32 byte stride
+//
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (u<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of UAV's in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the u# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (u<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of uav within space (may be dynamically indexed)
+// (2) a DWORD indicating UINT32 byte stride
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+// UAV flags
+#define D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER 0x00800000
+#define D3D11_SB_UAV_FLAGS_MASK                   0x00800000
+
+// DECODER MACRO: Retrieve flags about UAV from OpcodeToken0.
+#define DECODE_D3D11_SB_UAV_FLAGS(OperandToken0) ((OperandToken0)&D3D11_SB_UAV_FLAGS_MASK)
+
+// ENCODER MACRO: Given a set of UAV flags, encode them in OpcodeToken0.
+#define ENCODE_D3D11_SB_UAV_FLAGS(Flags) ((Flags)&D3D11_SB_UAV_FLAGS_MASK)
+
+// ----------------------------------------------------------------------------
+// Raw Thread Group Shared Memory Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     g# register (D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) is being declared.
+// (2) a DWORD indicating the byte count, which must be a multiple of 4.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Structured Thread Group Shared Memory Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 3 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     g# register (D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) is 
+//     being declared.
+// (2) a DWORD indicating UINT32 struct byte stride
+// (3) a DWORD indicating UINT32 struct count
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Raw Shader Resource View Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_RESOURCE_RAW
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (t<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of resources in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the t# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (t<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of resource within space (may be dynamically indexed)
+// (2) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Structured Shader Resource View Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is 
+//     being declared.
+// (2) a DWORD indicating UINT32 struct byte stride
+//
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (t<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of resources in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the t# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (t<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of resource within space (may be dynamically indexed)
+// (2) a DWORD indicating UINT32 struct byte stride
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+
+#endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_GAMES) */
+#pragma endregion

+ 5 - 0
include/dxc/Test/DxcTestUtils.h

@@ -60,6 +60,9 @@ public:
   int Run();
 };
 
+// wstring because most uses need UTF-16: IDxcResult output names, include handler
+typedef std::map<std::wstring, CComPtr<IDxcBlob>> FileMap;
+
 // The result of running a single command in a run pipeline
 struct FileRunCommandResult {
   CComPtr<IDxcOperationResult> OpResult; // The operation result, if any.
@@ -109,6 +112,7 @@ public:
   std::string Command;      // Command to run, eg %dxc
   std::string Arguments;    // Arguments to command
   LPCWSTR CommandFileName;  // File name replacement for %s
+  FileMap *pVFS = nullptr;  // Files in virtual file system
 
 private:
   FileRunCommandResult RunFileChecker(const FileRunCommandResult *Prior, LPCWSTR dumpName = nullptr);
@@ -117,6 +121,7 @@ private:
   FileRunCommandResult RunOpt(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
   FileRunCommandResult RunD3DReflect(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
   FileRunCommandResult RunDxr(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
+  FileRunCommandResult RunLink(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
   FileRunCommandResult RunTee(const FileRunCommandResult *Prior);
   FileRunCommandResult RunXFail(const FileRunCommandResult *Prior);
   FileRunCommandResult RunDxilVer(dxc::DxcDllSupport& DllSupport, const FileRunCommandResult* Prior);

+ 19 - 1
include/dxc/dxcapi.h

@@ -511,6 +511,17 @@ struct IDxcValidator : public IUnknown {
     ) = 0;
 };
 
+CROSS_PLATFORM_UUIDOF(IDxcValidator2, "458e1fd1-b1b2-4750-a6e1-9c10f03bed92")
+struct IDxcValidator2 : public IDxcValidator {
+  // Validate a shader.
+  virtual HRESULT STDMETHODCALLTYPE ValidateWithDebug(
+    _In_ IDxcBlob *pShader,                       // Shader to validate.
+    _In_ UINT32 Flags,                            // Validation flags.
+    _In_opt_ DxcBuffer *pOptDebugBitcode,         // Optional debug module bitcode to provide line numbers
+    _COM_Outptr_ IDxcOperationResult **ppResult   // Validation output status, buffer, and errors
+    ) = 0;
+};
+
 CROSS_PLATFORM_UUIDOF(IDxcContainerBuilder, "334b1f50-2292-4b35-99a1-25588d8c17fe")
 struct IDxcContainerBuilder : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pDxilContainerHeader) = 0;                // Loads DxilContainer to the builder
@@ -576,12 +587,17 @@ struct IDxcVersionInfo2 : public IDxcVersionInfo {
 };
 
 CROSS_PLATFORM_UUIDOF(IDxcVersionInfo3, "5e13e843-9d25-473c-9ad2-03b2d0b44b1e")
-struct IDxcVersionInfo3 : public IDxcVersionInfo2 {
+struct IDxcVersionInfo3 : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE GetCustomVersionString(
     _Outptr_result_z_ char **pVersionString // Custom version string for compiler. (Must be CoTaskMemFree()'d!)
   ) = 0;
 };
 
+struct DxcArgPair {
+  const WCHAR *pName;
+  const WCHAR *pValue;
+};
+
 CROSS_PLATFORM_UUIDOF(IDxcPdbUtils, "E6C9647E-9D6A-4C3B-B94C-524B5A6C343D")
 struct IDxcPdbUtils : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pPdbOrDxil) = 0;
@@ -616,6 +632,8 @@ struct IDxcPdbUtils : public IUnknown {
 
   virtual HRESULT STDMETHODCALLTYPE SetCompiler(_In_ IDxcCompiler3 *pCompiler) = 0;
   virtual HRESULT STDMETHODCALLTYPE CompileForFullPDB(_COM_Outptr_ IDxcResult **ppResult) = 0;
+  virtual HRESULT STDMETHODCALLTYPE OverrideArgs(_In_ DxcArgPair *pArgPairs, UINT32 uNumArgPairs) = 0;
+  virtual HRESULT STDMETHODCALLTYPE OverrideRootSignature(_In_ const WCHAR *pRootSignature) = 0;
 };
 
 // Note: __declspec(selectany) requires 'extern'

+ 29 - 0
include/dxc/dxcerrors.h

@@ -0,0 +1,29 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// dxcerrors.h                                                               //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides definition of error codes.                                        //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef __DXC_ERRORS__
+#define __DXC_ERRORS__
+
+#ifndef FACILITY_GRAPHICS
+#define FACILITY_GRAPHICS 36
+#endif
+
+#define DXC_EXCEPTION_CODE(name, status)                                 \
+    static constexpr DWORD EXCEPTION_##name =                 \
+    (0xc0000000u | (FACILITY_GRAPHICS << 16) | (0xff00u | (status & 0xffu)));
+
+DXC_EXCEPTION_CODE(LOAD_LIBRARY_FAILED, 0x00u)
+DXC_EXCEPTION_CODE(NO_HMODULE,          0x01u)
+DXC_EXCEPTION_CODE(GET_PROC_FAILED,     0x02u)
+
+#undef DXC_EXCEPTION_CODE
+
+#endif

+ 1 - 0
include/llvm/ADT/StringRef.h

@@ -64,6 +64,7 @@ namespace llvm {
 
     /// Construct an empty string ref.
     /*implicit*/ StringRef() : Data(nullptr), Length(0) {}
+    StringRef(std::nullptr_t) = delete; // HLSL Change - So we don't accidentally pass `false` again
 
     /// Construct a string ref from a cstring.
     /*implicit*/ StringRef(const char *Str)

+ 2 - 0
include/llvm/IR/BasicBlock.h

@@ -245,6 +245,8 @@ public:
   inline const Instruction       &back() const { return InstList.back();  }
   inline       Instruction       &back()       { return InstList.back();  }
 
+  size_t compute_size_no_dbg() const; // HLSL Change - Get the size of the block without the debug insts
+
   /// \brief Return the underlying instruction list container.
   ///
   /// Currently you need to access the underlying instruction list container

+ 1 - 0
include/llvm/IR/DebugInfo.h

@@ -60,6 +60,7 @@ bool stripDebugInfo(Function &F);
 
 /// \brief Return Debug Info Metadata Version by checking module flags.
 unsigned getDebugMetadataVersionFromModule(const Module &M);
+bool hasDebugInfo(const Module &M); // HLSL Change - Helper function to check if there's real debug info (variables, types)
 
 /// \brief Utility to find all debug info in a module.
 ///

+ 2 - 0
include/llvm/Option/OptTable.h

@@ -133,6 +133,8 @@ public:
                    unsigned FlagsToInclude = 0,
                    unsigned FlagsToExclude = 0) const;
 
+  Option findOption(const char *normalizedName, unsigned FlagsToInclude = 0, unsigned FlagsToExclude = 0) const; // HLSL Change
+
   /// \brief Parse an list of arguments into an InputArgList.
   ///
   /// The resulting InputArgList will reference the strings in [\p ArgBegin,

+ 7 - 6
include/llvm/Support/Casting.h

@@ -16,6 +16,7 @@
 #define LLVM_SUPPORT_CASTING_H
 
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/type_traits.h"
 #include <cassert>
 
@@ -221,21 +222,21 @@ template <class X, class Y>
 inline typename std::enable_if<!is_simple_type<Y>::value,
                                typename cast_retty<X, const Y>::ret_type>::type
 cast(const Y &Val) {
-  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast_convert_val<
       X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val);
 }
 
 template <class X, class Y>
 inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
-  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast_convert_val<X, Y,
                           typename simplify_type<Y>::SimpleType>::doit(Val);
 }
 
 template <class X, class Y>
 inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
-  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast_convert_val<X, Y*,
                           typename simplify_type<Y*>::SimpleType>::doit(Val);
 }
@@ -249,7 +250,7 @@ LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
 cast_or_null(const Y &Val) {
   if (!Val)
     return nullptr;
-  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast<X>(Val);
 }
 
@@ -259,7 +260,7 @@ LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
 cast_or_null(Y &Val) {
   if (!Val)
     return nullptr;
-  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast<X>(Val);
 }
 
@@ -267,7 +268,7 @@ template <class X, class Y>
 LLVM_ATTRIBUTE_UNUSED_RESULT inline typename cast_retty<X, Y *>::ret_type
 cast_or_null(Y *Val) {
   if (!Val) return nullptr;
-  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast<X>(Val);
 }
 

+ 7 - 1
include/llvm/Support/ErrorHandling.h

@@ -84,6 +84,9 @@ namespace llvm {
   LLVM_ATTRIBUTE_NORETURN void
   llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr,
                             unsigned line=0);
+
+  // HLSL Change - throw special exception for cast mismatch
+  void llvm_cast_assert_internal(const char *func);
 }
 
 /// Marks that the current location is not supposed to be reachable.
@@ -94,7 +97,7 @@ namespace llvm {
 ///
 /// Use this instead of assert(0).  It conveys intent more clearly and
 /// allows compilers to omit some unnecessary code.
-#ifndef NDEBUG
+#if 1 // HLSL Change - always throw exception with message for unreachable
 #define llvm_unreachable(msg) \
   ::llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__)
 #elif defined(LLVM_BUILTIN_UNREACHABLE)
@@ -103,4 +106,7 @@ namespace llvm {
 #define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
 #endif
 
+// HLSL Change - throw special exception for cast type mismatch
+#define llvm_cast_assert(X, Val) ((void)( (!!(isa<X>(Val))) || (::llvm::llvm_cast_assert_internal(__FUNCTION__), 0) ))
+
 #endif

+ 184 - 21
lib/DXIL/DxilMetadataHelper.cpp

@@ -41,6 +41,37 @@ using std::string;
 using std::vector;
 using std::unique_ptr;
 
+namespace {
+void LoadSerializedRootSignature(MDNode *pNode,
+                                 std::vector<uint8_t> &SerializedRootSignature,
+                                 LLVMContext &Ctx) {
+  IFTBOOL(pNode->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA);
+  const MDOperand &MDO = pNode->getOperand(0);
+
+  const ConstantAsMetadata *pMetaData = dyn_cast<ConstantAsMetadata>(MDO.get());
+  IFTBOOL(pMetaData != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  const ConstantDataArray *pData =
+      dyn_cast<ConstantDataArray>(pMetaData->getValue());
+  IFTBOOL(pData != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  IFTBOOL(pData->getElementType() == Type::getInt8Ty(Ctx),
+          DXC_E_INCORRECT_DXIL_METADATA);
+
+  SerializedRootSignature.assign(pData->getRawDataValues().begin(),
+                                 pData->getRawDataValues().end());
+}
+
+MDNode *
+EmitSerializedRootSignature(const std::vector<uint8_t> &SerializedRootSignature,
+                            LLVMContext &Ctx) {
+  if (SerializedRootSignature.empty())
+    return nullptr;
+  Constant *V = llvm::ConstantDataArray::get(
+      Ctx, llvm::ArrayRef<uint8_t>(SerializedRootSignature.data(),
+                                   SerializedRootSignature.size()));
+  return MDNode::get(Ctx, {ConstantAsMetadata::get(V)});
+}
+
+} // namespace
 
 namespace hlsl {
 
@@ -57,6 +88,7 @@ const char DxilMDHelper::kDxilTempAllocaMDName[]                      = "dx.temp
 const char DxilMDHelper::kDxilNonUniformAttributeMDName[]             = "dx.nonuniform";
 const char DxilMDHelper::kHLDxilResourceAttributeMDName[]             = "dx.hl.resource.attribute";
 const char DxilMDHelper::kDxilValidatorVersionMDName[]                = "dx.valver";
+const char DxilMDHelper::kDxilDxrPayloadAnnotationsMDName[]           = "dx.dxrPayloadAnnotations";
 
 // This named metadata is not valid in final module (should be moved to DxilContainer)
 const char DxilMDHelper::kDxilRootSignatureMDName[]                   = "dx.rootSignature";
@@ -77,7 +109,7 @@ const char DxilMDHelper::kDxilSourceArgsOldMDName[]                   = "llvm.db
 // This is reflection-only metadata
 const char DxilMDHelper::kDxilCountersMDName[]                        = "dx.counters";
 
-static std::array<const char *, 7> DxilMDNames = { {
+static std::array<const char *, 8> DxilMDNames = { {
   DxilMDHelper::kDxilVersionMDName,
   DxilMDHelper::kDxilShaderModelMDName,
   DxilMDHelper::kDxilEntryPointsMDName,
@@ -85,6 +117,7 @@ static std::array<const char *, 7> DxilMDNames = { {
   DxilMDHelper::kDxilTypeSystemMDName,
   DxilMDHelper::kDxilValidatorVersionMDName,
   DxilMDHelper::kDxilViewIdStateMDName,
+  DxilMDHelper::kDxilDxrPayloadAnnotationsMDName,
 }};
 
 DxilMDHelper::DxilMDHelper(Module *pModule, std::unique_ptr<ExtraPropertyHelper> EPH)
@@ -384,14 +417,12 @@ void DxilMDHelper::EmitRootSignature(
     return;
   }
 
-  Constant *V = llvm::ConstantDataArray::get(
-      m_Ctx, llvm::ArrayRef<uint8_t>(SerializedRootSignature.data(),
-                                     SerializedRootSignature.size()));
+  MDNode *Node = EmitSerializedRootSignature(SerializedRootSignature, m_Ctx);
 
   NamedMDNode *pRootSignatureNamedMD = m_pModule->getNamedMetadata(kDxilRootSignatureMDName);
   IFTBOOL(pRootSignatureNamedMD == nullptr, DXC_E_INCORRECT_DXIL_METADATA);
   pRootSignatureNamedMD = m_pModule->getOrInsertNamedMetadata(kDxilRootSignatureMDName);
-  pRootSignatureNamedMD->addOperand(MDNode::get(m_Ctx, {ConstantAsMetadata::get(V)}));
+  pRootSignatureNamedMD->addOperand(Node);
   return ;
 }
 
@@ -445,22 +476,7 @@ void DxilMDHelper::LoadRootSignature(std::vector<uint8_t> &SerializedRootSignatu
   IFTBOOL(pRootSignatureNamedMD->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA);
 
   MDNode *pNode = pRootSignatureNamedMD->getOperand(0);
-  IFTBOOL(pNode->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA);
-  const MDOperand &MDO = pNode->getOperand(0);
-
-  const ConstantAsMetadata *pMetaData = dyn_cast<ConstantAsMetadata>(MDO.get());
-  IFTBOOL(pMetaData != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
-  const ConstantDataArray *pData =
-      dyn_cast<ConstantDataArray>(pMetaData->getValue());
-  IFTBOOL(pData != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
-  IFTBOOL(pData->getElementType() == Type::getInt8Ty(m_Ctx),
-          DXC_E_INCORRECT_DXIL_METADATA);
-
-  SerializedRootSignature.clear();
-  unsigned size = pData->getRawDataValues().size();
-  SerializedRootSignature.resize(size);
-  memcpy(SerializedRootSignature.data(),
-         (const uint8_t *)pData->getRawDataValues().begin(), size);
+  LoadSerializedRootSignature(pNode, SerializedRootSignature, m_Ctx);
 }
 
 static const MDTuple *CastToTupleOrNull(const MDOperand &MDO) {
@@ -850,6 +866,130 @@ void DxilMDHelper::LoadDxilTypeSystem(DxilTypeSystem &TypeSystem) {
   }
 }
 
+void DxilMDHelper::EmitDxrPayloadAnnotations(DxilTypeSystem &TypeSystem) {
+  auto &TypeMap = TypeSystem.GetPayloadAnnotationMap();
+  vector<Metadata *> MDVals;
+  MDVals.emplace_back(Uint32ToConstMD(kDxilPayloadAnnotationStructTag)); // Tag
+  unsigned GVIdx = 0;
+  for (auto it = TypeMap.begin(); it != TypeMap.end(); ++it, GVIdx++) {
+    StructType *pStructType = const_cast<StructType *>(it->first);
+    DxilPayloadAnnotation *pA = it->second.get();
+    // Emit struct type field annotations.
+    Metadata *pMD = EmitDxrPayloadStructAnnotation(*pA);
+
+    MDVals.push_back(ValueAsMetadata::get(UndefValue::get(pStructType)));
+    MDVals.push_back(pMD);
+  }
+
+  NamedMDNode *pDxrPayloadAnnotationsMD = m_pModule->getNamedMetadata(kDxilDxrPayloadAnnotationsMDName);
+  if (pDxrPayloadAnnotationsMD != nullptr) {
+    m_pModule->eraseNamedMetadata(pDxrPayloadAnnotationsMD);
+  }
+
+  if (MDVals.size() > 1) {
+    pDxrPayloadAnnotationsMD = m_pModule->getOrInsertNamedMetadata(kDxilDxrPayloadAnnotationsMDName);
+    pDxrPayloadAnnotationsMD->addOperand(MDNode::get(m_Ctx, MDVals));
+  }
+}
+
+Metadata *
+DxilMDHelper::EmitDxrPayloadStructAnnotation(const DxilPayloadAnnotation &SA) {
+  vector<Metadata *> MDVals;
+  MDVals.reserve(SA.GetNumFields());
+  MDVals.resize(SA.GetNumFields());
+
+  const StructType* STy = SA.GetStructType();
+  for (unsigned i = 0; i < SA.GetNumFields(); i++) {
+    MDVals[i] = EmitDxrPayloadFieldAnnotation(SA.GetFieldAnnotation(i), STy->getElementType(i));
+  }
+
+  return MDNode::get(m_Ctx, MDVals);
+}
+
+void DxilMDHelper::LoadDxrPayloadAccessQualifiers(const MDOperand &MDO,
+                                               DxilPayloadFieldAnnotation &FA) {
+  unsigned fieldBitmask = ConstMDToInt32(MDO);
+  if (fieldBitmask & ~DXIL::PayloadAccessQualifierValidMask) {
+    DXASSERT(false, "Unknown payload access qualifier bits set");
+    m_bExtraMetadata = true;
+  }
+  fieldBitmask &= DXIL::PayloadAccessQualifierValidMask;
+  FA.SetPayloadFieldQualifierMask(fieldBitmask);
+}
+
+void DxilMDHelper::LoadDxrPayloadFieldAnnoation(
+    const MDOperand &MDO, DxilPayloadFieldAnnotation &FA) {
+  IFTBOOL(MDO.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  const MDTuple *pTupleMD = dyn_cast<MDTuple>(MDO.get()); // Tag-Value list.
+  IFTBOOL(pTupleMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  IFTBOOL((pTupleMD->getNumOperands() & 0x1) == 0, DXC_E_INCORRECT_DXIL_METADATA);
+
+  for (unsigned i = 0; i < pTupleMD->getNumOperands(); i += 2) {
+    unsigned Tag = ConstMDToUint32(pTupleMD->getOperand(i));
+    const MDOperand &MDO = pTupleMD->getOperand(i + 1);
+    IFTBOOL(MDO.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+
+    switch (Tag) {
+    case kDxilPayloadFieldAnnotationAccessTag:
+      LoadDxrPayloadAccessQualifiers(MDO, FA);
+      break;
+    default:
+      DXASSERT(false, "Unknown payload field annotation tag");
+      m_bExtraMetadata = true;
+      break;
+    }
+  }
+}
+
+void DxilMDHelper::LoadDxrPayloadFieldAnnoations(const MDOperand &MDO,
+                                                DxilPayloadAnnotation &SA) {
+  IFTBOOL(MDO.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  const MDTuple *pTupleMD = dyn_cast<MDTuple>(MDO.get());
+  IFTBOOL(pTupleMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  IFTBOOL(pTupleMD->getNumOperands() == SA.GetNumFields(),
+          DXC_E_INCORRECT_DXIL_METADATA);
+  for (unsigned i = 0; i < SA.GetNumFields(); ++i) {
+    LoadDxrPayloadFieldAnnoation(pTupleMD->getOperand(i), SA.GetFieldAnnotation(i));
+  }
+}
+
+// Loads one operand of the "dx.dxrPayloadAnnotations" named metadata into
+// TypeSystem.
+//
+// Layout (as written by EmitDxrPayloadAnnotations): a leading
+// kDxilPayloadAnnotationStructTag followed by any number of
+// (undef value of the payload struct type, field annotation tuple) pairs.
+//
+// Throws DXC_E_INCORRECT_DXIL_METADATA (via IFTBOOL) when the tuple does not
+// match that layout.
+void DxilMDHelper::LoadDxrPayloadAnnotationNode(const llvm::MDTuple &MDT,
+                                                DxilTypeSystem &TypeSystem) {
+  // Tag plus at least one complete pair: the operand count must be odd and
+  // >= 3. Checking this first keeps every getOperand() below in range.
+  const unsigned NumOperands = MDT.getNumOperands();
+  IFTBOOL(NumOperands >= 3, DXC_E_INCORRECT_DXIL_METADATA);
+  IFTBOOL((NumOperands & 0x1) == 1, DXC_E_INCORRECT_DXIL_METADATA);
+
+  unsigned Tag = ConstMDToUint32(MDT.getOperand(0));
+  IFTBOOL(Tag == kDxilPayloadAnnotationStructTag, DXC_E_INCORRECT_DXIL_METADATA);
+
+  // Walk every (struct type, annotations) pair; the emitter packs all payload
+  // structs into this single tuple, so reading only the first pair would drop
+  // the annotations of every other payload struct.
+  for (unsigned i = 1; i < NumOperands; i += 2) {
+    Constant *pGV = dyn_cast<Constant>(ValueMDToValue(MDT.getOperand(i)));
+    IFTBOOL(pGV != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+    StructType *pGVType = dyn_cast<StructType>(pGV->getType());
+    IFTBOOL(pGVType != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+
+    // Obtain the annotation object for this struct from the type system and
+    // populate its per-field qualifiers.
+    DxilPayloadAnnotation *pPA = TypeSystem.AddPayloadAnnotation(pGVType);
+
+    LoadDxrPayloadFieldAnnoations(MDT.getOperand(i + 1), *pPA);
+  }
+}
+
+void DxilMDHelper::LoadDxrPayloadAnnotations(DxilTypeSystem &TypeSystem) {
+  NamedMDNode *pDxilPayloadAnnotationsMD =
+      m_pModule->getNamedMetadata(kDxilDxrPayloadAnnotationsMDName);
+  if (pDxilPayloadAnnotationsMD == nullptr)
+    return;
+
+  if (DXIL::CompareVersions(m_MinValMajor, m_MinValMinor, 1, 6) < 0) {
+    DXASSERT(false, "payload access qualifier emitted for dxil version < 1.6");
+    m_bExtraMetadata = true;
+  }
+  DXASSERT(pDxilPayloadAnnotationsMD->getNumOperands() != 0, "empty metadata node?");
+
+  for (unsigned i = 0; i < pDxilPayloadAnnotationsMD->getNumOperands(); i++) {
+    const MDTuple *pTupleMD =
+        dyn_cast<MDTuple>(pDxilPayloadAnnotationsMD->getOperand(i));
+    IFTBOOL(pTupleMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+    LoadDxrPayloadAnnotationNode(*pTupleMD, TypeSystem);
+  }
+}
+
 Metadata *DxilMDHelper::EmitDxilTemplateArgAnnotation(const DxilTemplateArgAnnotation &annotation) {
   SmallVector<Metadata *, 2> MDVals;
   if (annotation.IsType()) {
@@ -1065,6 +1205,7 @@ Metadata *DxilMDHelper::EmitDxilFieldAnnotation(const DxilFieldAnnotation &FA) {
   return MDNode::get(m_Ctx, MDVals);
 }
 
+
 void DxilMDHelper::LoadDxilFieldAnnotation(const MDOperand &MDO, DxilFieldAnnotation &FA) {
   IFTBOOL(MDO.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
   const MDTuple *pTupleMD = dyn_cast<MDTuple>(MDO.get());
@@ -1116,6 +1257,17 @@ void DxilMDHelper::LoadDxilFieldAnnotation(const MDOperand &MDO, DxilFieldAnnota
   }
 }
 
+// Builds the metadata node for one payload field: the access tag followed by
+// the field's qualifier bitmask. fieldType is not consulted here.
+Metadata *
+DxilMDHelper::EmitDxrPayloadFieldAnnotation(const DxilPayloadFieldAnnotation &FA, Type* fieldType) {
+  // Tag-Value list.
+  SmallVector<Metadata *, 2> TagValuePair;
+  TagValuePair.push_back(Uint32ToConstMD(kDxilPayloadFieldAnnotationAccessTag));
+  TagValuePair.push_back(Uint32ToConstMD(FA.GetPayloadFieldQualifierMask()));
+
+  return MDNode::get(m_Ctx, TagValuePair);
+}
+
 const Function *DxilMDHelper::LoadDxilFunctionProps(const MDTuple *pProps,
                                               hlsl::DxilFunctionProps *props) {
   unsigned idx = 0;
@@ -1345,6 +1497,13 @@ MDTuple *DxilMDHelper::EmitDxilEntryProperties(uint64_t rawShaderFlag,
         MDNode::get(m_Ctx, {Uint32ToConstMD(autoBindingSpace)}));
   }
 
+  if (!props.serializedRootSignature.empty() &&
+      DXIL::CompareVersions(m_MinValMajor, m_MinValMinor, 1, 6) > 0) {
+    MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilEntryRootSigTag));
+    MDVals.emplace_back(
+        EmitSerializedRootSignature(props.serializedRootSignature, m_Ctx));
+  }
+
   if (!MDVals.empty())
     return MDNode::get(m_Ctx, MDVals);
   else
@@ -1468,6 +1627,10 @@ void DxilMDHelper::LoadDxilEntryProperties(const MDOperand &MDO,
       MDNode *pNode = cast<MDNode>(MDO.get());
       props.waveSize = ConstMDToUint32(pNode->getOperand(0));
     } break;
+    case DxilMDHelper::kDxilEntryRootSigTag: {
+      MDNode *pNode = cast<MDNode>(MDO.get());
+      LoadSerializedRootSignature(pNode, props.serializedRootSignature, m_Ctx);
+    } break;
     default:
       DXASSERT(false, "Unknown extended shader properties tag");
       m_bExtraMetadata = true;

+ 45 - 13
lib/DXIL/DxilModule.cpp

@@ -1309,8 +1309,6 @@ void DxilModule::UpdateValidatorVersionMetadata() {
 }
 
 void DxilModule::ResetSerializedRootSignature(std::vector<uint8_t> &Value) {
-  m_SerializedRootSignature.clear();
-  m_SerializedRootSignature.reserve(Value.size());
   m_SerializedRootSignature.assign(Value.begin(), Value.end());
 }
 
@@ -1318,6 +1316,10 @@ DxilTypeSystem &DxilModule::GetTypeSystem() {
   return *m_pTypeSystem;
 }
 
+// Read-only access to the type system held by this module.
+const DxilTypeSystem &DxilModule::GetTypeSystem() const {
+  const DxilTypeSystem &TypeSys = *m_pTypeSystem;
+  return TypeSys;
+}
+
 std::vector<unsigned> &DxilModule::GetSerializedViewIdState() {
   return m_SerializedState;
 }
@@ -1453,6 +1455,14 @@ void DxilModule::EmitDxilMetadata() {
        (m_ValMajor > 1 || (m_ValMajor == 1 && m_ValMinor >= 1)))) {
     m_pMDHelper->EmitDxilViewIdState(m_SerializedState);
   }
+
+  // Emit the DXR Payload Annotations only for library Dxil 1.6 and above.
+  if (m_pSM->IsLib()) {
+    if (DXIL::CompareVersions(m_DxilMajor, m_DxilMinor, 1, 6) >= 0) {
+      m_pMDHelper->EmitDxrPayloadAnnotations(GetTypeSystem());
+    }
+  }
+
   EmitLLVMUsed();
   MDTuple *pEntry = m_pMDHelper->EmitDxilEntryPointTuple(GetEntryFunction(), m_EntryName, pMDSignatures, pMDResources, pMDProperties);
   vector<MDNode *> Entries;
@@ -1505,7 +1515,6 @@ bool DxilModule::HasMetadataErrors() {
 
 void DxilModule::LoadDxilMetadata() {
   m_bMetadataErrors = false;
-  m_pMDHelper->LoadDxilVersion(m_DxilMajor, m_DxilMinor);
   m_pMDHelper->LoadValidatorVersion(m_ValMajor, m_ValMinor);
   const ShaderModel *loadedSM;
   m_pMDHelper->LoadDxilShaderModel(loadedSM);
@@ -1547,6 +1556,9 @@ void DxilModule::LoadDxilMetadata() {
 
   // Now that we have the UseMinPrecision flag, set shader model:
   SetShaderModel(loadedSM, m_bUseMinPrecision);
+  // SetShaderModel will initialize m_DxilMajor/m_DxilMinor to min for SM,
+  // so, load here after shader model so it matches the metadata.
+  m_pMDHelper->LoadDxilVersion(m_DxilMajor, m_DxilMinor);
 
   if (loadedSM->IsLib()) {
     for (unsigned i = 1; i < pEntries->getNumOperands(); i++) {
@@ -1607,6 +1619,17 @@ void DxilModule::LoadDxilMetadata() {
     m_pTypeSystem->GetFunctionAnnotationMap().clear();
   }
 
+  // Payload annotations not required for consumption of dxil.
+  try {
+    m_pMDHelper->LoadDxrPayloadAnnotations(*m_pTypeSystem.get());
+  } catch (hlsl::Exception &) {
+    m_bMetadataErrors = true;
+#ifdef DBG
+    throw;
+#endif
+    m_pTypeSystem->GetPayloadAnnotationMap().clear();
+  }
+
   m_pMDHelper->LoadRootSignature(m_SerializedRootSignature);
 
   m_pMDHelper->LoadDxilViewIdState(m_SerializedState);
@@ -1740,20 +1763,29 @@ bool DxilModule::StripReflection() {
     // since they have not yet been converted for legacy layout.
     // Keep all structs contained in any we must keep.
     SmallStructSetVector structsToKeep;
-    SmallStructSetVector structsToRemove;
-    for (auto &item : m_pTypeSystem->GetStructAnnotationMap()) {
       SmallStructSetVector containedStructs;
-      if (!ResourceTypeRequiresTranslation(item.first, containedStructs))
-        structsToRemove.insert(item.first);
-      else
-        structsToKeep.insert(containedStructs.begin(), containedStructs.end());
+    for (auto &CBuf : GetCBuffers())
+      if (StructType *ST = dyn_cast<StructType>(CBuf->GetHLSLType()))
+        if (ResourceTypeRequiresTranslation(ST, containedStructs))
+          structsToKeep.insert(containedStructs.begin(), containedStructs.end());
+
+    for (auto &UAV : GetUAVs()) {
+      if (DXIL::IsStructuredBuffer(UAV->GetKind()))
+        if (StructType *ST = dyn_cast<StructType>(UAV->GetHLSLType()))
+          if (ResourceTypeRequiresTranslation(ST, containedStructs))
+            structsToKeep.insert(containedStructs.begin(), containedStructs.end());
     }
 
-    for (auto Ty : structsToKeep)
-      structsToRemove.remove(Ty);
-    for (auto Ty : structsToRemove) {
-      m_pTypeSystem->GetStructAnnotationMap().erase(Ty);
+    for (auto &SRV : GetSRVs()) {
+      if (SRV->IsStructuredBuffer() || SRV->IsTBuffer())
+        if (StructType *ST = dyn_cast<StructType>(SRV->GetHLSLType()))
+          if (ResourceTypeRequiresTranslation(ST, containedStructs))
+            structsToKeep.insert(containedStructs.begin(), containedStructs.end());
     }
+
+    m_pTypeSystem->GetStructAnnotationMap().remove_if([structsToKeep](
+      const std::pair<const StructType *, std::unique_ptr<DxilStructAnnotation>>
+          &I) { return !structsToKeep.count(I.first); });
   } else {
     // Remove struct annotations.
     if (!m_pTypeSystem->GetStructAnnotationMap().empty()) {

+ 8 - 0
lib/DXIL/DxilResource.cpp

@@ -77,6 +77,14 @@ void DxilResource::SetElementStride(unsigned ElemStride) {
   m_ElementStride = ElemStride;
 }
 
+// Returns the stored base-alignment value (kept as a log2, per the name).
+unsigned DxilResource::GetBaseAlignLog2() const {
+  const unsigned alignLog2 = m_baseAlignLog2;
+  return alignLog2;
+}
+
+// Stores baseAlignLog2 as this resource's base-alignment value.
+void DxilResource::SetBaseAlignLog2(unsigned baseAlignLog2) {
+  this->m_baseAlignLog2 = baseAlignLog2;
+}
+
 DXIL::SamplerFeedbackType DxilResource::GetSamplerFeedbackType() const {
   return m_SamplerFeedbackType;
 }

+ 3 - 1
lib/DXIL/DxilResourceProperties.cpp

@@ -155,11 +155,13 @@ DxilResourceProperties loadPropsFromResourceBase(const DxilResourceBase *Res) {
 
       break;
     case DXIL::ResourceKind::StructuredBuffer:
+    {
       RP.StructStrideInBytes = Res.GetElementStride();
+      RP.Basic.BaseAlignLog2 = Res.GetBaseAlignLog2();
       break;
+    }
     case DXIL::ResourceKind::Texture2DMS:
     case DXIL::ResourceKind::Texture2DMSArray:
-      break;
     case DXIL::ResourceKind::TypedBuffer:
     case DXIL::ResourceKind::Texture1D:
     case DXIL::ResourceKind::Texture2D:

+ 2 - 1
lib/DXIL/DxilShaderFlags.cpp

@@ -373,6 +373,7 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
   M->GetValidatorVersion(valMajor, valMinor);
   bool hasMulticomponentUAVLoadsBackCompat = valMajor == 1 && valMinor == 0;
   bool hasViewportOrRTArrayIndexBackCombat = valMajor == 1 && valMinor < 4;
+  bool hasBarycentricsBackCompat = valMajor == 1 && valMinor < 6;
 
   Type *int16Ty = Type::getInt16Ty(F->getContext());
   Type *int64Ty = Type::getInt64Ty(F->getContext());
@@ -630,7 +631,7 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
   flag.SetViewID(hasViewID);
   flag.SetViewportAndRTArrayIndex(hasViewportOrRTArrayIndex);
   flag.SetShadingRate(hasShadingRate);
-  flag.SetBarycentrics(hasBarycentrics);
+  flag.SetBarycentrics(hasBarycentricsBackCompat ? false : hasBarycentrics);
   flag.SetSamplerFeedback(hasSamplerFeedback);
   flag.SetRaytracingTier1_1(hasRaytracingTier1_1);
   flag.SetAtomicInt64OnTypedResource(hasAtomicInt64OnTypedResource);

+ 94 - 63
lib/DXIL/DxilShaderModel.cpp

@@ -63,6 +63,7 @@ bool ShaderModel::IsValidForDxil() const {
       case 4:
       case 5:
       case 6:
+      case 7:
       // VALRULE-TEXT:END
         return true;
       case kOfflineMinor:
@@ -94,69 +95,78 @@ const ShaderModel *ShaderModel::Get(Kind Kind, unsigned Major, unsigned Minor) {
   {1540,8}, //ps_6_4
   {1541,9}, //ps_6_5
   {1542,10}, //ps_6_6
-  {66560,11}, //vs_4_0
-  {66561,12}, //vs_4_1
-  {66816,13}, //vs_5_0
-  {66817,14}, //vs_5_1
-  {67072,15}, //vs_6_0
-  {67073,16}, //vs_6_1
-  {67074,17}, //vs_6_2
-  {67075,18}, //vs_6_3
-  {67076,19}, //vs_6_4
-  {67077,20}, //vs_6_5
-  {67078,21}, //vs_6_6
-  {132096,22}, //gs_4_0
-  {132097,23}, //gs_4_1
-  {132352,24}, //gs_5_0
-  {132353,25}, //gs_5_1
-  {132608,26}, //gs_6_0
-  {132609,27}, //gs_6_1
-  {132610,28}, //gs_6_2
-  {132611,29}, //gs_6_3
-  {132612,30}, //gs_6_4
-  {132613,31}, //gs_6_5
-  {132614,32}, //gs_6_6
-  {197888,33}, //hs_5_0
-  {197889,34}, //hs_5_1
-  {198144,35}, //hs_6_0
-  {198145,36}, //hs_6_1
-  {198146,37}, //hs_6_2
-  {198147,38}, //hs_6_3
-  {198148,39}, //hs_6_4
-  {198149,40}, //hs_6_5
-  {198150,41}, //hs_6_6
-  {263424,42}, //ds_5_0
-  {263425,43}, //ds_5_1
-  {263680,44}, //ds_6_0
-  {263681,45}, //ds_6_1
-  {263682,46}, //ds_6_2
-  {263683,47}, //ds_6_3
-  {263684,48}, //ds_6_4
-  {263685,49}, //ds_6_5
-  {263686,50}, //ds_6_6
-  {328704,51}, //cs_4_0
-  {328705,52}, //cs_4_1
-  {328960,53}, //cs_5_0
-  {328961,54}, //cs_5_1
-  {329216,55}, //cs_6_0
-  {329217,56}, //cs_6_1
-  {329218,57}, //cs_6_2
-  {329219,58}, //cs_6_3
-  {329220,59}, //cs_6_4
-  {329221,60}, //cs_6_5
-  {329222,61}, //cs_6_6
-  {394753,62}, //lib_6_1
-  {394754,63}, //lib_6_2
-  {394755,64}, //lib_6_3
-  {394756,65}, //lib_6_4
-  {394757,66}, //lib_6_5
-  {394758,67}, //lib_6_6
+  {1543,11}, //ps_6_7
+  {66560,12}, //vs_4_0
+  {66561,13}, //vs_4_1
+  {66816,14}, //vs_5_0
+  {66817,15}, //vs_5_1
+  {67072,16}, //vs_6_0
+  {67073,17}, //vs_6_1
+  {67074,18}, //vs_6_2
+  {67075,19}, //vs_6_3
+  {67076,20}, //vs_6_4
+  {67077,21}, //vs_6_5
+  {67078,22}, //vs_6_6
+  {67079,23}, //vs_6_7
+  {132096,24}, //gs_4_0
+  {132097,25}, //gs_4_1
+  {132352,26}, //gs_5_0
+  {132353,27}, //gs_5_1
+  {132608,28}, //gs_6_0
+  {132609,29}, //gs_6_1
+  {132610,30}, //gs_6_2
+  {132611,31}, //gs_6_3
+  {132612,32}, //gs_6_4
+  {132613,33}, //gs_6_5
+  {132614,34}, //gs_6_6
+  {132615,35}, //gs_6_7
+  {197888,36}, //hs_5_0
+  {197889,37}, //hs_5_1
+  {198144,38}, //hs_6_0
+  {198145,39}, //hs_6_1
+  {198146,40}, //hs_6_2
+  {198147,41}, //hs_6_3
+  {198148,42}, //hs_6_4
+  {198149,43}, //hs_6_5
+  {198150,44}, //hs_6_6
+  {198151,45}, //hs_6_7
+  {263424,46}, //ds_5_0
+  {263425,47}, //ds_5_1
+  {263680,48}, //ds_6_0
+  {263681,49}, //ds_6_1
+  {263682,50}, //ds_6_2
+  {263683,51}, //ds_6_3
+  {263684,52}, //ds_6_4
+  {263685,53}, //ds_6_5
+  {263686,54}, //ds_6_6
+  {263687,55}, //ds_6_7
+  {328704,56}, //cs_4_0
+  {328705,57}, //cs_4_1
+  {328960,58}, //cs_5_0
+  {328961,59}, //cs_5_1
+  {329216,60}, //cs_6_0
+  {329217,61}, //cs_6_1
+  {329218,62}, //cs_6_2
+  {329219,63}, //cs_6_3
+  {329220,64}, //cs_6_4
+  {329221,65}, //cs_6_5
+  {329222,66}, //cs_6_6
+  {329223,67}, //cs_6_7
+  {394753,68}, //lib_6_1
+  {394754,69}, //lib_6_2
+  {394755,70}, //lib_6_3
+  {394756,71}, //lib_6_4
+  {394757,72}, //lib_6_5
+  {394758,73}, //lib_6_6
+  {394759,74}, //lib_6_7
   // lib_6_x is for offline linking only, and relaxes restrictions
-  {394767,68},//lib_6_x
-  {853509,69}, //ms_6_5
-  {853510,70}, //ms_6_6
-  {919045,71}, //as_6_5
-  {919046,72}, //as_6_6
+  {394767,75},//lib_6_x
+  {853509,76}, //ms_6_5
+  {853510,77}, //ms_6_6
+  {853511,78}, //ms_6_7
+  {919045,79}, //as_6_5
+  {919046,80}, //as_6_6
+  {919047,81}, //as_6_7
   };
   unsigned hash = (unsigned)Kind << 16 | Major << 8 | Minor;
   auto it = hashToIdxMap.find(hash);
@@ -237,6 +247,12 @@ const ShaderModel *ShaderModel::GetByName(const char *pszName) {
       break;
     }
   else return GetInvalid();
+  case '7':
+    if (Major == 6) {
+      Minor = 7;
+      break;
+    }
+  else return GetInvalid();
   // VALRULE-TEXT:END
     case 'x':
       if (kind == Kind::Library && Major == 6) {
@@ -279,8 +295,11 @@ void ShaderModel::GetDxilVersion(unsigned &DxilMajor, unsigned &DxilMinor) const
   case 6:
     DxilMinor = 6;
     break;
+  case 7:
+    DxilMinor = 7;
+    break;
   case kOfflineMinor: // Always update this to highest dxil version
-    DxilMinor = 6;
+    DxilMinor = 7;
     break;
   // VALRULE-TEXT:END
   default:
@@ -316,6 +335,9 @@ void ShaderModel::GetMinValidatorVersion(unsigned &ValMajor, unsigned &ValMinor)
   case 6:
     ValMinor = 6;
     break;
+  case 7:
+    ValMinor = 7;
+    break;
   // VALRULE-TEXT:END
   case kOfflineMinor:
     ValMajor = 0;
@@ -365,6 +387,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Pixel, 6, 4, "ps_6_4", 32, 8, true, true, UINT_MAX),
   SM(Kind::Pixel, 6, 5, "ps_6_5", 32, 8, true, true, UINT_MAX),
   SM(Kind::Pixel, 6, 6, "ps_6_6", 32, 8, true, true, UINT_MAX),
+  SM(Kind::Pixel, 6, 7, "ps_6_7", 32, 8, true, true, UINT_MAX),
   SM(Kind::Vertex, 4, 0, "vs_4_0", 16, 16, false, false, 0),
   SM(Kind::Vertex, 4, 1, "vs_4_1", 32, 32, false, false, 0),
   SM(Kind::Vertex, 5, 0, "vs_5_0", 32, 32, true, true, 64),
@@ -376,6 +399,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Vertex, 6, 4, "vs_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Vertex, 6, 5, "vs_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Vertex, 6, 6, "vs_6_6", 32, 32, true, true, UINT_MAX),
+  SM(Kind::Vertex, 6, 7, "vs_6_7", 32, 32, true, true, UINT_MAX),
   SM(Kind::Geometry, 4, 0, "gs_4_0", 16, 32, false, false, 0),
   SM(Kind::Geometry, 4, 1, "gs_4_1", 32, 32, false, false, 0),
   SM(Kind::Geometry, 5, 0, "gs_5_0", 32, 32, true, true, 64),
@@ -387,6 +411,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Geometry, 6, 4, "gs_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Geometry, 6, 5, "gs_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Geometry, 6, 6, "gs_6_6", 32, 32, true, true, UINT_MAX),
+  SM(Kind::Geometry, 6, 7, "gs_6_7", 32, 32, true, true, UINT_MAX),
   SM(Kind::Hull, 5, 0, "hs_5_0", 32, 32, true, true, 64),
   SM(Kind::Hull, 5, 1, "hs_5_1", 32, 32, true, true, 64),
   SM(Kind::Hull, 6, 0, "hs_6_0", 32, 32, true, true, UINT_MAX),
@@ -396,6 +421,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Hull, 6, 4, "hs_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Hull, 6, 5, "hs_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Hull, 6, 6, "hs_6_6", 32, 32, true, true, UINT_MAX),
+  SM(Kind::Hull, 6, 7, "hs_6_7", 32, 32, true, true, UINT_MAX),
   SM(Kind::Domain, 5, 0, "ds_5_0", 32, 32, true, true, 64),
   SM(Kind::Domain, 5, 1, "ds_5_1", 32, 32, true, true, 64),
   SM(Kind::Domain, 6, 0, "ds_6_0", 32, 32, true, true, UINT_MAX),
@@ -405,6 +431,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Domain, 6, 4, "ds_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Domain, 6, 5, "ds_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Domain, 6, 6, "ds_6_6", 32, 32, true, true, UINT_MAX),
+  SM(Kind::Domain, 6, 7, "ds_6_7", 32, 32, true, true, UINT_MAX),
   SM(Kind::Compute, 4, 0, "cs_4_0", 0, 0, false, false, 0),
   SM(Kind::Compute, 4, 1, "cs_4_1", 0, 0, false, false, 0),
   SM(Kind::Compute, 5, 0, "cs_5_0", 0, 0, true, true, 64),
@@ -416,18 +443,22 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Compute, 6, 4, "cs_6_4", 0, 0, true, true, UINT_MAX),
   SM(Kind::Compute, 6, 5, "cs_6_5", 0, 0, true, true, UINT_MAX),
   SM(Kind::Compute, 6, 6, "cs_6_6", 0, 0, true, true, UINT_MAX),
+  SM(Kind::Compute, 6, 7, "cs_6_7", 0, 0, true, true, UINT_MAX),
   SM(Kind::Library, 6, 1, "lib_6_1", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 2, "lib_6_2", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 3, "lib_6_3", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 4, "lib_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 5, "lib_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 6, "lib_6_6", 32, 32, true, true, UINT_MAX),
+  SM(Kind::Library, 6, 7, "lib_6_7", 32, 32, true, true, UINT_MAX),
   // lib_6_x is for offline linking only, and relaxes restrictions
   SM(Kind::Library,  6, kOfflineMinor, "lib_6_x",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Mesh, 6, 5, "ms_6_5", 0, 0, true, true, UINT_MAX),
   SM(Kind::Mesh, 6, 6, "ms_6_6", 0, 0, true, true, UINT_MAX),
+  SM(Kind::Mesh, 6, 7, "ms_6_7", 0, 0, true, true, UINT_MAX),
   SM(Kind::Amplification, 6, 5, "as_6_5", 0, 0, true, true, UINT_MAX),
   SM(Kind::Amplification, 6, 6, "as_6_6", 0, 0, true, true, UINT_MAX),
+  SM(Kind::Amplification, 6, 7, "as_6_7", 0, 0, true, true, UINT_MAX),
   // Values before Invalid must remain sorted by Kind, then Major, then Minor.
   SM(Kind::Invalid,  0, 0, "invalid", 0,  0,   false, false, 0),
   // VALRULE-TEXT:END

+ 133 - 5
lib/DXIL/DxilTypeSystem.cpp

@@ -80,7 +80,69 @@ void DxilFieldAnnotation::SetFieldName(const std::string &FieldName) { m_FieldNa
 bool DxilFieldAnnotation::IsCBVarUsed() const { return m_bCBufferVarUsed; }
 void DxilFieldAnnotation::SetCBVarUsed(bool used) { m_bCBufferVarUsed = used; }
 
+//------------------------------------------------------------------------------
+//
+// DxilPayloadFieldAnnotation class methods.
+//
+// True when the stored component type is anything other than Invalid.
+bool DxilPayloadFieldAnnotation::HasCompType() const {
+  const bool bInvalid = m_CompType.GetKind() == CompType::Kind::Invalid;
+  return !bInvalid;
+}
+// Returns a reference to the stored component type.
+const CompType &DxilPayloadFieldAnnotation::GetCompType() const {
+  return m_CompType;
+}
+// Replaces the stored component type with one built from kind.
+void DxilPayloadFieldAnnotation::SetCompType(CompType::Kind kind) {
+  m_CompType = CompType(kind);
+}
+// Returns the raw access-qualifier bitmask covering all shader stages.
+uint32_t DxilPayloadFieldAnnotation::GetPayloadFieldQualifierMask() const {
+  const uint32_t mask = m_bitmask;
+  return mask;
+}
+
+// Position of the first qualifier bit of shaderStage inside the bitmask:
+// the stage's numeric value times DXIL::PayloadAccessQualifierBitsPerStage.
+unsigned DxilPayloadFieldAnnotation::GetBitOffsetForShaderStage(DXIL::PayloadAccessShaderStage shaderStage ) {
+  const unsigned stageIndex = static_cast<unsigned>(shaderStage);
+  return stageIndex * DXIL::PayloadAccessQualifierBitsPerStage;
+}
+
+// Overwrites the whole qualifier bitmask. Bits outside
+// DXIL::PayloadAccessQualifierValidMask trip the assert and are dropped from
+// the stored value.
+void DxilPayloadFieldAnnotation::SetPayloadFieldQualifierMask(uint32_t fieldBitmask) {
+  const uint32_t keptBits = fieldBitmask & DXIL::PayloadAccessQualifierValidMask;
+  // Masking must not have removed anything.
+  DXASSERT(keptBits == fieldBitmask,
+           "Unknown payload access qualifier bits set");
+  m_bitmask = keptBits;
+}
+
+// ORs the access bits of qualifier into the slot that belongs to shaderStage.
+// Bits outside DXIL::PayloadAccessQualifierValidMaskPerStage trip the assert
+// and are ignored. Bits already set for the stage are left in place.
+void DxilPayloadFieldAnnotation::AddPayloadFieldQualifier(
+    DXIL::PayloadAccessShaderStage shaderStage, DXIL::PayloadAccessQualifier qualifier) {
+  const unsigned rawBits = static_cast<unsigned>(qualifier);
+  const unsigned stageBits = rawBits & DXIL::PayloadAccessQualifierValidMaskPerStage;
+  DXASSERT(stageBits == rawBits,
+           "Unknown payload access qualifier bits set");
 
+  // Move the per-stage bits to the stage's position and merge them in.
+  m_bitmask |= stageBits << GetBitOffsetForShaderStage(shaderStage);
+}
+
+// Decodes the access qualifier recorded for shaderStage.
+//
+// Only the Read bit set yields Read, only the Write bit set yields Write.
+// Both bits set and neither bit set both yield ReadWrite, which is the
+// default.
+DXIL::PayloadAccessQualifier DxilPayloadFieldAnnotation::GetPayloadFieldQualifier(
+    DXIL::PayloadAccessShaderStage shaderStage) const {
+  const unsigned kReadBit = static_cast<unsigned>(DXIL::PayloadAccessQualifier::Read);
+  const unsigned kWriteBit = static_cast<unsigned>(DXIL::PayloadAccessQualifier::Write);
+
+  // Bring the stage's bits down to the low end of the word.
+  const unsigned stageOffset = GetBitOffsetForShaderStage(shaderStage);
+  const unsigned stageBits = m_bitmask >> stageOffset;
+
+  const bool bCanRead = (stageBits & kReadBit) != 0;
+  const bool bCanWrite = (stageBits & kWriteBit) != 0;
+
+  if (bCanRead && !bCanWrite)
+    return DXIL::PayloadAccessQualifier::Read;
+  if (bCanWrite && !bCanRead)
+    return DXIL::PayloadAccessQualifier::Write;
+
+  // Both bits set, or no bit set at all.
+  return DXIL::PayloadAccessQualifier::ReadWrite;
+}
+
+// True when at least one qualifier bit is set for any stage.
+bool DxilPayloadFieldAnnotation::HasAnnotations() const {
+  const bool bAnyBitSet = (m_bitmask != 0);
+  return bAnyBitSet;
+}
 
 //------------------------------------------------------------------------------
 //
@@ -98,10 +160,6 @@ bool DxilTemplateArgAnnotation::IsIntegral() const { return m_Type == nullptr; }
 int64_t DxilTemplateArgAnnotation::GetIntegral() const { return m_Integral; }
 void DxilTemplateArgAnnotation::SetIntegral(int64_t i64) { m_Type = nullptr; m_Integral = i64; }
 
-//------------------------------------------------------------------------------
-//
-// DxilStructAnnotation class methods.
-//
 unsigned DxilStructAnnotation::GetNumFields() const {
   return (unsigned)m_FieldAnnotations.size();
 }
@@ -200,7 +258,30 @@ const Function *DxilFunctionAnnotation::GetFunction() const {
 
 //------------------------------------------------------------------------------
 //
-// DxilStructAnnotationSystem class methods.
+// DxilPayloadAnnotation class methods.
+//
+// Number of field annotations held, narrowed to unsigned.
+unsigned DxilPayloadAnnotation::GetNumFields() const {
+  return static_cast<unsigned>(m_FieldAnnotations.size());
+}
+
+// Mutable access to the annotation of field FieldIdx. The index is not
+// range-checked here.
+DxilPayloadFieldAnnotation &DxilPayloadAnnotation::GetFieldAnnotation(unsigned FieldIdx) {
+  DxilPayloadFieldAnnotation &Field = m_FieldAnnotations[FieldIdx];
+  return Field;
+}
+
+// Read-only access to the annotation of field FieldIdx. The index is not
+// range-checked here.
+const DxilPayloadFieldAnnotation &DxilPayloadAnnotation::GetFieldAnnotation(unsigned FieldIdx) const {
+  const DxilPayloadFieldAnnotation &Field = m_FieldAnnotations[FieldIdx];
+  return Field;
+}
+
+// Returns the struct type this annotation is attached to.
+const StructType *DxilPayloadAnnotation::GetStructType() const {
+  const StructType *pType = m_pStructType;
+  return pType;
+}
+// Attaches this annotation to Ty. The field annotations are not resized.
+void DxilPayloadAnnotation::SetStructType(const llvm::StructType *Ty) {
+  this->m_pStructType = Ty;
+}
+
+//------------------------------------------------------------------------------
+//
+// DxilTypeSystem class methods.
 //
 DxilTypeSystem::DxilTypeSystem(Module *pModule)
     : m_pModule(pModule),
@@ -246,6 +327,53 @@ DxilTypeSystem::StructAnnotationMap &DxilTypeSystem::GetStructAnnotationMap() {
   return m_StructAnnotations;
 }
 
+// Read-only view of the struct annotation map.
+const DxilTypeSystem::StructAnnotationMap &DxilTypeSystem::GetStructAnnotationMap() const{
+  const StructAnnotationMap &Annotations = m_StructAnnotations;
+  return Annotations;
+}
+
+// Creates a payload annotation for pStructType, stores it in the payload
+// annotation map (which owns it) and sizes its field list to the struct's
+// element count. Asserts that the type has no payload annotation yet.
+// Returns a non-owning pointer to the new annotation.
+DxilPayloadAnnotation *DxilTypeSystem::AddPayloadAnnotation(const StructType *pStructType) {
+  DXASSERT_NOMSG(m_PayloadAnnotations.find(pStructType) == m_PayloadAnnotations.end());
+
+  DxilPayloadAnnotation *pAnnotation = new DxilPayloadAnnotation();
+  // Hand ownership to the map before filling in the details.
+  m_PayloadAnnotations[pStructType].reset(pAnnotation);
+
+  pAnnotation->m_pStructType = pStructType;
+  pAnnotation->m_FieldAnnotations.resize(pStructType->getNumElements());
+  return pAnnotation;
+}
+
+// Looks up the payload annotation registered for pStructType.
+// Returns nullptr when the type has none.
+DxilPayloadAnnotation *DxilTypeSystem::GetPayloadAnnotation(const StructType *pStructType) {
+  auto found = m_PayloadAnnotations.find(pStructType);
+  if (found == m_PayloadAnnotations.end())
+    return nullptr;
+  return found->second.get();
+}
+
+// Const lookup of the payload annotation registered for pStructType.
+// Returns nullptr when the type has none.
+const DxilPayloadAnnotation *
+DxilTypeSystem::GetPayloadAnnotation(const StructType *pStructType) const {
+  auto found = m_PayloadAnnotations.find(pStructType);
+  if (found == m_PayloadAnnotations.end())
+    return nullptr;
+  return found->second.get();
+}
+
+// Removes the payload annotation keyed by pStructType, if one exists.
+// NOTE(review): the precondition below checks m_StructAnnotations, not
+// m_PayloadAnnotations. That looks like a copy-paste slip; confirm against
+// callers before changing it.
+void DxilTypeSystem::ErasePayloadAnnotation(const StructType *pStructType) {
+  DXASSERT_NOMSG(m_StructAnnotations.count(pStructType));
+
+  using PayloadEntry =
+      std::pair<const StructType *, std::unique_ptr<DxilPayloadAnnotation>>;
+  auto isTarget = [pStructType](const PayloadEntry &Entry) {
+    return Entry.first == pStructType;
+  };
+  m_PayloadAnnotations.remove_if(isTarget);
+}
+
+// Mutable access to the payload annotation map.
+DxilTypeSystem::PayloadAnnotationMap &DxilTypeSystem::GetPayloadAnnotationMap() {
+  PayloadAnnotationMap &Annotations = m_PayloadAnnotations;
+  return Annotations;
+}
+
+// Read-only view of the payload annotation map.
+const DxilTypeSystem::PayloadAnnotationMap &DxilTypeSystem::GetPayloadAnnotationMap() const{
+  const PayloadAnnotationMap &Annotations = m_PayloadAnnotations;
+  return Annotations;
+}
+
 DxilFunctionAnnotation *DxilTypeSystem::AddFunctionAnnotation(const Function *pFunction) {
   DXASSERT_NOMSG(m_FunctionAnnotations.find(pFunction) == m_FunctionAnnotations.end());
   DxilFunctionAnnotation *pA = new DxilFunctionAnnotation();

+ 22 - 1
lib/DXIL/DxilUtil.cpp

@@ -14,6 +14,7 @@
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/DXIL/DxilModule.h"
 #include "dxc/DXIL/DxilOperations.h"
+#include "dxc/HLSL/DxilConvergentName.h"
 #include "dxc/Support/Global.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
@@ -314,7 +315,7 @@ static void EmitWarningOrErrorOnGlobalVariable(llvm::LLVMContext &Ctx, GlobalVar
 
   if (GV) {
     Module &M = *GV->getParent();
-    if (getDebugMetadataVersionFromModule(M) != 0) {
+    if (hasDebugInfo(M)) {
       DebugInfoFinder FinderObj;
       DebugInfoFinder &Finder = FinderObj;
       // Debug modules have no dxil modules. Use it if you got it.
@@ -1171,6 +1172,26 @@ void ReplaceRawBufferStore64Bit(llvm::Function *F, llvm::Type *ETy, hlsl::OP *hl
   }
 }
 
+// True when Name begins with kConvergentFunctionPrefix.
+bool IsConvergentMarker(const char *Name) {
+  return StringRef(Name).startswith(kConvergentFunctionPrefix);
+}
+
+// True when F is non-null and its name begins with
+// kConvergentFunctionPrefix.
+bool IsConvergentMarker(const Function *F) {
+  if (!F)
+    return false;
+  return F->getName().startswith(kConvergentFunctionPrefix);
+}
+
+// True when V is a call instruction whose called function is a convergent
+// marker. Any other value yields false.
+bool IsConvergentMarker(Value *V) {
+  if (CallInst *Call = dyn_cast<CallInst>(V))
+    return IsConvergentMarker(Call->getCalledFunction());
+  return false;
+}
+
+// Returns operand 0 of the call V. V must be a CallInst: cast<> is an
+// unchecked conversion, not a query.
+Value *GetConvergentSource(Value *V) {
+  CallInst *MarkerCall = cast<CallInst>(V);
+  return MarkerCall->getOperand(0);
+}
+
 }
 }
 

+ 8 - 0
lib/DxcSupport/FileIOHelper.cpp

@@ -778,6 +778,14 @@ HRESULT DxcCreateBlobWithEncodingFromPinned(LPCVOID pText, UINT32 size,
   return DxcCreateBlob(pText, size, true, false, true, codePage, nullptr, pBlobEncoding);
 }
 
+// Creates a blob over the caller's buffer through DxcCreateBlob and returns
+// it as a plain IDxcBlob.
+//
+// pText/size : buffer handed to DxcCreateBlob.
+// pBlob      : receives the blob on success and is set to null on failure.
+// Returns E_POINTER when pBlob is null, the failure code of DxcCreateBlob
+// when creation fails, otherwise the result of the QueryInterface call.
+HRESULT DxcCreateBlobFromPinned(
+    _In_bytecount_(size) LPCVOID pText, UINT32 size,
+    _COM_Outptr_ IDxcBlob **pBlob) throw() {
+  if (pBlob == nullptr)
+    return E_POINTER;
+  *pBlob = nullptr;
+
+  CComPtr<IDxcBlobEncoding> pBlobEncoding;
+  // Do not discard the creation result: on failure pBlobEncoding stays null
+  // and must not be used for QueryInterface.
+  const HRESULT hr = DxcCreateBlob(pText, size, true, false, false, CP_ACP,
+                                   nullptr, &pBlobEncoding);
+  if (FAILED(hr))
+    return hr;
+
+  return pBlobEncoding.QueryInterface(pBlob);
+}
+
 _Use_decl_annotations_
 HRESULT
 DxcCreateBlobWithEncodingFromStream(IStream *pStream, bool newInstanceAlways,

+ 26 - 10
lib/DxcSupport/HLSLOptions.cpp

@@ -138,16 +138,20 @@ bool DxcOpts::IsLibraryProfile() {
   return TargetProfile.startswith("lib_");
 }
 
-bool DxcOpts::IsDebugInfoEnabled() {
+bool DxcOpts::GenerateFullDebugInfo() {
   return DebugInfo;
 }
 
+// A PDB is generated when either DebugInfo or SourceOnlyDebug is set.
+bool DxcOpts::GeneratePDB() {
+  if (DebugInfo)
+    return true;
+  return SourceOnlyDebug;
+}
+
 bool DxcOpts::EmbedDebugInfo() {
   return EmbedDebug;
 }
 
 bool DxcOpts::EmbedPDBName() {
-  return IsDebugInfoEnabled() || !DebugFile.empty();
+  return GeneratePDB() || !DebugFile.empty();
 }
 
 bool DxcOpts::DebugFileIsDirectory() {
@@ -613,6 +617,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.DefaultRowMajor = Args.hasFlag(OPT_Zpr, OPT_INVALID, false);
   opts.DefaultColMajor = Args.hasFlag(OPT_Zpc, OPT_INVALID, false);
   opts.DumpBin = Args.hasFlag(OPT_dumpbin, OPT_INVALID, false);
+  opts.Link = Args.hasFlag(OPT_link, OPT_INVALID, false);
   opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_no_legacy_cbuf_layout, OPT_INVALID, false);
   opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_not_use_legacy_cbuf_load_, OPT_INVALID, opts.NotUseLegacyCBufLoad);
   opts.PackPrefixStable = Args.hasFlag(OPT_pack_prefix_stable, OPT_INVALID, false);
@@ -627,8 +632,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.StripDebug = Args.hasFlag(OPT_Qstrip_debug, OPT_INVALID, false);
   opts.EmbedDebug = Args.hasFlag(OPT_Qembed_debug, OPT_INVALID, false);
   opts.SourceInDebugModule = Args.hasFlag(OPT_Qsource_in_debug_module, OPT_INVALID, false);
-  opts.SourceOnlyDebug = Args.hasFlag(OPT_Qsource_only_debug, OPT_INVALID, false);
-  opts.FullDebug = Args.hasFlag(OPT_Qfull_debug, OPT_INVALID, false);
+  opts.SourceOnlyDebug = Args.hasFlag(OPT_Zs, OPT_INVALID, false);
+  opts.PdbInPrivate = Args.hasFlag(OPT_Qpdb_in_private, OPT_INVALID, false);
   opts.StripRootSignature = Args.hasFlag(OPT_Qstrip_rootsignature, OPT_INVALID, false);
   opts.StripPrivate = Args.hasFlag(OPT_Qstrip_priv, OPT_INVALID, false);
   opts.StripReflection = Args.hasFlag(OPT_Qstrip_reflect, OPT_INVALID, false);
@@ -649,7 +654,18 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   // Lifetime markers on by default in 6.6 unless disabled explicitly
   opts.EnableLifetimeMarkers = Args.hasFlag(OPT_enable_lifetime_markers, OPT_INVALID,
                                             DXIL::CompareVersions(Major, Minor, 6, 6) >= 0) &&
-                               !Args.hasFlag(OPT_disable_lifetime_markers, OPT_INVALID, false);
+                              !Args.hasFlag(OPT_disable_lifetime_markers, OPT_INVALID, false);
+  opts.EnablePayloadQualifiers = Args.hasFlag(OPT_enable_payload_qualifiers, OPT_INVALID,
+                                            DXIL::CompareVersions(Major, Minor, 6, 7) >= 0); 
+  if (DXIL::CompareVersions(Major, Minor, 6, 8) < 0) {
+     opts.EnablePayloadQualifiers &= !Args.hasFlag(OPT_disable_payload_qualifiers, OPT_INVALID, false);
+  }
+  if (opts.EnablePayloadQualifiers && DXIL::CompareVersions(Major, Minor, 6, 6) < 0) {
+    errors << "Invalid target for payload access qualifiers. Only lib_6_6 and beyond are supported.";
+    return 1;
+  }
+
+  opts.HandleExceptions = !Args.hasFlag(OPT_disable_exception_handling, OPT_INVALID, false);
 
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";
@@ -923,13 +939,13 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
     return 1;
   }
 
-  if (opts.FullDebug && opts.SourceOnlyDebug) {
-    errors << "Cannot specify both /Qfull_debug and /Qsource_only_debug";
+  if (opts.DebugInfo && opts.SourceOnlyDebug) {
+    errors << "Cannot specify both /Zi and /Zs";
     return 1;
   }
 
   if (opts.SourceInDebugModule && opts.SourceOnlyDebug) {
-    errors << "Cannot specify both /Qsource_in_debug_module and /Qsource_only_debug";
+    errors << "Cannot specify both /Qsource_in_debug_module and /Zs";
     return 1;
   }
 
@@ -940,8 +956,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
     return 1;
   }
 
-  if (opts.DebugNameForSource && !opts.DebugInfo) {
-    errors << "/Zss requires debug info (/Zi)";
+  if (opts.DebugNameForSource && (!opts.DebugInfo && !opts.SourceOnlyDebug)) {
+    errors << "/Zss requires debug info (/Zi or /Zs)";
     return 1;
   }
 

+ 5 - 11
lib/DxilContainer/DxilContainerAssembler.cpp

@@ -1531,15 +1531,10 @@ DxilContainerWriter *hlsl::NewDxilContainerWriter() {
   return new DxilContainerWriter_impl();
 }
 
-static bool HasDebugInfo(const Module &M) {
-  for (Module::const_named_metadata_iterator NMI = M.named_metadata_begin(),
-                                             NME = M.named_metadata_end();
-       NMI != NME; ++NMI) {
-    if (NMI->getName().startswith("llvm.dbg.")) {
-      return true;
-    }
-  }
-  return false;
+static bool HasDebugInfoOrLineNumbers(const Module &M) {
+  return
+    llvm::getDebugMetadataVersionFromModule(M) != 0 ||
+    llvm::hasDebugInfo(M);
 }
 
 static void GetPaddedProgramPartSize(AbstractMemoryStream *pStream,
@@ -1776,8 +1771,7 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
   // If we have debug information present, serialize it to a debug part, then use the stripped version as the canonical program version.
   CComPtr<AbstractMemoryStream> pProgramStream = pInputProgramStream;
   bool bModuleStripped = false;
-  bool bHasDebugInfo = HasDebugInfo(*pModule->GetModule());
-  if (bHasDebugInfo) {
+  if (HasDebugInfoOrLineNumbers(*pModule->GetModule())) {
     uint32_t debugInUInt32, debugPaddingBytes;
     GetPaddedProgramPartSize(pInputProgramStream, debugInUInt32, debugPaddingBytes);
     if (Flags & SerializeDxilFlags::IncludeDebugInfoPart) {

+ 3 - 53
lib/DxilPIXPasses/DxilAddPixelHitInstrumentation.cpp

@@ -21,6 +21,8 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Transforms/Utils/Local.h"
 
+#include "PixPassHelpers.h"
+
 using namespace llvm;
 using namespace hlsl;
 
@@ -102,59 +104,7 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M) {
     IRBuilder<> Builder(
         dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
 
-    unsigned int UAVResourceHandle =
-        static_cast<unsigned int>(DM.GetUAVs().size());
-
-    // Set up a UAV with structure of a single int
-    SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(Ctx)};
-    llvm::StructType *UAVStructTy =
-        llvm::StructType::create(Elements, "class.RWStructuredBuffer");
-    std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
-    pUAV->SetGlobalName("PIX_CountUAVName");
-    pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
-    pUAV->SetID(UAVResourceHandle);
-    pUAV->SetSpaceID(
-        (unsigned int)-2); // This is the reserved-for-tools register space
-    pUAV->SetSampleCount(1);
-    pUAV->SetGloballyCoherent(false);
-    pUAV->SetHasCounter(false);
-    pUAV->SetCompType(CompType::getI32());
-    pUAV->SetLowerBound(0);
-    pUAV->SetRangeSize(1);
-    pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
-    pUAV->SetRW(true);
-
-    auto pAnnotation = DM.GetTypeSystem().GetStructAnnotation(UAVStructTy);
-    if (pAnnotation == nullptr) {
-      pAnnotation = DM.GetTypeSystem().AddStructAnnotation(UAVStructTy);
-      pAnnotation->GetFieldAnnotation(0).SetCBufferOffset(0);
-      pAnnotation->GetFieldAnnotation(0).SetCompType(
-          hlsl::DXIL::ComponentType::I32);
-      pAnnotation->GetFieldAnnotation(0).SetFieldName("count");
-    }
-
-    ID = DM.AddUAV(std::move(pUAV));
-
-    assert((unsigned)ID == UAVResourceHandle);
-
-    // Create handle for the newly-added UAV
-    Function *CreateHandleOpFunc =
-        HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
-    Constant *CreateHandleOpcodeArg =
-        HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-    Constant *UAVArg = HlslOP->GetI8Const(
-        static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-            DXIL::ResourceClass::UAV));
-    Constant *MetaDataArg =
-        HlslOP->GetU32Const(ID); // position of the metadata record in the
-                                 // corresponding metadata list
-    Constant *IndexArg = HlslOP->GetU32Const(0); //
-    Constant *FalseArg =
-        HlslOP->GetI1Const(0); // non-uniform resource index: false
-    HandleForUAV = Builder.CreateCall(
-        CreateHandleOpFunc,
-        {CreateHandleOpcodeArg, UAVArg, MetaDataArg, IndexArg, FalseArg},
-        "PIX_CountUAV_Handle");
+    HandleForUAV = PIXPassHelpers::CreateUAV(DM, Builder, 0, "PIX_CountUAV_Handle");
 
     DM.ReEmitDxilResources();
   }

+ 2 - 48
lib/DxilPIXPasses/DxilDebugInstrumentation.cpp

@@ -252,7 +252,6 @@ public:
 
 private:
   SystemValueIndices addRequiredSystemValues(BuilderContext &BC);
-  void addUAV(BuilderContext &BC);
   void addInvocationSelectionProlog(BuilderContext &BC,
                                     SystemValueIndices SVIndices);
   Value *addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
@@ -548,52 +547,6 @@ DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC,
   return ComparePos;
 }
 
-void DxilDebugInstrumentation::addUAV(BuilderContext &BC) {
-  // Set up a UAV with structure of a single int
-  unsigned int UAVResourceHandle =
-      static_cast<unsigned int>(BC.DM.GetUAVs().size());
-  SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(BC.Ctx)};
-  llvm::StructType *UAVStructTy =
-      llvm::StructType::create(Elements, "PIX_DebugUAV_Type");
-  std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
-  pUAV->SetGlobalName("PIX_DebugUAVName");
-  pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
-  pUAV->SetID(UAVResourceHandle);
-  pUAV->SetSpaceID(
-      (unsigned int)-2); // This is the reserved-for-tools register space
-  pUAV->SetSampleCount(1);
-  pUAV->SetGloballyCoherent(false);
-  pUAV->SetHasCounter(false);
-  pUAV->SetCompType(CompType::getI32());
-  pUAV->SetLowerBound(0);
-  pUAV->SetRangeSize(1);
-  pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
-  pUAV->SetRW(true);
-
-  auto ID = BC.DM.AddUAV(std::move(pUAV));
-  assert(ID == UAVResourceHandle);
-
-  BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);
-
-  // Create handle for the newly-added UAV
-  Function *CreateHandleOpFunc =
-      BC.HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));
-  Constant *CreateHandleOpcodeArg =
-      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-  Constant *UAVVArg = BC.HlslOP->GetI8Const(
-      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-          DXIL::ResourceClass::UAV));
-  Constant *MetaDataArg = BC.HlslOP->GetU32Const(
-      ID); // position of the metadata record in the corresponding metadata list
-  Constant *IndexArg = BC.HlslOP->GetU32Const(0); //
-  Constant *FalseArg =
-      BC.HlslOP->GetI1Const(0); // non-uniform resource index: false
-  m_HandleForUAV = BC.Builder.CreateCall(
-      CreateHandleOpFunc,
-      {CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg},
-      "PIX_DebugUAV_Handle");
-}
-
 void DxilDebugInstrumentation::addInvocationSelectionProlog(
     BuilderContext &BC, SystemValueIndices SVIndices) {
   auto ShaderModel = BC.DM.GetShaderModel();
@@ -959,7 +912,8 @@ bool DxilDebugInstrumentation::runOnModule(Module &M) {
 
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
 
-  addUAV(BC);
+  m_HandleForUAV = PIXPassHelpers::CreateUAV(BC.DM, BC.Builder, 0, "PIX_DebugUAV_Handle");
+
   auto SystemValues = addRequiredSystemValues(BC);
   addInvocationSelectionProlog(BC, SystemValues);
   addInvocationStartMarker(BC);

+ 6 - 19
lib/DxilPIXPasses/DxilOutputColorBecomesConstant.cpp

@@ -20,6 +20,8 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include <array>
 
+#include "PixPassHelpers.h"
+
 using namespace llvm;
 using namespace hlsl;
 
@@ -166,8 +168,6 @@ bool DxilOutputColorBecomesConstant::runOnModule(Module &M) {
     pCBuf->SetRangeSize(1);
     pCBuf->SetSize(4);
 
-    ID = DM.AddCBuffer(std::move(pCBuf));
-
     Instruction *entryPointInstruction =
         &*(DM.GetEntryFunction()->begin()->begin());
     IRBuilder<> Builder(entryPointInstruction);
@@ -175,23 +175,10 @@ bool DxilOutputColorBecomesConstant::runOnModule(Module &M) {
     // Create handle for the newly-added constant buffer (which is achieved via
     // a function call)
     auto ConstantBufferName = "PIX_Constant_Color_CB_Handle";
-    Function *createHandle =
-        HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
-    Constant *CreateHandleOpcodeArg =
-        HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-    Constant *CBVArg = HlslOP->GetI8Const(
-        static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-            DXIL::ResourceClass::CBuffer));
-    Constant *MetaDataArg =
-        HlslOP->GetU32Const(ID); // position of the metadata record in the
-                                 // corresponding metadata list
-    Constant *IndexArg = HlslOP->GetU32Const(0); //
-    Constant *FalseArg =
-        HlslOP->GetI1Const(0); // non-uniform resource index: false
-    CallInst *callCreateHandle = Builder.CreateCall(
-        createHandle,
-        {CreateHandleOpcodeArg, CBVArg, MetaDataArg, IndexArg, FalseArg},
-        ConstantBufferName);
+
+    CallInst* callCreateHandle = PIXPassHelpers::CreateHandleForResource(DM, Builder, pCBuf.get(), ConstantBufferName);
+
+    DM.AddCBuffer(std::move(pCBuf));
 
     DM.ReEmitDxilResources();
 

+ 3 - 49
lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp

@@ -28,6 +28,8 @@
 #include <winerror.h>
 #endif
 
+#include "PixPassHelpers.h"
+
 // Keep these in sync with the same-named value in the debugger application's
 // WinPixShaderUtils.h
 
@@ -72,7 +74,6 @@ private:
     IRBuilder<> &Builder;
   };
 
-  CallInst *addUAV(BuilderContext &BC);
   Value *insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC);
   Value *insertInstructionsToCalculateGroupIdZ(BuilderContext &BC);
   Value *reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes);
@@ -92,53 +93,6 @@ uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset()
   return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
 }
 
-CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) 
-{
-  // Set up a UAV with structure of a single int
-  unsigned int UAVResourceHandle =
-      static_cast<unsigned int>(BC.DM.GetUAVs().size());
-  SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(BC.Ctx)};
-  llvm::StructType *UAVStructTy =
-      llvm::StructType::create(Elements, "PIX_DebugUAV_Type");
-  std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
-  pUAV->SetGlobalName("PIX_DebugUAVName");
-  pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
-  pUAV->SetID(UAVResourceHandle);
-  pUAV->SetSpaceID(
-      (unsigned int)-2); // This is the reserved-for-tools register space
-  pUAV->SetSampleCount(1);
-  pUAV->SetGloballyCoherent(false);
-  pUAV->SetHasCounter(false);
-  pUAV->SetCompType(CompType::getI32());
-  pUAV->SetLowerBound(0);
-  pUAV->SetRangeSize(1);
-  pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
-  pUAV->SetRW(true);
-
-  auto ID = BC.DM.AddUAV(std::move(pUAV));
-  assert(ID == UAVResourceHandle);
-
-  BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);
-
-  // Create handle for the newly-added UAV
-  Function *CreateHandleOpFunc =
-      BC.HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));
-  Constant *CreateHandleOpcodeArg =
-      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-  Constant *UAVVArg = BC.HlslOP->GetI8Const(
-      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-          DXIL::ResourceClass::UAV));
-  Constant *MetaDataArg = BC.HlslOP->GetU32Const(
-      ID); // position of the metadata record in the corresponding metadata list
-  Constant *IndexArg = BC.HlslOP->GetU32Const(0); //
-  Constant *FalseArg =
-      BC.HlslOP->GetI1Const(0); // non-uniform resource index: false
-  return BC.Builder.CreateCall(
-      CreateHandleOpFunc,
-      {CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg},
-      "PIX_DebugUAV_Handle");
-}
-
 Value *DxilPIXMeshShaderOutputInstrumentation::
     insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC)
 {
@@ -275,7 +229,7 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M)
 
   m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
 
-  m_OutputUAV = addUAV(BC);
+  m_OutputUAV = PIXPassHelpers::CreateUAV(DM, Builder, 0, "PIX_DebugUAV_Handle");
 
   auto GroupIdXandY = insertInstructionsToCalculateFlattenedGroupIdXandY(BC);
   auto GroupIdZ = insertInstructionsToCalculateGroupIdZ(BC);

+ 333 - 138
lib/DxilPIXPasses/DxilShaderAccessTracking.cpp

@@ -14,6 +14,8 @@
 
 #include "dxc/DXIL/DxilInstructions.h"
 #include "dxc/DXIL/DxilModule.h"
+#include "dxc/DXIL/DxilResourceBinding.h"
+#include "dxc/DXIL/DxilResourceProperties.h"
 #include "dxc/DxilPIXPasses/DxilPIXPasses.h"
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "dxc/HLSL/DxilSpanAllocator.h"
@@ -23,6 +25,8 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include <deque>
 
+#include "PixPassHelpers.h"
+
 #ifdef _WIN32
 #include <winerror.h>
 #endif
@@ -48,6 +52,8 @@ enum class ShaderAccessFlags : uint32_t {
   // attached to the UAV was accessed, but not necessarily the UAV resource.
   Counter = 1 << 2,
 
+  Sampler = 1 << 3,
+
   // Descriptor-only read (if any), but not the resource contents (if any).
   // Used for GetDimensions, samplers, and secondary texture for sampler
   // feedback.
@@ -141,10 +147,24 @@ struct SlotRange {
   unsigned numInvariableSlots;
 };
 
+// How the resource handle behind an instrumented access was created.
+enum class AccessStyle {
+  None,                       // initial value; handle origin not resolved
+  FromRootSig,                // CreateHandle or CreateHandleFromBinding
+  ResourceFromDescriptorHeap, // CreateHandleFromHeap, samplerHeap == false
+  SamplerFromDescriptorHeap   // CreateHandleFromHeap, samplerHeap == true
+};
 struct DxilResourceAndClass {
-  DxilResourceBase *resource;
+  AccessStyle accessStyle;
+  RegisterType registerType;
+  int RegisterSpace;
+  unsigned RegisterID;
   Value *index;
-  DXIL::ResourceClass resClass;
+  Value *dynamicallyBoundIndex;
+};
+
+enum class ResourceAccessStyle { // result of AccessStyleFromAccessAndType; EncodeAccess asserts the numeric value is < 8
+  None, // not a descriptor-heap access, or a register type other than CBV/SRV/UAV
+  Sampler, // handle came from the sampler descriptor heap
+  UAVRead, // UAV from the resource heap, accessed with ShaderAccessFlags::Read
+  UAVWrite, // UAV from the resource heap, accessed with any other ShaderAccessFlags value
+  CBVRead, // CBV from the resource heap
+  SRVRead, // SRV from the resource heap
+  EndOfEnum // NOTE(review): presumably a count/iteration bound rather than a real style - confirm
 };
 
 //---------------------------------------------------------------------------------------------------------------------------------
@@ -165,11 +185,23 @@ private:
   bool EmitResourceAccess(DxilResourceAndClass &res, Instruction *instruction,
                           OP *HlslOP, LLVMContext &Ctx,
                           ShaderAccessFlags readWrite);
+  DxilResourceAndClass GetResourceFromHandle(Value* resHandle, DxilModule& DM);
 
 private:
+  struct DynamicResourceBinding { // one entry is appended per CreateHandleFromHeap handle resolved by GetResourceFromHandle
+    int HeapIndex; // descriptor-heap index when it is a ConstantInt, otherwise -1
+    bool HeapIsSampler; // else resource
+    std::string Name; // currently always set to the placeholder "ShaderNameTodo"
+  };
+
+  std::vector<DynamicResourceBinding> m_dynamicResourceBindings;
   bool m_CheckForDynamicIndexing = false;
+  int m_DynamicResourceDataOffset = -1;
+  int m_DynamicSamplerDataOffset = -1;
+  int m_OutputBufferSize = -1;
   std::map<RegisterTypeAndSpace, SlotRange> m_slotAssignments;
   std::map<llvm::Function *, CallInst *> m_FunctionToUAVHandle;
+  std::map<llvm::Function *, std::map<ResourceAccessStyle, Constant *>> m_FunctionToEncodedAccess;
   std::set<RSRegisterIdentifier> m_DynamicallyIndexedBindPoints;
 };
 
@@ -264,6 +296,11 @@ void DxilShaderAccessTracking::applyOptions(PassOptions O) {
 
       rt = ParseRegisterType(config);
     }
+    m_DynamicResourceDataOffset = DeserializeInt(config);
+    ValidateDelimiter(config, ';');
+    m_DynamicSamplerDataOffset = DeserializeInt(config);
+    ValidateDelimiter(config, ';');
+    m_OutputBufferSize = DeserializeInt(config);
   }
 }
 
@@ -302,110 +339,301 @@ void DxilShaderAccessTracking::EmitAccess(LLVMContext &Ctx, OP *HlslOP,
       });
 }
 
+// Maps how a handle was created, its register type and the access direction
+// onto a ResourceAccessStyle. Only descriptor-heap handles yield a value other
+// than ResourceAccessStyle::None.
+static ResourceAccessStyle AccessStyleFromAccessAndType(
+    AccessStyle accessStyle,
+    RegisterType registerType,
+    ShaderAccessFlags readWrite)
+{
+  // Sampler-heap handles need no further classification.
+  if (accessStyle == AccessStyle::SamplerFromDescriptorHeap) {
+    return ResourceAccessStyle::Sampler;
+  }
+  // Anything else that did not come from the resource heap is not classified.
+  if (accessStyle != AccessStyle::ResourceFromDescriptorHeap) {
+    return ResourceAccessStyle::None;
+  }
+
+  if (registerType == RegisterType::CBV) {
+    return ResourceAccessStyle::CBVRead;
+  }
+  if (registerType == RegisterType::SRV) {
+    return ResourceAccessStyle::SRVRead;
+  }
+  if (registerType == RegisterType::UAV) {
+    // Only an exact Read counts as a read; every other flag value is a write.
+    const bool isRead = (readWrite == ShaderAccessFlags::Read);
+    return isRead ? ResourceAccessStyle::UAVRead
+                  : ResourceAccessStyle::UAVWrite;
+  }
+  return ResourceAccessStyle::None;
+}
+
 bool DxilShaderAccessTracking::EmitResourceAccess(DxilResourceAndClass &res,
                                                   Instruction *instruction,
                                                   OP *HlslOP, LLVMContext &Ctx,
                                                   ShaderAccessFlags readWrite) {
-
-  RegisterTypeAndSpace typeAndSpace{RegisterTypeFromResourceClass(res.resClass),
-                                    res.resource->GetSpaceID()};
-
-  auto slot = m_slotAssignments.find(typeAndSpace);
-  // If the assignment isn't found, we assume it's not accessed
-  if (slot != m_slotAssignments.end()) {
-
-    IRBuilder<> Builder(instruction);
-    Value *slotIndex;
-
-    if (isa<ConstantInt>(res.index)) {
-      unsigned index = cast<ConstantInt>(res.index)->getLimitedValue();
-      if (index > slot->second.numSlots) {
-        // out-of-range accesses are written to slot zero:
-        slotIndex = HlslOP->GetU32Const(0);
+  IRBuilder<> Builder(instruction);
+  
+  if (res.accessStyle == AccessStyle::FromRootSig) {
+    RegisterTypeAndSpace typeAndSpace{
+        res.registerType, 
+        static_cast<unsigned>(res.RegisterSpace) // reserved spaces are -ve, but user spaces can only be +ve
+    };
+
+    auto slot = m_slotAssignments.find(typeAndSpace);
+    // If the assignment isn't found, we assume it's not accessed
+    if (slot != m_slotAssignments.end()) {
+
+        Value *slotIndex;
+    
+      if (isa<ConstantInt>(res.index)) {
+        unsigned index = cast<ConstantInt>(res.index)->getLimitedValue();
+        if (index > slot->second.numSlots) {
+          // out-of-range accesses are written to slot zero:
+          slotIndex = HlslOP->GetU32Const(0);
+        } else {
+          slotIndex = HlslOP->GetU32Const((slot->second.startSlot + index) *
+                                          DWORDsPerResource * BytesPerDWORD);
+        }
       } else {
-        slotIndex = HlslOP->GetU32Const((slot->second.startSlot + index) *
-                                        DWORDsPerResource * BytesPerDWORD);
+        RSRegisterIdentifier id{typeAndSpace.Type, typeAndSpace.Space,
+                                res.RegisterID};
+        m_DynamicallyIndexedBindPoints.emplace(std::move(id));
+    
+        // CompareWithSlotLimit will contain 1 if the access is out-of-bounds
+        // (both over- and and under-flow via the unsigned >= with slot count)
+        auto CompareWithSlotLimit = Builder.CreateICmpUGE(
+            res.index, HlslOP->GetU32Const(slot->second.numSlots),
+            "CompareWithSlotLimit");
+        auto CompareWithSlotLimitAsUint = Builder.CreateCast(
+            Instruction::CastOps::ZExt, CompareWithSlotLimit,
+            Type::getInt32Ty(Ctx), "CompareWithSlotLimitAsUint");
+    
+        // IsInBounds will therefore contain 0 if the access is out-of-bounds, and
+        // 1 otherwise.
+        auto IsInBounds = Builder.CreateSub(
+            HlslOP->GetU32Const(1), CompareWithSlotLimitAsUint, "IsInBounds");
+    
+        auto SlotDwordOffset = Builder.CreateAdd(
+            res.index, HlslOP->GetU32Const(slot->second.startSlot),
+            "SlotDwordOffset");
+        auto SlotByteOffset = Builder.CreateMul(
+            SlotDwordOffset,
+            HlslOP->GetU32Const(DWORDsPerResource * BytesPerDWORD),
+            "SlotByteOffset");
+    
+        // This will drive an out-of-bounds access slot down to 0
+        slotIndex = Builder.CreateMul(SlotByteOffset, IsInBounds, "slotIndex");
       }
-    } else {
-      RSRegisterIdentifier id{typeAndSpace.Type, typeAndSpace.Space,
-                              res.resource->GetID()};
-      m_DynamicallyIndexedBindPoints.emplace(std::move(id));
-
-      // CompareWithSlotLimit will contain 1 if the access is out-of-bounds
-      // (both over- and and under-flow via the unsigned >= with slot count)
-      auto CompareWithSlotLimit = Builder.CreateICmpUGE(
-          res.index, HlslOP->GetU32Const(slot->second.numSlots),
-          "CompareWithSlotLimit");
-      auto CompareWithSlotLimitAsUint = Builder.CreateCast(
-          Instruction::CastOps::ZExt, CompareWithSlotLimit,
-          Type::getInt32Ty(Ctx), "CompareWithSlotLimitAsUint");
-
-      // IsInBounds will therefore contain 0 if the access is out-of-bounds, and
-      // 1 otherwise.
-      auto IsInBounds = Builder.CreateSub(
-          HlslOP->GetU32Const(1), CompareWithSlotLimitAsUint, "IsInBounds");
-
-      auto SlotDwordOffset = Builder.CreateAdd(
-          res.index, HlslOP->GetU32Const(slot->second.startSlot),
-          "SlotDwordOffset");
-      auto SlotByteOffset = Builder.CreateMul(
-          SlotDwordOffset,
-          HlslOP->GetU32Const(DWORDsPerResource * BytesPerDWORD),
-          "SlotByteOffset");
-
-      // This will drive an out-of-bounds access slot down to 0
-      slotIndex = Builder.CreateMul(SlotByteOffset, IsInBounds, "slotIndex");
+    
+      EmitAccess(Ctx, HlslOP, Builder, slotIndex, readWrite);
+    
+      return true; // did modify
     }
+  }
+  else if (m_DynamicResourceDataOffset != -1) {
+      if (res.accessStyle == AccessStyle::ResourceFromDescriptorHeap ||
+          res.accessStyle == AccessStyle::SamplerFromDescriptorHeap)
+      {
+          Constant* BaseOfRecordsForType;
+          int LimitForType;
+          if (res.accessStyle == AccessStyle::ResourceFromDescriptorHeap) {
+              LimitForType = m_DynamicSamplerDataOffset - m_DynamicResourceDataOffset;
+              BaseOfRecordsForType =
+                  HlslOP->GetU32Const(m_DynamicResourceDataOffset);
+          } else {
+              LimitForType = m_OutputBufferSize - m_DynamicSamplerDataOffset;
+              BaseOfRecordsForType =
+                HlslOP->GetU32Const(m_DynamicSamplerDataOffset);
+          }
 
-    EmitAccess(Ctx, HlslOP, Builder, slotIndex, readWrite);
+          // Branchless limit: compare offset to size of data reserved for that type,
+          // resulting in a value of 0 or 1.
+          // Extend that 0/1 to an integer, and multiply the offset by that value.
+          // Result: expected offset, or 0 if too large.
+
+          // Add 1 to the index in order to skip over the zeroth entry: that's 
+          // reserved for "out of bounds" writes.
+          auto *IndexToWrite =
+              Builder.CreateAdd(res.dynamicallyBoundIndex, HlslOP->GetU32Const(1));
+
+          // Each record is two dwords:
+          // the first dword is for write access, the second for read.
+          Constant *SizeofRecord =
+              HlslOP->GetU32Const(2 * static_cast<unsigned int>(sizeof(uint32_t)));
+          auto *BaseOfRecord =
+              Builder.CreateMul(IndexToWrite, SizeofRecord);
+          Value* OffsetToWrite;
+          if (readWrite == ShaderAccessFlags::Write) {
+            OffsetToWrite = BaseOfRecord;
+          }
+          else {
+            OffsetToWrite = Builder.CreateAdd(BaseOfRecord, 
+                HlslOP->GetU32Const(static_cast<unsigned int>(sizeof(uint32_t))));
+          }
 
-    return true; // did modify
+          // Generate the 0 (out of bounds) or 1 (in-bounds) multiplier:
+          Constant *BufferLimit = HlslOP->GetU32Const(LimitForType);
+          auto *LimitBoolean =
+              Builder.CreateICmpULT(OffsetToWrite, BufferLimit);
+          
+          auto * LimitIntegerValue = Builder.CreateCast(
+              Instruction::CastOps::ZExt, LimitBoolean,
+              Type::getInt32Ty(Ctx));
+          
+          // Limit the offset to the out-of-bounds record if the above generated 0,
+          // or leave it as-is if the above generated 1:
+          auto *LimitedOffset = Builder.CreateMul(OffsetToWrite, LimitIntegerValue);
+          
+          // Offset into the range of records for this type of access (resource or sampler)
+          auto* Offset = Builder.CreateAdd(BaseOfRecordsForType, LimitedOffset);
+
+          ResourceAccessStyle accessStyle = AccessStyleFromAccessAndType(
+              res.accessStyle, 
+              res.registerType,
+              readWrite);
+
+          Constant* EncodedFlags = m_FunctionToEncodedAccess
+                                .at(Builder.GetInsertBlock()->getParent())
+                                .at(accessStyle);
+
+          Constant *ElementMask = HlslOP->GetI8Const(1);
+          Function *StoreFunc =
+              HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(Ctx));
+          Constant *StoreOpcode =
+              HlslOP->GetU32Const((unsigned)OP::OpCode::BufferStore);
+          UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(Ctx));
+          (void)Builder.CreateCall(
+              StoreFunc,
+              {
+                  StoreOpcode,                  // i32, ; opcode
+                  m_FunctionToUAVHandle.at(
+                      Builder.GetInsertBlock()
+                          ->getParent()),       // %dx.types.Handle, ; resource handle
+                  Offset,                // i32, ; coordinate c0: byte offset
+                  UndefArg,                     // i32, ; coordinate c1 (unused)
+                  EncodedFlags,                 // i32, ; value v0
+                  UndefArg,                     // i32, ; value v1
+                  UndefArg,                     // i32, ; value v2
+                  UndefArg,                     // i32, ; value v3
+                  ElementMask                   // i8 ; just the first value is used
+              });
+          return true; // did modify
+      }
   }
+
   return false; // did not modify
 }
 
-DxilResourceAndClass GetResourceFromHandle(Value *resHandle, DxilModule &DM) {
+DxilResourceAndClass 
+DxilShaderAccessTracking::GetResourceFromHandle(Value *resHandle,
+                                                DxilModule &DM) {
 
-  DxilResourceAndClass ret{nullptr, nullptr, DXIL::ResourceClass::Invalid};
+  DxilResourceAndClass ret{
+      AccessStyle::None, 
+      RegisterType::Terminator,
+      0,
+      0,
+      nullptr,
+      nullptr};
 
   CallInst *handle = cast<CallInst>(resHandle);
-  DxilInst_CreateHandle createHandle(handle);
 
-  // Dynamic rangeId is not supported - skip and let validation report the
-  // error.
-  if (!isa<ConstantInt>(createHandle.get_rangeId()))
-    return ret;
+  unsigned rangeId = -1;
 
-  unsigned rangeId =
-      cast<ConstantInt>(createHandle.get_rangeId())->getLimitedValue();
+  if (hlsl::OP::IsDxilOpFuncCallInst(handle, hlsl::OP::OpCode::CreateHandle))
+  {
+    DxilInst_CreateHandle createHandle(handle);
 
-  auto resClass =
-      static_cast<DXIL::ResourceClass>(createHandle.get_resourceClass_val());
+    // Dynamic rangeId is not supported - skip and let validation report the
+    // error.
+    if (isa<ConstantInt>(createHandle.get_rangeId())) {
+        rangeId = cast<ConstantInt>(createHandle.get_rangeId())->getLimitedValue();
 
-  switch (resClass) {
-  case DXIL::ResourceClass::SRV:
-    ret.resource = &DM.GetSRV(rangeId);
-    break;
-  case DXIL::ResourceClass::UAV:
-    ret.resource = &DM.GetUAV(rangeId);
-    break;
-  case DXIL::ResourceClass::CBuffer:
-    ret.resource = &DM.GetCBuffer(rangeId);
-    break;
-  case DXIL::ResourceClass::Sampler:
-    ret.resource = &DM.GetSampler(rangeId);
-    break;
-  default:
-    DXASSERT(0, "invalid res class");
-    return ret;
-  }
+        auto resClass = static_cast<DXIL::ResourceClass>(createHandle.get_resourceClass_val());
 
-  ret.index = createHandle.get_index();
-  ret.resClass = resClass;
+        DxilResourceBase* resource = nullptr;
+        RegisterType registerType = RegisterType::Invalid;
+        switch (resClass) {
+        case DXIL::ResourceClass::SRV:
+            resource = &DM.GetSRV(rangeId);
+            registerType = RegisterType::SRV;
+            break;
+        case DXIL::ResourceClass::UAV:
+            resource = &DM.GetUAV(rangeId);
+          registerType = RegisterType::UAV;
+          break;
+        case DXIL::ResourceClass::CBuffer:
+            resource = &DM.GetCBuffer(rangeId);
+            registerType = RegisterType::CBV;
+            break;
+        case DXIL::ResourceClass::Sampler:
+            resource = &DM.GetSampler(rangeId);
+            registerType = RegisterType::Sampler;
+            break;
+        }
+        if (resource != nullptr) {
+            ret.index = createHandle.get_index();
+            ret.registerType = registerType;
+            ret.accessStyle = AccessStyle::FromRootSig;
+            ret.RegisterID = resource->GetID();
+            ret.RegisterSpace = resource->GetSpaceID();
+        }
+    }
+  } else if (hlsl::OP::IsDxilOpFuncCallInst(handle, hlsl::OP::OpCode::AnnotateHandle)) {
+      DxilInst_AnnotateHandle annotateHandle(handle);
+      auto properties = hlsl::resource_helper::loadPropsFromAnnotateHandle(
+          annotateHandle, *DM.GetShaderModel());
+
+      auto* handleCreation = cast<CallInst>(annotateHandle.get_res());
+
+      if (hlsl::OP::IsDxilOpFuncCallInst(handleCreation, hlsl::OP::OpCode::CreateHandleFromBinding)) {
+          DxilInst_CreateHandleFromBinding createHandleFromBinding(handleCreation);
+          Constant* B = cast<Constant>(createHandleFromBinding.get_bind());
+          auto binding = hlsl::resource_helper::loadBindingFromConstant(*B);
+          ret.accessStyle = AccessStyle::FromRootSig;
+          ret.index = createHandleFromBinding.get_index();
+          ret.registerType = RegisterTypeFromResourceClass(
+              static_cast<hlsl::DXIL::ResourceClass>(binding.resourceClass));
+          ret.RegisterSpace = binding.spaceID;
+      } else if (hlsl::OP::IsDxilOpFuncCallInst(handleCreation, hlsl::OP::OpCode::CreateHandleFromHeap)) {
+          DxilInst_CreateHandleFromHeap createHandleFromHeap(handleCreation);
+          ret.accessStyle = createHandleFromHeap.get_samplerHeap_val()
+              ? AccessStyle::SamplerFromDescriptorHeap : AccessStyle::ResourceFromDescriptorHeap;
+          ret.dynamicallyBoundIndex = createHandleFromHeap.get_index();
+
+          ret.registerType = RegisterTypeFromResourceClass(properties.getResourceClass());
+
+          DynamicResourceBinding drb{};
+          drb.HeapIsSampler = createHandleFromHeap.get_samplerHeap_val();
+          drb.HeapIndex = -1;
+          drb.Name = "ShaderNameTodo";
+          if (auto * constInt = dyn_cast<ConstantInt>(createHandleFromHeap.get_index()))
+          {
+              drb.HeapIndex = constInt->getLimitedValue();
+          }
+          m_dynamicResourceBindings.emplace_back(std::move(drb));
+
+          return ret;
+      } else {
+          DXASSERT_NOMSG(false);
+      }
+  }
 
   return ret;
 }
 
+// Places the numeric value of the shader kind in the top four bits (28..31)
+// of a 32-bit word.
+static uint32_t EncodeShaderModel(DXIL::ShaderKind kind)
+{
+    // Only four bits remain above the shift, so every kind below
+    // ShaderKind::Invalid must be < 16. That is a property of the enum, not
+    // of the argument, so it is enforced at compile time instead of with a
+    // runtime assert that is compiled out of release builds.
+    static_assert(static_cast<int>(DXIL::ShaderKind::Invalid) <= 16,
+                  "ShaderKind no longer fits in the four bits reserved for it");
+    return static_cast<uint32_t>(kind) << 28;
+}
+
+// Places the numeric value of a ResourceAccessStyle at bit 24 and above of a
+// 32-bit word. The assert restricts the value to three bits (0..7).
+static uint32_t EncodeAccess(ResourceAccessStyle access) {
+    constexpr unsigned AccessShift = 24;
+    const uint32_t rawStyle = static_cast<uint32_t>(access);
+    DXASSERT_NOMSG(rawStyle < 8);
+    return rawStyle << AccessShift;
+}
+
 bool DxilShaderAccessTracking::runOnModule(Module &M) {
   // This pass adds instrumentation for shader access to resources
 
@@ -447,66 +675,24 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) {
           FOS << "ShouldAssumeDsvAccess";
         }
       }
-
+      int uavRegId = 0;
       for (llvm::Function &F : M.functions()) {
         if (!F.getBasicBlockList().empty()) {
           IRBuilder<> Builder(F.getEntryBlock().getFirstInsertionPt());
 
-          unsigned int UAVResourceHandle =
-              static_cast<unsigned int>(DM.GetUAVs().size());
-
-          // Set up a UAV with structure of a single int
-          SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(Ctx)};
-          llvm::StructType *UAVStructTy =
-              llvm::StructType::create(Elements, "class.RWStructuredBuffer");
-          std::unique_ptr<DxilResource> pUAV =
-              llvm::make_unique<DxilResource>();
-          pUAV->SetGlobalName("PIX_CountUAVName");
-          pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
-          pUAV->SetID(UAVResourceHandle);
-          pUAV->SetSpaceID((
-              unsigned int)-2); // This is the reserved-for-tools register space
-          pUAV->SetSampleCount(1);
-          pUAV->SetGloballyCoherent(false);
-          pUAV->SetHasCounter(false);
-          pUAV->SetCompType(CompType::getI32());
-          pUAV->SetLowerBound(0);
-          pUAV->SetRangeSize(1);
-          pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
-
-          auto pAnnotation =
-              DM.GetTypeSystem().GetStructAnnotation(UAVStructTy);
-          if (pAnnotation == nullptr) {
-
-            pAnnotation = DM.GetTypeSystem().AddStructAnnotation(UAVStructTy);
-            pAnnotation->GetFieldAnnotation(0).SetCBufferOffset(0);
-            pAnnotation->GetFieldAnnotation(0).SetCompType(
-                hlsl::DXIL::ComponentType::I32);
-            pAnnotation->GetFieldAnnotation(0).SetFieldName("count");
+          m_FunctionToUAVHandle[&F] = PIXPassHelpers::CreateUAV(DM, Builder, uavRegId++, "PIX_CountUAV_Handle");
+          auto const* shaderModel = DM.GetShaderModel();
+          auto shaderKind = shaderModel->GetKind();
+          OP *HlslOP = DM.GetOP();
+          for (int accessStyle = 1;
+              accessStyle < static_cast<int>(ResourceAccessStyle::EndOfEnum);
+              ++accessStyle)
+          {
+              ResourceAccessStyle style = static_cast<ResourceAccessStyle>(accessStyle);
+              m_FunctionToEncodedAccess[&F][style] =
+                  HlslOP->GetU32Const(EncodeShaderModel(shaderKind) |
+                      EncodeAccess(style));
           }
-
-          ID = DM.AddUAV(std::move(pUAV));
-
-          assert((unsigned)ID == UAVResourceHandle);
-
-          // Create handle for the newly-added UAV
-          Function *CreateHandleOpFunc = HlslOP->GetOpFunc(
-              DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
-          Constant *CreateHandleOpcodeArg =
-              HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-          Constant *UAVArg = HlslOP->GetI8Const(
-              static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-                  DXIL::ResourceClass::UAV));
-          Constant *MetaDataArg =
-              HlslOP->GetU32Const(ID); // position of the metadata record in the
-                                       // corresponding metadata list
-          Constant *IndexArg = HlslOP->GetU32Const(0); //
-          Constant *FalseArg =
-              HlslOP->GetI1Const(0); // non-uniform resource index: false
-          m_FunctionToUAVHandle[&F] = Builder.CreateCall(
-              CreateHandleOpFunc,
-              {CreateHandleOpcodeArg, UAVArg, MetaDataArg, IndexArg, FalseArg},
-              "PIX_CountUAV_Handle");
         }
       }
       DM.ReEmitDxilResources();
@@ -576,9 +762,11 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) {
 
         for (unsigned iParam : handleParams) {
           auto res = GetResourceFromHandle(Call->getArgOperand(iParam), DM);
+          if (res.accessStyle == AccessStyle::None) {
+            continue;
+          }
           // Don't instrument the accesses to the UAV that we just added
-          if (res.resClass == DXIL::ResourceClass::UAV &&
-              res.resource->GetSpaceID() == (unsigned)-2) {
+          if (res.RegisterSpace  == -2) {
             break;
           }
           if (EmitResourceAccess(res, Call, HlslOP, Ctx, readWrite)) {
@@ -598,6 +786,13 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) {
             << ';';
       }
       FOS << ".";
+
+      // todo: this will reflect dynamic resource names when the metadata exists
+      FOS << "DynamicallyBoundResources=";
+      for (auto const &drb : m_dynamicResourceBindings) {
+        FOS << (drb.HeapIsSampler ? 'S' : 'R') << drb.HeapIndex << ';';
+      }
+      FOS << ".";
     }
   }
 

+ 146 - 8
lib/DxilPIXPasses/PixPassHelpers.cpp

@@ -8,15 +8,153 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 #include "dxc/DXIL/DxilOperations.h"
+#include "dxc/DXIL/DxilInstructions.h"
+#include "dxc/DXIL/DxilModule.h"
+#include "dxc/DXIL/DxilResourceBinding.h"
+#include "dxc/DXIL/DxilResourceProperties.h"
+#include "dxc/HLSL/DxilSpanAllocator.h"
+
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
-namespace PIXPassHelpers
-{
-    bool IsAllocateRayQueryInstruction(llvm::Value* Val) {
-        if (llvm::Instruction* Inst = llvm::dyn_cast<llvm::Instruction>(Val)) {
-            return hlsl::OP::IsDxilOpFuncCallInst(Inst, hlsl::OP::OpCode::AllocateRayQuery);
-        }
-        return false;
+using namespace llvm;
+using namespace hlsl;
+
+namespace PIXPassHelpers {
+bool IsAllocateRayQueryInstruction(llvm::Value *Val) {
+  if (Val != nullptr) {
+    if (llvm::Instruction *Inst = llvm::dyn_cast<llvm::Instruction>(Val)) {
+      return hlsl::OP::IsDxilOpFuncCallInst(Inst,
+                                            hlsl::OP::OpCode::AllocateRayQuery);
     }
-}
+  }
+  return false;
+}
+
+// Returns how many resources of the given class the module currently holds.
+// CreateUAV and CreateHandleForResource use that count as the ID of the
+// resource about to be added. Only CBuffer and UAV are handled; any other
+// class asserts and yields 0.
+static unsigned int
+GetNextRegisterIdForClass(hlsl::DxilModule &DM,
+                          DXIL::ResourceClass resourceClass) {
+  if (resourceClass == DXIL::ResourceClass::CBuffer) {
+    return static_cast<unsigned int>(DM.GetCBuffers().size());
+  }
+  if (resourceClass == DXIL::ResourceClass::UAV) {
+    return static_cast<unsigned int>(DM.GetUAVs().size());
+  }
+
+  DXASSERT(false, "Unexpected resource class");
+  return 0;
+}
+
+// True when the module targets shader model 6.6 or later; in that case
+// CreateHandleForResource emits createHandleFromBinding + annotateHandle
+// instead of the older createHandle.
+static bool IsDynamicResourceShaderModel(DxilModule &DM) {
+  const auto *shaderModel = DM.GetShaderModel();
+  return shaderModel->IsSMAtLeast(6, 6);
+}
+
+// Emits, at the builder's insertion point, the DXIL call(s) that produce a
+// handle for `resource`, and returns the last call emitted.
+//
+//  - Shader model 6.6+: a createHandleFromBinding call (named `name`) followed
+//    by an annotateHandle call; the annotateHandle call is returned.
+//  - Earlier shader models: a single createHandle call named `name`.
+//
+// The metadata ID / index operand is the current resource count for the
+// resource's class, so this appears to expect that `resource` has not yet been
+// added to the module (CreateUAV calls it before DM.AddUAV).
+llvm::CallInst *CreateHandleForResource(hlsl::DxilModule &DM,
+                                        llvm::IRBuilder<> &Builder,
+                                        hlsl::DxilResourceBase *resource,
+                                        const char *name) {
+
+  OP *HlslOP = DM.GetOP();
+  LLVMContext &Ctx = DM.GetModule()->getContext();
+
+  const DXIL::ResourceClass resourceClass = resource->GetClass();
+  const unsigned int resourceMetaDataId =
+      GetNextRegisterIdForClass(DM, resourceClass);
+
+  if (!IsDynamicResourceShaderModel(DM)) {
+    // Pre-6.6: one createHandle call identifies the resource by class and by
+    // its position in the corresponding metadata list.
+    Function *createHandleFn =
+        HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
+    Constant *opcodeArg =
+        HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
+    Constant *classArg = HlslOP->GetI8Const(
+        static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
+            resourceClass));
+    Constant *metaDataIdArg = HlslOP->GetU32Const(resourceMetaDataId);
+    Constant *rangeIndexArg = HlslOP->GetU32Const(0);
+    Constant *nonUniformArg =
+        HlslOP->GetI1Const(0); // non-uniform resource index: false
+    return Builder.CreateCall(
+        createHandleFn,
+        {opcodeArg, classArg, metaDataIdArg, rangeIndexArg, nonUniformArg},
+        name);
+  }
+
+  // 6.6+: first create the handle from the resource's binding record...
+  Function *fromBindingFn = HlslOP->GetOpFunc(
+      DXIL::OpCode::CreateHandleFromBinding, Type::getVoidTy(Ctx));
+  Constant *fromBindingOpcodeArg =
+      HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandleFromBinding);
+  DxilResourceBinding bindingRecord =
+      resource_helper::loadBindingFromResourceBase(resource);
+  Value *bindingConstant = resource_helper::getAsConstant(
+      bindingRecord, HlslOP->GetResourceBindingType(), *DM.GetShaderModel());
+  Value *indexArg = HlslOP->GetU32Const(resourceMetaDataId);
+  Value *nonUniformIndexArg = HlslOP->GetI1Const(0);
+
+  Value *fromBindingArgs[] = {fromBindingOpcodeArg, bindingConstant, indexArg,
+                              nonUniformIndexArg};
+  auto *rawHandle = Builder.CreateCall(fromBindingFn, fromBindingArgs, name);
+
+  // ...then annotate it with the resource's properties. This second call is
+  // left unnamed, and it is the value handed back to the caller.
+  Function *annotateFn =
+      HlslOP->GetOpFunc(DXIL::OpCode::AnnotateHandle, Type::getVoidTy(Ctx));
+  Value *annotateOpcodeArg =
+      HlslOP->GetI32Const((unsigned)DXIL::OpCode::AnnotateHandle);
+  DxilResourceProperties resourceProps =
+      resource_helper::loadPropsFromResourceBase(resource);
+  Type *propsType = HlslOP->GetResourcePropertiesType();
+  Value *propsConstant = resource_helper::getAsConstant(
+      resourceProps, propsType, *DM.GetShaderModel());
+
+  return Builder.CreateCall(annotateFn,
+                            {annotateOpcodeArg, rawHandle, propsConstant});
+}
+
+// Adds a raw-buffer UAV whose element type is a struct of a single i32 to the
+// module, placed in the reserved-for-tools register space, and returns the
+// handle-creation call emitted for it at the builder's insertion point.
+// `name` is used both as the resource's global name and as the handle's name.
+// NOTE(review): `registerId` is not read anywhere in this body.
+llvm::CallInst *CreateUAV(DxilModule &DM, IRBuilder<> &Builder,
+                          unsigned int registerId, const char *name) {
+  LLVMContext &Ctx = DM.GetModule()->getContext();
+
+  // Element layout: one 32-bit integer.
+  SmallVector<llvm::Type *, 1> structFields{Type::getInt32Ty(Ctx)};
+  llvm::StructType *uavStructType =
+      llvm::StructType::create(structFields, "class.RWStructuredBuffer");
+
+  std::unique_ptr<DxilResource> uav = llvm::make_unique<DxilResource>();
+  uav->SetGlobalName(name);
+  uav->SetGlobalSymbol(UndefValue::get(uavStructType->getPointerTo()));
+  uav->SetID(GetNextRegisterIdForClass(DM, DXIL::ResourceClass::UAV));
+  uav->SetRW(true); // sets UAV class
+  // This is the reserved-for-tools register space
+  uav->SetSpaceID((unsigned int)-2);
+  uav->SetSampleCount(1);
+  uav->SetGloballyCoherent(false);
+  uav->SetHasCounter(false);
+  uav->SetCompType(CompType::getI32());
+  uav->SetLowerBound(0);
+  uav->SetRangeSize(1);
+  uav->SetKind(DXIL::ResourceKind::RawBuffer);
+
+  // Describe the struct's single field to the type system if nothing has
+  // annotated this struct type yet.
+  if (DM.GetTypeSystem().GetStructAnnotation(uavStructType) == nullptr) {
+    auto *structAnnotation =
+        DM.GetTypeSystem().AddStructAnnotation(uavStructType);
+    auto &countField = structAnnotation->GetFieldAnnotation(0);
+    countField.SetCBufferOffset(0);
+    countField.SetCompType(hlsl::DXIL::ComponentType::I32);
+    countField.SetFieldName("count");
+  }
+
+  // The handle is created before the UAV is registered: the handle code
+  // derives the resource's ID from the current UAV count.
+  llvm::CallInst *uavHandle =
+      CreateHandleForResource(DM, Builder, uav.get(), name);
+
+  DM.AddUAV(std::move(uav));
+
+  return uavHandle;
+}
+} // namespace PIXPassHelpers

+ 5 - 0
lib/DxilPIXPasses/PixPassHelpers.h

@@ -12,4 +12,9 @@
 namespace PIXPassHelpers
 {
 	bool IsAllocateRayQueryInstruction(llvm::Value* Val);
+    llvm::CallInst* CreateUAV(hlsl::DxilModule& DM, llvm::IRBuilder<>& Builder,
+                                  unsigned int registerId, const char *name);
+    llvm::CallInst* CreateHandleForResource(hlsl::DxilModule& DM, llvm::IRBuilder<>& Builder,
+        hlsl::DxilResourceBase * resource,
+        const char* name);
 }

+ 1 - 1
lib/HLSL/DxilCondenseResources.cpp

@@ -560,7 +560,7 @@ public:
 
     // Load up debug information, to cross-reference values and the instructions
     // used to load them.
-    m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+    m_HasDbgInfo = hasDebugInfo(M);
 
     GenerateDxilResourceHandles();
 

+ 3 - 1
lib/HLSL/DxilContainerReflection.cpp

@@ -1631,7 +1631,9 @@ void DxilModuleReflection::CreateReflectionObjectForResource(DxilResourceBase *R
     if (inputBind.NumSamples == 0) {
       if (R->IsStructuredBuffer()) {
         inputBind.NumSamples = CalcResTypeSize(*m_pDxilModule, *R);
-      } else if (!R->IsRawBuffer() && !R->IsTBuffer()) {
+      } else if (!R->IsRawBuffer() && !R->IsTBuffer() &&
+                 R->GetKind() != DXIL::ResourceKind::Texture2DMS &&
+                 R->GetKind() != DXIL::ResourceKind::Texture2DMSArray) {
         inputBind.NumSamples = 0xFFFFFFFF;
       }
     }

+ 2 - 12
lib/HLSL/DxilConvergent.cpp

@@ -22,24 +22,13 @@
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "dxc/HLSL/HLOperations.h"
 #include "dxc/HLSL/HLModule.h"
-#include "dxc/HLSL/DxilConvergent.h"
 #include "dxc/HlslIntrinsicOp.h"
 #include "dxc/HLSL/DxilConvergentName.h"
 
 using namespace llvm;
 using namespace hlsl;
 
-bool hlsl::IsConvergentMarker(Value *V) {
-  CallInst *CI = dyn_cast<CallInst>(V);
-  if (!CI)
-    return false;
-  Function *F = CI->getCalledFunction();
-  return F->getName().startswith(kConvergentFunctionPrefix);
-}
 
-Value *hlsl::GetConvergentSource(Value *V) {
-  return cast<CallInst>(V)->getOperand(0);
-}
 
 ///////////////////////////////////////////////////////////////////////////////
 // DxilConvergent.
@@ -58,7 +47,8 @@ public:
 
   bool runOnModule(Module &M) override {
     if (M.HasHLModule()) {
-      if (!M.GetHLModule().GetShaderModel()->IsPS())
+      const ShaderModel *SM = M.GetHLModule().GetShaderModel();
+      if (!SM->IsPS() && !SM->IsLib() && (!SM->IsSM66Plus() || (!SM->IsCS() && !SM->IsMS() && !SM->IsAS())))
         return false;
     }
     bool bUpdated = false;

+ 1 - 1
lib/HLSL/DxilGenerationPass.cpp

@@ -201,7 +201,7 @@ public:
 
     // Load up debug information, to cross-reference values and the instructions
     // used to load them.
-    m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+    m_HasDbgInfo = hasDebugInfo(M);
 
     // EntrySig for shader functions.
     DxilEntryPropsMap EntryPropsMap;

+ 35 - 1
lib/HLSL/DxilLinker.cpp

@@ -119,6 +119,7 @@ public:
     return m_functionNameMap;
   }
   bool IsInitFunc(llvm::Function *F);
+  bool IsEntry(llvm::Function *F);
   bool IsResourceGlobal(const llvm::Constant *GV);
   DxilResourceBase *GetResource(const llvm::Constant *GV);
 
@@ -135,6 +136,7 @@ private:
   DxilModule &m_DM;
   // Map from name to Link info for extern functions.
   llvm::StringMap<std::unique_ptr<DxilFunctionLinkInfo>> m_functionNameMap;
+  llvm::SmallPtrSet<llvm::Function*,4>  m_entrySet;
   // Map from resource link global to resource. MapVector for deterministic iteration.
   llvm::MapVector<const llvm::Constant *, DxilResourceBase *> m_resourceMap;
   // Set of initialize functions for global variable. SetVector for deterministic iteration.
@@ -202,6 +204,8 @@ DxilLib::DxilLib(std::unique_ptr<llvm::Module> pModule)
     }
     m_functionNameMap[F.getName()] =
         llvm::make_unique<DxilFunctionLinkInfo>(&F);
+    if (m_DM.IsEntry(&F))
+      m_entrySet.insert(&F);
   }
 
   // Update internal global name.
@@ -211,6 +215,7 @@ DxilLib::DxilLib(std::unique_ptr<llvm::Module> pModule)
       GV.setName(MID + GV.getName());
     }
   }
+
 }
 
 void DxilLib::FixIntrinsicOverloads() {
@@ -327,6 +332,7 @@ bool DxilLib::HasFunction(std::string &name) {
   return m_functionNameMap.count(name);
 }
 
+// True if F is in the set of entry functions collected by the constructor.
+bool DxilLib::IsEntry(llvm::Function *F) {
+  return m_entrySet.count(F) != 0;
+}
 bool DxilLib::IsInitFunc(llvm::Function *F) { return m_initFuncSet.count(F); }
 bool DxilLib::IsResourceGlobal(const llvm::Constant *GV) {
   return m_resourceMap.count(GV);
@@ -782,6 +788,12 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
     if (newPatchConstantFunc->hasFnAttribute(llvm::Attribute::AlwaysInline))
       newPatchConstantFunc->removeFnAttr(llvm::Attribute::AlwaysInline);
   }
+
+  // Set the root signature on the module, if the entry props carry one.
+  if (!props.serializedRootSignature.empty()) {
+    DM.ResetSerializedRootSignature(props.serializedRootSignature);
+    props.serializedRootSignature.clear();
+  }
   // Set EntryProps
   DM.SetShaderProperties(&props);
 
@@ -1466,7 +1478,7 @@ DxilLinkerImpl::Link(StringRef entry, StringRef profile, dxilutil::ExportMap &ex
       return nullptr;
 
   } else {
-    if (exportMap.empty()) {
+    if (exportMap.empty() && !exportMap.isExportShadersOnly()) {
       // Add every function for lib profile.
       for (auto &it : m_functionNameMap) {
         StringRef name = it.getKey();
@@ -1496,6 +1508,28 @@ DxilLinkerImpl::Link(StringRef entry, StringRef profile, dxilutil::ExportMap &ex
           }
         }
       }
+    } else if (exportMap.isExportShadersOnly()) {
+      SmallVector<StringRef, 4> workList;
+      for (auto *pLib : m_attachedLibs) {
+        auto &DM = pLib->GetDxilModule();
+        auto *pM = DM.GetModule();
+        for (Function &F : pM->functions()) {
+          if (!pLib->IsEntry(&F)) {
+            if (!F.isDeclaration()) {
+              // Give non-entry functions internal linkage so they can be removed.
+              F.setLinkage(GlobalValue::LinkageTypes::InternalLinkage);
+            }
+            continue;
+          }
+          workList.emplace_back(F.getName());
+        }
+        libSet.insert(pLib);
+      }
+
+      if (!AddFunctions(workList, libSet, addedFunctionSet, linkJob,
+                        /*bLazyLoadDone*/ false,
+                        /*bAllowFuncionDecls*/ false))
+        return nullptr;
     } else {
       SmallVector<StringRef, 4> workList;
 

+ 10 - 4
lib/HLSL/DxilPreparePasses.cpp

@@ -516,9 +516,11 @@ public:
     }
   }
 
+  // Looks up the module's existing IsHelper global by name, also accepting
+  // globals with local linkage. Never creates one; callers test the result
+  // for null.
+  GlobalVariable *GetIsHelperGV(Module &M) {
+    const bool allowLocal = true;
+    return M.getGlobalVariable(DXIL::kDxIsHelperGlobalName, allowLocal);
+  }
   GlobalVariable *GetOrCreateIsHelperGV(Module &M, hlsl::OP *hlslOP) {
-    GlobalVariable *GV =
-        M.getGlobalVariable(DXIL::kDxIsHelperGlobalName, /*AllowLocal*/ true);
+    GlobalVariable *GV = GetIsHelperGV(M);
     if (GV)
       return GV;
     DxilModule &DM = M.GetDxilModule();
@@ -593,7 +595,11 @@ public:
       for (auto uit = F->user_begin(); uit != F->user_end();) {
         CallInst *CI = cast<CallInst>(*(uit++));
         if (!GV)
-          GV = GetOrCreateIsHelperGV(*F->getParent(), hlslOP);
+          GV = GetIsHelperGV(*F->getParent());
+        // If we don't already have a global for this,
+        // we didn't have any IsHelper() calls, so no need to add one now.
+        if (!GV)
+          return;
         IRBuilder<> Builder(CI);
         Value *Cond =
             Builder.CreateZExt(DxilInst_Discard(CI).get_condition(), I32Ty);
@@ -618,7 +624,7 @@ public:
       // in an exported function linked to a PS in another library in this case.
       // But it won't pass validation otherwise.
       if (pSM->IsLib() && DXIL::CompareVersions(ValMajor, ValMinor, 1, 6) < 1) {
-        if (GlobalVariable *GV = M.getGlobalVariable(DXIL::kDxIsHelperGlobalName, /*AllowLocal*/ true)) {
+        if (GlobalVariable *GV = GetIsHelperGV(M)) {
           GV->setLinkage(GlobalValue::InternalLinkage);
         }
       }

+ 92 - 12
lib/HLSL/DxilValidation.cpp

@@ -74,11 +74,12 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::ContainerPartMissing: return "Missing part '%0' required by module.";
     case hlsl::ValidationRule::ContainerPartInvalid: return "Unknown part '%0' found in DXIL container.";
     case hlsl::ValidationRule::ContainerRootSignatureIncompatible: return "Root Signature in DXIL container is not compatible with shader.";
-    case hlsl::ValidationRule::MetaRequired: return "TODO - Required metadata missing.";
+    case hlsl::ValidationRule::MetaRequired: return "Required metadata missing.";
     case hlsl::ValidationRule::MetaKnown: return "Named metadata '%0' is unknown.";
     case hlsl::ValidationRule::MetaUsed: return "All metadata must be used by dxil.";
     case hlsl::ValidationRule::MetaTarget: return "Unknown target triple '%0'.";
-    case hlsl::ValidationRule::MetaWellFormed: return "TODO - Metadata must be well-formed in operand count and types.";
+    case hlsl::ValidationRule::MetaWellFormed: return "Metadata must be well-formed in operand count and types.";
+    case hlsl::ValidationRule::MetaVersionSupported: return "%0 version in metadata (%1.%2) is not supported; maximum: (%3.%4).";
     case hlsl::ValidationRule::MetaSemanticLen: return "Semantic length must be at least 1 and at most 64.";
     case hlsl::ValidationRule::MetaInterpModeValid: return "Invalid interpolation mode for '%0'.";
     case hlsl::ValidationRule::MetaSemaKindValid: return "Semantic kind for '%0' is invalid.";
@@ -208,7 +209,7 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::TypesNoPtrToPtr: return "Pointers to pointers, or pointers in structures are not allowed.";
     case hlsl::ValidationRule::TypesI8: return "I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics.";
     case hlsl::ValidationRule::SmName: return "Unknown shader model '%0'.";
-    case hlsl::ValidationRule::SmDxilVersion: return "Shader model requires Dxil Version %0,%1.";
+    case hlsl::ValidationRule::SmDxilVersion: return "Shader model requires Dxil Version %0.%1.";
     case hlsl::ValidationRule::SmOpcode: return "Opcode %0 not valid in shader model %1.";
     case hlsl::ValidationRule::SmOperand: return "Operand must be defined in target shader model.";
     case hlsl::ValidationRule::SmSemantic: return "Semantic '%0' is invalid as %1 %2.";
@@ -687,8 +688,49 @@ struct ValidationContext {
     Failed = true;
   }
 
+  // Use this instead of DxilResourceBase::GetGlobalName
+  //
+  // Produces a printable name for a resource in validation messages:
+  //   1. "nullptr" for a null resource;
+  //   2. the resource's own global name when it is non-empty;
+  //   3. otherwise, if a debug module is available, the global name of the
+  //      resource with the same class and ID in that module;
+  //   4. otherwise a description built from class, ID, kind and binding.
+  std::string GetResourceName(const hlsl::DxilResourceBase *Res) {
+    if (!Res)
+      return "nullptr";
+
+    const std::string &ownName = Res->GetGlobalName();
+    if (!ownName.empty())
+      return ownName;
+
+    if (pDebugModule) {
+      DxilModule &DM = pDebugModule->GetOrCreateDxilModule();
+      const DXIL::ResourceClass resClass = Res->GetClass();
+      if (resClass == DXIL::ResourceClass::CBuffer)
+        return DM.GetCBuffer(Res->GetID()).GetGlobalName();
+      if (resClass == DXIL::ResourceClass::Sampler)
+        return DM.GetSampler(Res->GetID()).GetGlobalName();
+      if (resClass == DXIL::ResourceClass::SRV)
+        return DM.GetSRV(Res->GetID()).GetGlobalName();
+      if (resClass == DXIL::ResourceClass::UAV)
+        return DM.GetUAV(Res->GetID()).GetGlobalName();
+      return "Invalid Resource";
+    }
+
+    // When names have been stripped, use class and binding location to
+    // identify the resource.  Format is roughly:
+    // Allocated:   (CB|T|U|S)<ID>: <ResourceKind> ((cb|t|u|s)<LB>[<RangeSize>] space<SpaceID>)
+    // Unallocated: (CB|T|U|S)<ID>: <ResourceKind> (no bind location)
+    // Example: U0: TypedBuffer (u5[2] space1)
+    // [<RangeSize>] and space<SpaceID> skipped if 1 and 0 respectively.
+    std::string described;
+    described += Res->GetResIDPrefix();
+    described += std::to_string(Res->GetID());
+    described += ": ";
+    described += Res->GetResKindName();
+
+    if (!Res->IsAllocated()) {
+      described += " (no bind location)";
+      return described;
+    }
+
+    described += " (";
+    described += Res->GetResBindPrefix();
+    described += std::to_string(Res->GetLowerBound());
+    if (Res->IsUnbounded()) {
+      described += "[unbounded]";
+    } else if (Res->GetRangeSize() != 1) {
+      described += "[";
+      described += std::to_string(Res->GetRangeSize());
+      described += "]";
+    }
+    if (Res->GetSpaceID() != 0) {
+      described += " space";
+      described += std::to_string(Res->GetSpaceID());
+    }
+    described += ")";
+    return described;
+  }
+
+
   void EmitResourceError(const hlsl::DxilResourceBase *Res, ValidationRule rule) {
-    std::string QuotedRes = " '" + Res->GetGlobalName() + "'";
+    std::string QuotedRes = " '" + GetResourceName(Res) + "'";
     dxilutil::EmitErrorOnContext(M.getContext(), GetValidationRuleText(rule) + QuotedRes);
     Failed = true;
   }
@@ -696,7 +738,7 @@ struct ValidationContext {
   void EmitResourceFormatError(const hlsl::DxilResourceBase *Res,
                                ValidationRule rule,
                                ArrayRef<StringRef> args) {
-    std::string QuotedRes = " '" + Res->GetGlobalName() + "'";
+    std::string QuotedRes = " '" + GetResourceName(Res) + "'";
     std::string ruleText = GetValidationRuleText(rule);
     FormatRuleText(ruleText, args);
     dxilutil::EmitErrorOnContext(M.getContext(), ruleText + QuotedRes);
@@ -2525,6 +2567,17 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
       ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction,
                                   {"64-bit atomic operations", "Shader Model 6.6+"});
   } break;
+  case DXIL::OpCode::CreateHandle:
+    if (ValCtx.isLibProfile) {
+      ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction,
+                                  {"CreateHandle", "non-library targets"});
+    }
+    // CreateHandle should not be used in SM 6.6 and above:
+    if (DXIL::CompareVersions(ValCtx.m_DxilMajor, ValCtx.m_DxilMinor, 1, 5) > 0) {
+      ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction,
+                                  {"CreateHandle", "Shader model 6.5 and below"});
+    }
+    break;
   default:
     // TODO: make sure every opcode is checked.
     // Skip opcodes don't need special check.
@@ -3858,6 +3911,12 @@ static void ValidateValidatorVersion(ValidationContext &ValCtx) {
         // depending on the degree of compat across versions.
         if (majorVer == curMajor && minorVer <= curMinor) {
           return;
+        } else {
+          ValCtx.EmitFormatError(
+              ValidationRule::MetaVersionSupported,
+              {"Validator", std::to_string(majorVer), std::to_string(minorVer),
+               std::to_string(curMajor), std::to_string(curMinor)});
+          return;
         }
       }
     }
@@ -3879,9 +3938,15 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) {
           GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) {
         // This will need to be updated as dxil major/minor versions evolve,
         // depending on the degree of compat across versions.
-        if ((majorVer == 1 && minorVer <= DXIL::kDxilMinor) &&
+        if ((majorVer == DXIL::kDxilMajor && minorVer <= DXIL::kDxilMinor) &&
             (majorVer == ValCtx.m_DxilMajor && minorVer == ValCtx.m_DxilMinor)) {
           return;
+        } else {
+          ValCtx.EmitFormatError(
+              ValidationRule::MetaVersionSupported,
+              {"Dxil", std::to_string(majorVer), std::to_string(minorVer),
+               std::to_string(DXIL::kDxilMajor), std::to_string(DXIL::kDxilMinor)});
+          return;
         }
       }
     }
@@ -3923,6 +3988,9 @@ static void ValidateBitcode(ValidationContext &ValCtx) {
 }
 
 static void ValidateMetadata(ValidationContext &ValCtx) {
+  ValidateValidatorVersion(ValCtx);
+  ValidateDxilVersion(ValCtx);
+
   Module *pModule = &ValCtx.M;
   const std::string &target = pModule->getTargetTriple();
   if (target != "dxil-ms-dx") {
@@ -3970,8 +4038,6 @@ static void ValidateMetadata(ValidationContext &ValCtx) {
     }
   }
 
-  ValidateDxilVersion(ValCtx);
-  ValidateValidatorVersion(ValCtx);
   ValidateTypeAnnotation(ValCtx);
 }
 
@@ -3996,7 +4062,7 @@ static void ValidateResourceOverlap(
   if (conflictRes) {
     ValCtx.EmitFormatError(
         ValidationRule::SmResourceRangeOverlap,
-        {res.GetGlobalName(), std::to_string(base),
+        {ValCtx.GetResourceName(&res), std::to_string(base),
          std::to_string(size),
          std::to_string(conflictRes->GetLowerBound()),
          std::to_string(conflictRes->GetRangeSize()),
@@ -4208,7 +4274,7 @@ static void ValidateCBuffer(DxilCBuffer &cb, ValidationContext &ValCtx) {
       DXIL::kMaxCBufferSize << 4);
   CollectCBufferRanges(annotation, constAllocator,
                        0, typeSys,
-                       cb.GetGlobalName(), ValCtx);
+                       ValCtx.GetResourceName(&cb), ValCtx);
 }
 
 static void ValidateResources(ValidationContext &ValCtx) {
@@ -4240,7 +4306,7 @@ static void ValidateResources(ValidationContext &ValCtx) {
     if (uav->HasCounter() && uav->IsGloballyCoherent())
       ValCtx.EmitResourceFormatError(uav.get(),
                                      ValidationRule::MetaGlcNotOnAppendConsume,
-                                     {uav.get()->GetGlobalName()});
+                                     {ValCtx.GetResourceName(uav.get())});
 
     ValidateResource(*uav, ValCtx);
     ValidateResourceOverlap(*uav, uavAllocator, ValCtx);
@@ -5693,7 +5759,7 @@ void GetValidationVersion(_Out_ unsigned *pMajor, _Out_ unsigned *pMinor) {
   // - Mesh and Amplification shaders
   // - DXR 1.1 & RayQuery support
   *pMajor = 1;
-  *pMinor = 6;
+  *pMinor = 7;
   // VALRULE-TEXT:END
 }
 
@@ -6246,6 +6312,8 @@ _Use_decl_annotations_ HRESULT ValidateLoadModuleFromContainerLazy(
 _Use_decl_annotations_
 HRESULT ValidateDxilContainer(const void *pContainer,
                               uint32_t ContainerSize,
+                              const void *pOptDebugBitcode,
+                              uint32_t OptDebugBitcodeSize,
                               llvm::raw_ostream &DiagStream) {
   LLVMContext Ctx, DbgCtx;
   std::unique_ptr<llvm::Module> pModule, pDebugModule;
@@ -6260,6 +6328,12 @@ HRESULT ValidateDxilContainer(const void *pContainer,
   IFR(ValidateLoadModuleFromContainer(pContainer, ContainerSize, pModule, pDebugModule,
       Ctx, DbgCtx, DiagStream));
 
+  if (!pDebugModule && pOptDebugBitcode) {
+    // TODO: lazy load for perf
+    IFR(ValidateLoadModule((const char *)pOptDebugBitcode, OptDebugBitcodeSize,
+                           pDebugModule, DbgCtx, DiagStream, /*bLazyLoad*/false));
+  }
+
   // Validate DXIL Module
   IFR(ValidateDxilModule(pModule.get(), pDebugModule.get()));
 
@@ -6271,4 +6345,10 @@ HRESULT ValidateDxilContainer(const void *pContainer,
     IsDxilContainerLike(pContainer, ContainerSize), ContainerSize);
 }
 
+// Overload for callers with no separate debug bitcode: forwards to the
+// five-argument ValidateDxilContainer with a null debug-bitcode pointer and a
+// size of zero.
+_Use_decl_annotations_
+HRESULT ValidateDxilContainer(const void *pContainer,
+                              uint32_t ContainerSize,
+                              llvm::raw_ostream &DiagStream) {
+  const void *const pNoDebugBitcode = nullptr;
+  const uint32_t noDebugBitcodeSize = 0;
+  return ValidateDxilContainer(pContainer, ContainerSize, pNoDebugBitcode,
+                               noDebugBitcodeSize, DiagStream);
+}
 } // namespace hlsl

+ 3 - 3
lib/HLSL/HLMatrixLowerPass.cpp

@@ -197,7 +197,7 @@ bool HLMatrixLowerPass::runOnModule(Module &M) {
   m_pHLModule = &m_pModule->GetOrCreateHLModule();
   // Load up debug information, to cross-reference values and the instructions
   // used to load them.
-  m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+  m_HasDbgInfo = hasDebugInfo(M);
   m_matToVecStubs = &matToVecStubs;
   m_vecToMatStubs = &vecToMatStubs;
 
@@ -531,8 +531,8 @@ void HLMatrixLowerPass::replaceAllVariableUses(
     }
 
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Use.getUser())) {
-      DXASSERT(CE->getOpcode() == Instruction::AddrSpaceCast,
-               "Unexpected constant user");
+      DXASSERT(CE->getOpcode() == Instruction::AddrSpaceCast ||
+        CE->use_empty(), "Unexpected constant user");
       replaceAllVariableUses(GEPIdxStack, CE, LoweredPtr);
       DXASSERT_NOMSG(CE->use_empty());
       CE->destroyConstant();

+ 1 - 3
lib/HLSL/HLModule.cpp

@@ -322,9 +322,7 @@ std::vector<uint8_t> &HLModule::GetSerializedRootSignature() {
 }
 
 void HLModule::SetSerializedRootSignature(const uint8_t *pData, unsigned size) {
-  m_SerializedRootSignature.clear();
-  m_SerializedRootSignature.resize(size);
-  memcpy(m_SerializedRootSignature.data(), pData, size);
+  m_SerializedRootSignature.assign(pData, pData+size);
 }
 
 DxilTypeSystem &HLModule::GetTypeSystem() {

+ 3 - 4
lib/HLSL/HLOperationLower.cpp

@@ -25,7 +25,6 @@
 #include "dxc/HLSL/HLOperationLowerExtension.h"
 #include "dxc/HLSL/HLOperations.h"
 #include "dxc/HlslIntrinsicOp.h"
-#include "dxc/HLSL/DxilConvergent.h"
 #include "dxc/DXIL/DxilResourceProperties.h"
 
 #include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -844,8 +843,8 @@ Value *FindScalarSource(Value *src, unsigned vecIdx = 0) {
         vecIdx = (unsigned)cast<ConstantInt>(EE->getIndexOperand())
           ->getUniqueInteger().getLimitedValue();
         src = EE->getVectorOperand();
-      } else if (hlsl::IsConvergentMarker(src)) {
-        src = hlsl::GetConvergentSource(src);
+      } else if (hlsl::dxilutil::IsConvergentMarker(src)) {
+        src = hlsl::dxilutil::GetConvergentSource(src);
       } else {
         break;  // Found it.
       }
@@ -6657,7 +6656,7 @@ void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
 
         // Load the whole register.
         Value *newLd = GenerateCBLoadLegacy(handle, legacyIndex,
-                                     /*channelOffset*/ 0, EltTy,
+                                     /*channelOffset*/ channel, EltTy,
                                      /*vecSize*/ vecSize, hlslOP, Builder);
         // Copy to array.
         IRBuilder<> AllocaBuilder(GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());

+ 60 - 6
lib/HLSL/WaveSensitivityAnalysis.cpp

@@ -63,6 +63,7 @@ private:
   map<Instruction *, WaveSensitivity> InstState;
   map<BasicBlock *, WaveSensitivity> BBState;
   std::vector<Instruction *> InstWorkList;
+  std::vector<PHINode *> UnknownPhis; // currently unknown phis. Indicate cycles after Analyze
   std::vector<BasicBlock *> BBWorkList;
   bool CheckBBState(BasicBlock *BB, WaveSensitivity WS);
   WaveSensitivity GetInstState(Instruction *I);
@@ -72,6 +73,7 @@ private:
 public:
   WaveSensitivityAnalyzer(PostDominatorTree &PDT) : pPDT(&PDT) {}
   void Analyze(Function *F);
+  void Analyze();
   bool IsWaveSensitive(Instruction *op);
 };
 
@@ -79,8 +81,60 @@ WaveSensitivityAnalysis* WaveSensitivityAnalysis::create(PostDominatorTree &PDT)
   return new WaveSensitivityAnalyzer(PDT);
 }
 
+// Classify every instruction of the given function as wave-sensitive or not.
 void WaveSensitivityAnalyzer::Analyze(Function *F) {
-  UpdateBlock(&F->getEntryBlock(), KnownNotSensitive);
+  // Queue all blocks but the entry in reverse order; the worklist is popped from the back, so they come out in order.
+  auto it = F->getBasicBlockList().end();
+  for ( it-- ; it != F->getBasicBlockList().begin(); it--)
+    BBWorkList.emplace_back(&*it);
+  // 'it' now refers to the entry block; seed it as known not sensitive.
+  UpdateBlock(&*it, KnownNotSensitive);
+
+  // First analysis pass over the seeded worklists.
+  Analyze();
+
+  // If any phis whose preds have all been explored remain unknown,
+  // they have to be in a loop that doesn't involve wave sensitivity.
+  // Mark each as such and redo Analyze to propagate to the descendants.
+  while (!UnknownPhis.empty() || !InstWorkList.empty() || !BBWorkList.empty()) {
+    while (!UnknownPhis.empty()) {
+      PHINode *Phi = UnknownPhis.back();
+      UnknownPhis.pop_back();
+      // UnknownPhis may contain phis that have since been resolved; skip them.
+      if (Unknown == GetInstState(Phi)) {
+        // If any of the preds have not been visited, we can't assume a cycle yet.
+        bool allPredsVisited = true;
+        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
+          if (!BBState.count(Phi->getIncomingBlock(i))) {
+            allPredsVisited = false;
+            break;
+          }
+        }
+#ifndef NDEBUG
+        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
+          if (Instruction *IArg = dyn_cast<Instruction>(Phi->getIncomingValue(i))) {
+            DXASSERT_LOCALVAR(IArg, GetInstState(IArg) != KnownSensitive,
+                   "Unknown wave-status Phi argument should not be able to be known sensitive");
+          }
+        }
+#endif
+        if (allPredsVisited)
+          UpdateInst(Phi, KnownNotSensitive);
+      }
+    }
+    Analyze();
+  }
+#ifndef NDEBUG
+  for (BasicBlock &BB : *F) {
+    for (Instruction &I : BB) {
+      DXASSERT_LOCALVAR(I, Unknown != GetInstState(&I), "Wave sensitivity analysis exited without finding results for all instructions");
+    }
+  }
+#endif
+}
+
+// Analyze the member instruction and BBlock worklists
+void WaveSensitivityAnalyzer::Analyze() {
   while (!InstWorkList.empty() || !BBWorkList.empty()) {
     // Process the instruction work list.
     while (!InstWorkList.empty()) {
@@ -94,8 +148,8 @@ void WaveSensitivityAnalyzer::Analyze(Function *F) {
       }
     }
 
-    // Process the basic block work list.
-    while (!BBWorkList.empty()) {
+    // Process one entry of the basic block work list.
+    if (!BBWorkList.empty()) {
       BasicBlock *BB = BBWorkList.back();
       BBWorkList.pop_back();
 
@@ -184,6 +238,8 @@ void WaveSensitivityAnalyzer::VisitInst(Instruction *I) {
       if (WS == KnownSensitive) {
         UpdateInst(I, KnownSensitive);
         return;
+      } else if (Unknown == GetInstState(I)) {
+        UnknownPhis.emplace_back(Phi);
       }
     }
   }
@@ -196,10 +252,8 @@ void WaveSensitivityAnalyzer::VisitInst(Instruction *I) {
       if (WS == KnownSensitive) {
         UpdateInst(I, KnownSensitive);
         return;
-      }
-      if (WS == Unknown) {
+      } else if (WS == Unknown) {
         allKnownNotSensitive = false;
-        return;
       }
     }
   }

+ 12 - 0
lib/IR/BasicBlock.cpp

@@ -168,6 +168,18 @@ CallInst *BasicBlock::getTerminatingMustTailCall() {
   return nullptr;
 }
 
+// HLSL Change - begin
+size_t BasicBlock::compute_size_no_dbg() const {
+  // Tally every instruction in this block except debug-info intrinsics.
+  size_t Count = 0;
+  for (const Instruction &Inst : InstList)
+    if (!isa<DbgInfoIntrinsic>(&Inst))
+      ++Count;
+  return Count;
+}
+// HLSL Change - end
+
 Instruction* BasicBlock::getFirstNonPHI() {
   for (Instruction &I : *this)
     if (!isa<PHINode>(I))

+ 15 - 0
lib/IR/DebugInfo.cpp

@@ -379,6 +379,21 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
   return 0;
 }
 
+// HLSL Change - begin
+bool llvm::hasDebugInfo(const Module &M) {
+  // Looking only for "llvm.dbg.cu" would probably suffice, but accepting any
+  // named metadata node with the "llvm.dbg." prefix is more robust.
+  Module::const_named_metadata_iterator Cur = M.named_metadata_begin();
+  const Module::const_named_metadata_iterator Last = M.named_metadata_end();
+  while (Cur != Last && !Cur->getName().startswith("llvm.dbg."))
+    ++Cur;
+  // Stopping before the end means a matching node was found.
+  return Cur != Last;
+}
+// HLSL Change - end
+
 DenseMap<const llvm::Function *, DISubprogram *>
 llvm::makeSubprogramMap(const Module &M) {
   DenseMap<const Function *, DISubprogram *> R;

+ 2 - 5
lib/IR/DiagnosticInfo.cpp

@@ -246,15 +246,12 @@ void DiagnosticInfoDxil::print(DiagnosticPrinter &DP) const {
     DP << "Function: " << Func->getName() << ": ";
   }
 
-  bool ZiPrompt = true;
   switch (getSeverity()) {
-  case DiagnosticSeverity::DS_Note:    DP << "note: "; ZiPrompt = false; break;
-  case DiagnosticSeverity::DS_Remark:  DP << "remark: "; ZiPrompt = false; break;
+  case DiagnosticSeverity::DS_Note:    DP << "note: "; break;
+  case DiagnosticSeverity::DS_Remark:  DP << "remark: "; break;
   case DiagnosticSeverity::DS_Warning: DP << "warning: "; break;
   case DiagnosticSeverity::DS_Error:   DP << "error: "; break;
   }
   DP << getMsgStr();
-  if (!DLoc && ZiPrompt)
-    DP << " Use /Zi for source location.";
 }
 // HLSL Change end - Dxil Diagnostic Info reporter

+ 0 - 2
lib/MC/MCObjectStreamer.cpp

@@ -280,10 +280,8 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst,
   IF->getContents().append(Code.begin(), Code.end());
 }
 
-#ifndef NDEBUG
 static const char *const BundlingNotImplementedMsg =
   "Aligned bundling is not implemented for this object format";
-#endif
 
 void MCObjectStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
   llvm_unreachable(BundlingNotImplementedMsg);

+ 29 - 0
lib/Option/OptTable.cpp

@@ -188,6 +188,35 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str,
   return 0;
 }
 
+// HLSL Change - begin
+Option OptTable::findOption(const char *normalizedName, unsigned FlagsToInclude, unsigned FlagsToExclude) const {
+  const StringRef Wanted(normalizedName);
+  const Info *const Last = OptionInfos + getNumOptions();
+
+  // Walk the searchable options in table order and return the first one whose
+  // name is a prefix of the requested name and whose flags pass both filters.
+  for (const Info *Cur = OptionInfos + FirstSearchableIndex; Cur != Last; ++Cur) {
+    if (!Wanted.startswith(Cur->Name))
+      continue;
+
+    Option Candidate(Cur, this);
+
+    // A zero include mask places no restriction on the candidate.
+    if (FlagsToInclude && !Candidate.hasFlag(FlagsToInclude))
+      continue;
+    if (Candidate.hasFlag(FlagsToExclude))
+      continue;
+
+    return Candidate;
+  }
+
+  // Nothing matched: hand back an Option wrapping a null Info.
+  return Option(nullptr, this);
+}
+// HLSL Change - end
+
 Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
                            unsigned FlagsToInclude,
                            unsigned FlagsToExclude) const {

+ 16 - 6
lib/Support/ErrorHandling.cpp

@@ -32,6 +32,7 @@
 #ifdef _WIN32
 #include "windows.h"  // HLSL Change
 #endif
+#include "dxc/Support/exception.h"  // HLSL Change
 
 #if defined(HAVE_UNISTD_H)
 # include <unistd.h>
@@ -112,7 +113,8 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
   if (handler) {
     handler(handlerData, Reason.str(), GenCrashDiag);
   }
-  RaiseException(STATUS_LLVM_FATAL, 0, 0, 0);
+
+  throw hlsl::Exception(DXC_E_LLVM_FATAL_ERROR, std::string("LLVM ERROR: ") + Reason.str() + "\n");
 #endif
 }
 
@@ -121,19 +123,27 @@ void llvm::llvm_unreachable_internal(const char *msg, const char *file,
   // This code intentionally doesn't call the ErrorHandler callback, because
   // llvm_unreachable is intended to be used to indicate "impossible"
   // situations, and not legitimate runtime errors.
+  // HLSL Change - collect full message in string
+  SmallVector<char, 64> Buffer;
+  raw_svector_ostream OS(Buffer);
   if (msg)
-    dbgs() << msg << "\n";
-  dbgs() << "UNREACHABLE executed";
+    OS << msg << "\n";
+  OS << "UNREACHABLE executed";
   if (file)
-    dbgs() << " at " << file << ":" << line;
-  dbgs() << "!\n";
+    OS << " at " << file << ":" << line;
+  OS << "!\n";
 #ifndef LLVM_ON_WIN32 // HLSL Change - unwind if necessary, but don't terminate the process
+  dbgs() << OS.str();
   abort();
 #else
-  RaiseException(STATUS_LLVM_UNREACHABLE, 0, 0, 0);
+  throw hlsl::Exception(DXC_E_LLVM_UNREACHABLE, OS.str());
 #endif
 }
 
+void llvm::llvm_cast_assert_internal(const char *func) { // Throws an hlsl::Exception carrying DXC_E_LLVM_CAST_ERROR; func is placed at the front of the message text.
+  throw hlsl::Exception(DXC_E_LLVM_CAST_ERROR, std::string(func) + "<X>() argument of incompatible type!\n");
+}
+
 static void bindingsErrorHandler(void *user_data, const std::string& reason,
                                  bool gen_crash_diag) {
   LLVMFatalErrorHandler handler =

+ 1 - 1
lib/Transforms/Scalar/LowerTypePasses.cpp

@@ -130,7 +130,7 @@ bool LowerTypePass::runOnModule(Module &M) {
   initialize(M);
   // Load up debug information, to cross-reference values and the instructions
   // used to load them.
-  bool HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+  bool HasDbgInfo = llvm::hasDebugInfo(M);
   llvm::DebugInfoFinder Finder;
   if (HasDbgInfo) {
     Finder.processModule(M);

+ 4 - 2
lib/Transforms/Scalar/MergedLoadStoreMotion.cpp

@@ -359,7 +359,8 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
   BasicBlock *Succ0 = BI->getSuccessor(0);
   BasicBlock *Succ1 = BI->getSuccessor(1);
   // #Instructions in Succ1 for Compile Time Control
-  int Size1 = Succ1->size();
+  // int Size1 = Succ1->size(); // HLSL Change
+  int Size1 = Succ1->compute_size_no_dbg(); // HLSL Change
   int NLoads = 0;
   for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
        BBI != BBE;) {
@@ -529,7 +530,8 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
     return false; // No. More than 2 predecessors.
 
   // #Instructions in Succ1 for Compile Time Control
-  int Size1 = Pred1->size();
+  // int Size1 = Pred1->size(); // HLSL Change
+  int Size1 = Pred1->compute_size_no_dbg(); // HLSL Change
   int NStores = 0;
 
   for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();

+ 12 - 7
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -3770,8 +3770,11 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
             if (group == HLOpcodeGroup::HLSubscript) {
               if (isReadOnlyPtr(PtrCI)) {
                 // Ptr from CBuffer/SRV is safe.
-                if (ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL, DT))
-                  return true;
+                if (ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL, DT)) {
+                  if (V->user_empty())
+                    return true;
+                  return LowerMemcpy(V, annotation, typeSys, DL, DT, bAllowReplace);
+                }
               }
             }
           }
@@ -3782,8 +3785,11 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
           hlutil::PointerStatus SrcPS(Src, size, /*bLdStOnly*/ false);
           SrcPS.analyze(typeSys, bStructElt);
           if (SrcPS.storedType != hlutil::PointerStatus::StoredType::Stored) {
-            if (ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL, DT))
-              return true;
+            if (ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL, DT)) {
+              if (V->user_empty())
+                return true;
+              return LowerMemcpy(V, annotation, typeSys, DL, DT, bAllowReplace);
+            }
           }
         }
       }
@@ -3899,7 +3905,7 @@ public:
     const DataLayout &DL = M.getDataLayout();
     // Load up debug information, to cross-reference values and the instructions
     // used to load them.
-    m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+    m_HasDbgInfo = nullptr != M.getNamedMetadata("llvm.dbg.cu");
 
     InjectReturnAfterNoReturnPreserveOutput(*m_pHLModule);
 
@@ -5769,8 +5775,7 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
   if (m_pHLModule->HasDxilFunctionProps(F)) {
     DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(F);
     std::unique_ptr<DxilFunctionProps> flatFuncProps = llvm::make_unique<DxilFunctionProps>();
-    flatFuncProps->shaderKind = funcProps.shaderKind;
-    flatFuncProps->ShaderProps = funcProps.ShaderProps;
+    *flatFuncProps = funcProps;
     m_pHLModule->AddDxilFunctionProps(flatF, flatFuncProps);
     if (funcProps.shaderKind == ShaderModel::Kind::Vertex) {
       auto &VS = funcProps.ShaderProps.VS;

+ 2 - 2
lib/Transforms/Scalar/Scalarizer.cpp

@@ -316,7 +316,7 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
     auto InsertPoint = BB->begin();
     while (InsertPoint != BB->end() && isa<DbgInfoIntrinsic>(InsertPoint))
       InsertPoint++;
-    Scatterer(BB, InsertPoint, V, AllowFolding, &Scattered[V]);
+    return Scatterer(BB, InsertPoint, V, AllowFolding, &Scattered[V]);
     // HLSL Change - End
   }
   if (Instruction *VOp = dyn_cast<Instruction>(V)) {
@@ -729,7 +729,7 @@ bool Scalarizer::finish() {
   Module &M = *Gathered.front().first->getModule();
   LLVMContext &Ctx = M.getContext();
   const DataLayout &DL = M.getDataLayout();
-  bool HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+  bool HasDbgInfo = hasDebugInfo(M);
   // Map from an extract element inst to a Value which replaced it.
   DenseMap<Instruction *, Value*> EltMap;
   // HLSL Change Ends.

+ 5 - 0
lib/Transforms/Utils/Local.cpp

@@ -46,6 +46,7 @@
 #include "llvm/Support/raw_ostream.h"
 
 #include "dxc/DXIL/DxilMetadataHelper.h" // HLSL Change - combine dxil metadata.
+#include "dxc/DXIL/DxilUtil.h" // HLSL Change - special handling of convergent marker
 using namespace llvm;
 
 #define DEBUG_TYPE "local"
@@ -331,6 +332,10 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
     if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
       return C->isNullValue() || isa<UndefValue>(C);
 
+  // HLSL change - don't force unused convergent markers to stay
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    if (hlsl::dxilutil::IsConvergentMarker(CI)) return true;
+
   return false;
 }
 

+ 2 - 2
projects/dxilconv/include/Support/DXIncludes.h

@@ -33,7 +33,7 @@
 #include <wincrypt.h>
 
 #ifndef DECODE_D3D10_SB_TOKENIZED_PROGRAM_TYPE
-#include <d3d12TokenizedProgramFormat.hpp>
+#include "dxc/Support/d3d12TokenizedProgramFormat.hpp" // forward slashes: a backslash inside a header name is not portable
 #endif
 
-#include <ShaderBinary/ShaderBinary.h>
+#include "ShaderBinary/ShaderBinary.h"

+ 1 - 1
projects/dxilconv/lib/ShaderBinary/ShaderBinaryIncludes.h

@@ -18,7 +18,7 @@
 #include <d3d12.h>
 #define D3DX12_NO_STATE_OBJECT_HELPERS
 #include "dxc/Support/d3dx12.h"
-#include "D3D12TokenizedProgramFormat.hpp"
+#include "dxc/Support/d3d12TokenizedProgramFormat.hpp" // spelled exactly like the header's file name (lowercase d3d12) so case-sensitive file systems resolve it
 #include "ShaderBinary/ShaderBinary.h"
 
 #define ASSUME( _exp ) { assert( _exp ); __analysis_assume( _exp ); __assume( _exp ); }

+ 17 - 1
tools/clang/include/clang/AST/HlslTypes.h

@@ -199,7 +199,8 @@ public:
   enum UnusualAnnotationKind {
     UA_RegisterAssignment,
     UA_ConstantPacking,
-    UA_SemanticDecl
+    UA_SemanticDecl,
+    UA_PayloadAccessQualifier
   };
 private:
   const UnusualAnnotationKind Kind;
@@ -243,6 +244,21 @@ struct RegisterAssignment : public UnusualAnnotation
   }
 };
 
+/// <summary>Use this structure to capture a payload access qualifier (': read(...)' / ': write(...)') definition.</summary>
+struct PayloadAccessAnnotation: public UnusualAnnotation {
+  /// <summary>Initializes a new PayloadAccessAnnotation in invalid state.</summary>
+  PayloadAccessAnnotation() : UnusualAnnotation(UA_PayloadAccessQualifier){};
+
+  DXIL::PayloadAccessQualifier qualifier = DXIL::PayloadAccessQualifier::NoAccess; // starts out as NoAccess
+  
+  llvm::SmallVector<DXIL::PayloadAccessShaderStage, 4> ShaderStages; // presumably the stages the qualifier applies to -- confirm against the parser
+
+  static bool classof(const UnusualAnnotation *UA) { // kind check that lets cast<>/dyn_cast<> identify this annotation type
+    return UA->getKind() == UA_PayloadAccessQualifier;
+  }
+};
+
+
 /// <summary>Use this structure to capture a ': packoffset' definition.</summary>
 struct ConstantPacking : public UnusualAnnotation
 {

+ 5 - 0
tools/clang/include/clang/Basic/Attr.td

@@ -895,6 +895,11 @@ def HLSLPayload : InheritableAttr {
   let Documentation = [Undocumented];
 }
 
+def HLSLRayPayload : InheritableAttr {
+  let Spellings = [CXX11<"", "raypayload", 2015>];
+  let Documentation = [Undocumented];
+}
+
 def HLSLWaveSensitive : InheritableAttr {
   let Spellings = [CXX11<"", "wavesensitive", 2015>];
   let Subjects = SubjectList<[ParmVar]>;

+ 33 - 0
tools/clang/include/clang/Basic/Diagnostic.h

@@ -23,6 +23,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/SmallVector.h"
 #include <list>
 #include <vector>
 
@@ -674,6 +675,17 @@ public:
 
   void Report(const StoredDiagnostic &storedDiag);
 
+  /// \brief Issue the message to the client but only once.
+  ///
+  /// This actually returns an instance of DiagnosticBuilder which emits the
+  /// diagnostics (through @c ProcessDiag) when it is destroyed.
+  ///
+  /// \param DiagID A member of the @c diag::kind enum.
+  /// \param Loc Represents the source location associated with the diagnostic,
+  /// which can be an invalid location if no position information is available.
+  inline DiagnosticBuilder ReportOnce(unsigned DiagID);
+  inline DiagnosticBuilder ReportOnce(SourceLocation Loc, unsigned DiagID);
+
   /// \brief Determine whethere there is already a diagnostic in flight.
   bool isDiagnosticInFlight() const { return CurDiagID != ~0U; }
 
@@ -726,6 +738,9 @@ private:
   /// \brief The location of the current diagnostic that is in flight.
   SourceLocation CurDiagLoc;
 
+  /// \brief Stores the IDs of diagnostics that should only be emitted once.
+  llvm::SmallVector<unsigned, 2> DiagOnceDiagnostics;
+
   /// \brief The ID of the current diagnostic that is in flight.
   ///
   /// This is set to ~0U when there is no diagnostic in flight.
@@ -1126,10 +1141,28 @@ inline DiagnosticBuilder DiagnosticsEngine::Report(SourceLocation Loc,
   return DiagnosticBuilder(this);
 }
 
+
 inline DiagnosticBuilder DiagnosticsEngine::Report(unsigned DiagID) {
   return Report(SourceLocation(), DiagID);
 }
 
+
+inline DiagnosticBuilder DiagnosticsEngine::ReportOnce(unsigned DiagID) {
+  return ReportOnce(SourceLocation(), DiagID); // no position available: forward with a default-constructed SourceLocation
+}
+
+inline DiagnosticBuilder DiagnosticsEngine::ReportOnce(SourceLocation Loc,
+                                                       unsigned DiagID) {
+  // Has this diagnostic ID already been issued through ReportOnce?
+  const bool AlreadyReported =
+      std::find(DiagOnceDiagnostics.begin(), DiagOnceDiagnostics.end(),
+                DiagID) != DiagOnceDiagnostics.end();
+
+  if (!AlreadyReported) {
+    // First occurrence: remember the ID and report normally.
+    DiagOnceDiagnostics.push_back(DiagID);
+    return Report(Loc, DiagID);
+  }
+
+  // Repeat occurrence: return a builder that has been switched to inactive.
+  DiagnosticBuilder Suppressed(this);
+  Suppressed.IsActive = false;
+  return Suppressed;
+}
 //===----------------------------------------------------------------------===//
 // Diagnostic
 //                                                                           //

+ 10 - 0
tools/clang/include/clang/Basic/DiagnosticGroups.td

@@ -787,4 +787,14 @@ def HLSLSpecifierOverride : DiagGroup<"specifier-override">;
 def HLSLPackOffsetOverride : DiagGroup<"packoffset-override">;
 def HLSLCommaInInit : DiagGroup<"comma-in-init">;
 def HLSLAmbigLitShift : DiagGroup<"ambig-lit-shift">;
+def HLSLPayloadAccessQualiferTrace: DiagGroup<"payload-access-trace">;
+def HLSLPayloadAccessQualiferShader: DiagGroup<"payload-access-shader">;
+def HLSLPayloadAccessQualiferPerf: DiagGroup<"payload-access-perf">;
+def HLSLPayloadAccessQualiferCall: DiagGroup<"payload-access-call">;
+def HLSLPayloadAccessQualifer: DiagGroup<"payload-access-qualifier", [
+     HLSLPayloadAccessQualiferTrace,
+     HLSLPayloadAccessQualiferShader,
+     HLSLPayloadAccessQualiferPerf,
+     HLSLPayloadAccessQualiferCall
+  ]>;
 // HLSL Change Ends

+ 36 - 0
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -7540,6 +7540,42 @@ def err_hlsl_unsupported_buffer_packoffset : Error<
   "packoffset is only allowed within a constant buffer, not on the constant buffer declaration">;
 def err_hlsl_unsupported_buffer_slot_target_specific : Error<
   "user defined constant buffer slots cannot be target specific">;
+def err_hlsl_unsupported_payload_access_qualifier : Error<
+  "payload access qualifiers are only allowed for member variables of a payload structure">;
+def err_hlsl_unsupported_payload_access_qualifier_struct : Error<
+  "payload access qualifiers are not supported on struct types.">;
+def err_hlsl_payload_access_qualifier_unsupported_shader : Error<
+  "payload access qualifiers are only defined for raytracing shader stages closesthit, miss, anyhit and for special keyword: caller. '%0' is not supported">;
+def err_hlsl_payload_access_qualifier_invalid_combination : Error<
+  "field %0 is qualified '%1' for shader stage '%2' but has no valid %3">;
+def err_hlsl_payload_access_qualifier_multiple_defined : Error<
+  "payload access qualifier '%0' has already been defined">;
+def warn_hlsl_payload_access_data_loss : Warning<
+  "potential loss of data for payload field '%0'. Field is qualified 'write' in earlier stages and 'write' only for stage '%1' but never unconditionally written.">, InGroup<HLSLPayloadAccessQualiferShader>;
+def warn_hlsl_payload_access_undef_read : Warning<
+  "reading undefined value ('%0' is not qualified 'read' for shader stage '%1')">, InGroup<HLSLPayloadAccessQualiferShader>;
+def warn_hlsl_payload_access_write_loss : Warning<
+  "write will be dropped ('%0' is not qualified 'write' for shader stage '%1')">, InGroup<HLSLPayloadAccessQualiferShader>;
+def warn_hlsl_payload_access_no_write_for_trace_payload : Warning<
+  "field '%0' is 'write' for 'caller' stage but field is never written for TraceRay call">, InGroup<HLSLPayloadAccessQualiferTrace>;
+def warn_hlsl_payload_access_write_but_no_write_for_trace_payload : Warning<
+  "value will be undefined inside TraceRay ('%0' is not qualified 'write' for 'caller')">, InGroup<HLSLPayloadAccessQualiferTrace>;
+def warn_hlsl_payload_access_read_of_undef_after_trace : Warning<
+  "reading undefined value ('%0' is returned from TraceRay but not qualified 'read' for 'caller')">, InGroup<HLSLPayloadAccessQualiferTrace>;
+def warn_hlsl_payload_access_read_but_no_read_after_trace : Warning<
+  "'%0' is qualified 'read' for 'caller' but the field is never read after TraceCall (possible performance issue)">, InGroup<HLSLPayloadAccessQualiferPerf>;
+def warn_qualified_payload_passed_to_extern_function : Warning<
+  "passing a qualified payload to an extern function can cause undefined behavior if payload qualifiers mismatch">, InGroup<HLSLPayloadAccessQualiferCall>;
+def err_not_all_payload_fields_qualified : Error<
+  "payload type '%0' requires that all fields carry payload access qualifiers.">;
+def err_payload_requires_attribute : Error<
+  "type '%0' used as payload requires that it is annotated with the [raypayload] attribute">;
+def err_payload_fields_not_qualified : Error<
+  "payload field '%0' has no payload access qualifiers.">;
+def err_payload_fields_is_payload_and_overqualified : Error<
+  "payload field '%0' is a payload struct. Payload access qualifiers are not allowed on payload types.">;
+def warn_hlsl_payload_qualifer_dropped : Warning<
+  "payload access qualifieres are only supported for target lib_6_6 and beyond. You can opt-in for lib_6_6 with the -enable-payload-qualifiers flag. Qualifiers will be dropped.">, InGroup<HLSLPayloadAccessQualifer>;
 def err_hlsl_unsupported_builtin_op: Error<
   "operator cannot be used with built-in type %0">;
 def err_hlsl_unsupported_char_literal : Error<

+ 1 - 0
tools/clang/include/clang/Basic/LangOptions.h

@@ -159,6 +159,7 @@ public:
   bool EnableDX9CompatMode;
   bool EnableFXCCompatMode;
   bool EnableTemplates;
+  bool EnablePayloadAccessQualifiers;
   // HLSL Change Ends
 
   bool SPIRV = false;  // SPIRV Change

+ 2 - 0
tools/clang/include/clang/Frontend/CodeGenOptions.h

@@ -234,6 +234,8 @@ public:
   bool HLSLEnableLifetimeMarkers = false;
   /// Put shader sources and options in the module
   bool HLSLEmbedSourcesInModule = false;
+  /// Enable generation of payload access qualifier metadata. 
+  bool HLSLEnablePayloadAccessQualifiers = false;
   // HLSL Change Ends
 
   // SPIRV Change Starts

+ 7 - 0
tools/clang/include/clang/SPIRV/AstTypeProbe.h

@@ -86,6 +86,13 @@ bool isMx1Matrix(QualType type, QualType *elemType = nullptr,
 bool isMxNMatrix(QualType type, QualType *elemType = nullptr,
                  uint32_t *rowCount = nullptr, uint32_t *colCount = nullptr);
 
+/// Returns true if the given type will be translated into a SPIR-V array type.
+///
+/// Writes the element type and count into *elemType and *elemCount
+/// respectively if they are not nullptr.
+bool isArrayType(QualType type, QualType *elemType = nullptr,
+                 uint32_t *elemCount = nullptr);
+
 /// \brief Returns true if the given type is a ConstantBuffer or an array of
 /// ConstantBuffers.
 bool isConstantBuffer(QualType);

+ 1 - 0
tools/clang/include/clang/SPIRV/FeatureManager.h

@@ -31,6 +31,7 @@ enum class Extension {
   KHR = 0,
   KHR_16bit_storage,
   KHR_device_group,
+  KHR_fragment_shading_rate,
   KHR_non_semantic_info,
   KHR_multiview,
   KHR_shader_draw_parameters,

+ 10 - 0
tools/clang/include/clang/Sema/SemaHLSL.h

@@ -86,6 +86,16 @@ void DiagnoseUnusualAnnotationsForHLSL(
   clang::Sema& S,
   std::vector<hlsl::UnusualAnnotation *>& annotations);
 
+void DiagnosePayloadAccessQualifierAnnotations(
+  clang::Sema &S,
+  clang::Declarator& D,
+  const clang::QualType& T,
+  const std::vector<hlsl::UnusualAnnotation *> &annotations);
+
+void DiagnoseRaytracingPayloadAccess(
+  clang::Sema &S,
+  clang::TranslationUnitDecl* TU);
+
 /// <summary>Finds the best viable function on this overload set, if it exists.</summary>
 clang::OverloadingResult GetBestViableFunction(
   clang::Sema &S,

+ 3 - 0
tools/clang/lib/AST/ASTContextHLSL.cpp

@@ -1169,6 +1169,9 @@ UnusualAnnotation* hlsl::UnusualAnnotation::CopyToASTContext(ASTContext& Context
     break;
   case UA_ConstantPacking:
     instanceSize = sizeof(hlsl::ConstantPacking);
+    break;  
+  case UA_PayloadAccessQualifier:
+    instanceSize = sizeof(hlsl::PayloadAccessAnnotation);
     break;
   default:
     DXASSERT(Kind == UA_SemanticDecl, "Kind == UA_SemanticDecl -- otherwise switch is incomplete");

+ 20 - 0
tools/clang/lib/AST/ASTDumper.cpp

@@ -994,6 +994,8 @@ void ASTDumper::dumpHLSLUnusualAnnotations(const ArrayRef<hlsl::UnusualAnnotatio
             OS << "RegisterAssignment"; break;
           case hlsl::UnusualAnnotation::UA_SemanticDecl:
             OS << "SemanticDecl"; break;
+          case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier:
+            OS << "PayloadAccessQualifier"; break;
         }
       }
       dumpPointer(It);
@@ -1043,7 +1045,25 @@ void ASTDumper::dumpHLSLUnusualAnnotations(const ArrayRef<hlsl::UnusualAnnotatio
           const hlsl::SemanticDecl* semanticDecl = cast<hlsl::SemanticDecl>(*It);
           OS << " \"" << semanticDecl->SemanticName << "\"";
           break;
+        }      
+      case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier: {
+        const hlsl::PayloadAccessAnnotation *annotation =
+            cast<hlsl::PayloadAccessAnnotation>(*It);
+        OS << " "
+           << (annotation->qualifier == hlsl::DXIL::PayloadAccessQualifier::Read
+                   ? "read"
+                   : "write")
+           << "(";
+        StringRef shaderStageNames[] = {"caller", "closesthit", "miss", "anyhit"};
+        for (unsigned i = 0; i < annotation->ShaderStages.size(); ++i) {
+          OS << shaderStageNames[static_cast<unsigned>(
+              annotation->ShaderStages[i])];
+          if (i < annotation->ShaderStages.size() - 1)
+            OS << ", ";
         }
+        OS << ")";
+        break;
+      }
       }
     });
   }

+ 17 - 0
tools/clang/lib/AST/DeclPrinter.cpp

@@ -1495,6 +1495,23 @@ void DeclPrinter::VisitHLSLUnusualAnnotation(const hlsl::UnusualAnnotation *UA)
     Out << ")";
     break;
   }
+  case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier: {
+    const hlsl::PayloadAccessAnnotation *annotation =
+        cast<hlsl::PayloadAccessAnnotation>(UA);
+    Out << " : "
+        << (annotation->qualifier == hlsl::DXIL::PayloadAccessQualifier::Read
+                ? "read"
+                : "write")
+        << "(";
+    StringRef shaderStageNames[] = { "caller", "closesthit", "miss", "anyhit"};
+    for (unsigned i = 0; i < annotation->ShaderStages.size(); ++i) {
+      Out << shaderStageNames[static_cast<unsigned>(annotation->ShaderStages[i])];
+      if (i < annotation->ShaderStages.size() - 1)
+        Out << ", ";
+    }
+    Out << ")";
+    break;
+  }
   }
 }
 

+ 155 - 34
tools/clang/lib/CodeGen/CGExprConstant.cpp

@@ -636,6 +636,72 @@ public:
     return Visit(E->getInitializer());
   }
 
+  // HLSL changes begin
+  // Copies the first vecSize scalar elements of the constant vector constVec
+  // into Elems[0..vecSize). Elems is indexed directly and never resized, so
+  // the caller must pass a vector that already holds at least vecSize entries.
+  // constVec is handled as a ConstantDataVector or a ConstantVector; any other
+  // constant is cast<> to ConstantAggregateZero.
+  static void ExtractConstantValueElems(llvm::Constant *constVec, llvm::SmallVector<llvm::Constant*, 4> &Elems, unsigned vecSize) {
+    if (auto *DataVec = dyn_cast<llvm::ConstantDataVector>(constVec)) {
+      for (unsigned idx = 0; idx != vecSize; ++idx)
+        Elems[idx] = DataVec->getElementAsConstant(idx);
+      return;
+    }
+
+    if (auto *Vec = dyn_cast<llvm::ConstantVector>(constVec)) {
+      for (unsigned idx = 0; idx != vecSize; ++idx)
+        Elems[idx] = Vec->getOperand(idx);
+      return;
+    }
+
+    // Neither vector form matched: treat the constant as an all-zero aggregate.
+    auto *Zero = cast<llvm::ConstantAggregateZero>(constVec);
+    for (unsigned idx = 0; idx != vecSize; ++idx)
+      Elems[idx] = Zero->getElementValue(idx);
+  }
+
+  // Casts the scalar constant C from the source element type to the
+  // destination element type and returns the resulting constant.
+  //   srcTy / destTy         - clang element types; besides the entry asserts
+  //                            they are only consulted for signedness when
+  //                            choosing the cast opcode.
+  //   srcLLVMTy / destLLVMTy - the corresponding LLVM types; they decide
+  //                            between float and integer handling and supply
+  //                            the bit widths that are compared.
+  //   CGM                    - not used by this helper.
+  // A 1-bit integer destination is treated as boolean: the result is 0 when C
+  // is a zero value and 1 otherwise.
+  static llvm::Constant* ConvertToMatchDestType (const clang::Type *srcTy, const clang::Type *destTy,
+    llvm::Type *srcLLVMTy, llvm::Type *destLLVMTy, llvm::Constant *C, CodeGenModule &CGM) {
+
+    assert(srcTy->isFloatingType() || srcTy->isIntegerType());
+    assert(destTy->isFloatingType() || destTy->isIntegerType());
+
+    const bool destIsInt = destLLVMTy->isIntegerTy();
+
+    // Boolean destination: every non-zero value collapses to 1.
+    if (destIsInt && destLLVMTy->getScalarSizeInBits() == 1)
+      return llvm::ConstantInt::get(destLLVMTy, C->isZeroValue() ? 0 : 1);
+
+    const bool srcIsFP = srcLLVMTy->isFloatingPointTy();
+    const bool destIsFP = destLLVMTy->isFloatingPointTy();
+    const bool srcIsWider =
+        srcLLVMTy->getScalarSizeInBits() > destLLVMTy->getScalarSizeInBits();
+
+    llvm::Instruction::CastOps castOp = llvm::Instruction::CastOpsEnd;
+
+    if (srcIsFP && destIsFP) {
+      // float <-> float: narrow or widen.
+      castOp = srcIsWider ? llvm::Instruction::FPTrunc : llvm::Instruction::FPExt;
+    }
+    else if (srcIsFP && destIsInt) {
+      // float -> int: signedness comes from the destination type.
+      castOp = destTy->isSignedIntegerType() ? llvm::Instruction::FPToSI : llvm::Instruction::FPToUI;
+    }
+    else if (srcLLVMTy->isIntegerTy() && destIsFP) {
+      // int -> float: signedness comes from the source type.
+      castOp = srcTy->isSignedIntegerType() ? llvm::Instruction::SIToFP : llvm::Instruction::UIToFP;
+    }
+    else {
+      // Both src and dest should be of integer type here.
+      assert(srcLLVMTy->isIntegerTy() && destIsInt);
+
+      if (srcIsWider)
+        castOp = llvm::Instruction::Trunc;
+      else
+        castOp = srcTy->isSignedIntegerType() ? llvm::Instruction::SExt : llvm::Instruction::ZExt;
+    }
+
+    assert(castOp != llvm::Instruction::CastOpsEnd);
+    return llvm::ConstantExpr::getCast(castOp, C, destLLVMTy);
+  }
+
+  // HLSL changes end
+
   llvm::Constant *VisitCastExpr(CastExpr* E) {
     Expr *subExpr = E->getSubExpr();
     llvm::Constant *C = CGM.EmitConstantExpr(subExpr, subExpr->getType(), CGF);
@@ -748,10 +814,68 @@ public:
     case CK_HLSLCC_IntegralToBoolean:
     case CK_HLSLCC_IntegralToFloating:
     case CK_HLSLCC_FloatingToIntegral:
-    case CK_HLSLCC_FloatingToBoolean:
-      // Since these cast kinds have already been handled in ExprConstant.cpp,
-      // we can reuse the logic there.
-      return CGM.EmitConstantExpr(E, E->getType(), CGF);
+    case CK_HLSLCC_FloatingToBoolean: {
+      bool isMatrixCast = hlsl::IsHLSLMatType(E->getType()) && hlsl::IsHLSLMatType(E->getSubExpr()->getType());
+      if (!isMatrixCast) {
+        // Since these cast kinds have already been handled in ExprConstant.cpp,
+        // we can reuse the logic there.
+        return CGM.EmitConstantExpr(E, E->getType(), CGF);
+      }
+      else {
+        // For cast involving matrix type, if the subexpression has already
+        // been successfully evaluated to a constant, then just cast it to
+        // match the destination type.
+        llvm::Constant *SubExprResult = C;
+
+        const clang::Type * srcEltType = hlsl::GetHLSLMatElementType(E->getSubExpr()->getType()).getCanonicalType().getTypePtr();
+        const clang::Type * destEltType = hlsl::GetHLSLMatElementType(E->getType()).getCanonicalType().getTypePtr();
+
+        // If the dest type is same as the src type, then trivially
+        // return the result of the subexpression evaluation.
+        llvm::Type *srcEltLLVMTy = CGM.getTypes().ConvertType(srcEltType->getCanonicalTypeInternal());
+        llvm::Type *destEltLLVMTy = CGM.getTypes().ConvertType(destEltType->getCanonicalTypeInternal());
+        // Use desugared llvm type for comparison as half and float could both mean float type
+        // when -enable-16bit-types flag is not used.
+        if (srcEltLLVMTy == destEltLLVMTy) {
+          return SubExprResult;
+        }
+
+        unsigned destRow, destCol;
+        hlsl::GetHLSLMatRowColCount(E->getType(), destRow, destCol);
+
+        unsigned srcRow, srcCol;
+        hlsl::GetHLSLMatRowColCount(E->getSubExpr()->getType(), srcRow, srcCol);
+
+        // Src and Dest matrices must have same order
+        assert(destRow == srcRow && destCol == srcCol);
+
+        if (llvm::ConstantStruct *srcVal = dyn_cast<llvm::ConstantStruct>(SubExprResult)) {
+          llvm::ConstantArray *srcMat = cast<llvm::ConstantArray>(srcVal->getOperand(0));
+          llvm::SmallVector<llvm::Constant*, 4> destRowElts;
+
+          for (unsigned r = 0; r < srcRow; r++) {
+            llvm::SmallVector<llvm::Constant*, 4> destColElts(srcCol);
+            llvm::Constant *srcColVal = srcMat->getOperand(r);
+            ExtractConstantValueElems(srcColVal, destColElts, srcCol);
+            for (unsigned i = 0; i < srcCol; i++) {
+              destColElts[i] = ConvertToMatchDestType(srcEltType, destEltType, srcEltLLVMTy, destEltLLVMTy, destColElts[i], CGM);
+            }
+            llvm::Constant *destCols = llvm::ConstantVector::get(destColElts);
+            destRowElts.emplace_back(destCols);
+          }
+
+          llvm::StructType *destValType = cast<llvm::StructType>(destType);
+          llvm::Constant *destMat = llvm::ConstantArray::get(
+            cast<llvm::ArrayType>(destValType->getElementType(0)), destRowElts);
+          llvm::Constant* destVal = llvm::ConstantStruct::get(destValType, destMat);
+          return destVal;
+        }
+        else if (llvm::ConstantAggregateZero *CAZ = dyn_cast<llvm::ConstantAggregateZero>(SubExprResult)) {
+          return llvm::Constant::getNullValue(destType);
+        }
+      }
+    }
+
     case CK_FlatConversion:
       return nullptr;
     case CK_HLSLVectorSplat: {
@@ -773,54 +897,51 @@ public:
     case CK_HLSLVectorTruncationCast: {
       unsigned vecSize = hlsl::GetHLSLVecSize(E->getType());
       SmallVector<llvm::Constant*, 4> Elts(vecSize);
-      if (llvm::ConstantDataVector *CDV = dyn_cast<llvm::ConstantDataVector>(C)) {
-        for (unsigned i = 0; i < vecSize; i++)
-          Elts[i] = CDV->getElementAsConstant(i);
-      } else if (llvm::ConstantVector* CV = dyn_cast<llvm::ConstantVector>(C)) {
-        for (unsigned i = 0; i < vecSize; i++)
-          Elts[i] = CV->getOperand(i);
-      } else {
-        llvm::ConstantAggregateZero* CAZ = cast<llvm::ConstantAggregateZero>(C);
-        for (unsigned i = 0; i < vecSize; i++)
-          Elts[i] = CAZ->getElementValue(i);
-      }
+      ExtractConstantValueElems(C, Elts, vecSize);
       return llvm::ConstantVector::get(Elts);
     }
     case CK_HLSLVectorToScalarCast: {
-      if (llvm::ConstantDataVector* CDV = dyn_cast<llvm::ConstantDataVector>(C)) {
-        return CDV->getElementAsConstant(0);
+      SmallVector<llvm::Constant*, 4> Elts(1);
+      ExtractConstantValueElems(C, Elts, 1);
+      return Elts[0];
+    }
+    case CK_HLSLMatrixToScalarCast: {
+      unsigned rowCt, colCt;
+      hlsl::GetHLSLMatRowColCount(E->getType(), rowCt, colCt);
+      if (llvm::ConstantStruct *CS = dyn_cast<llvm::ConstantStruct>(C)) {
+        llvm::ConstantArray *CA = dyn_cast<llvm::ConstantArray>(CS->getOperand(0));
+        SmallVector<llvm::Constant*, 4> Elts(colCt);
+        ExtractConstantValueElems(CA->getOperand(0), Elts, colCt);
+        return Elts[0];
       }
-      else if (llvm::ConstantVector* CV = dyn_cast<llvm::ConstantVector>(C)) {
-        return CV->getOperand(0);
-      } else {
-        llvm::ConstantAggregateZero* CAZ = cast<llvm::ConstantAggregateZero>(C);
-        return CAZ->getElementValue((unsigned)0);
+      else if (llvm::ConstantAggregateZero *CAZ = dyn_cast<llvm::ConstantAggregateZero>(C)) {
+        llvm::Constant *destVal = llvm::Constant::getNullValue(destType);
+        return destVal;
       }
     }
     case CK_HLSLMatrixTruncationCast: {
-      llvm::StructType *ST =
-          cast<llvm::StructType>(CGM.getTypes().ConvertType(E->getType()));
-      unsigned rowCt,colCt;
-      hlsl::GetHLSLMatRowColCount(E->getType(), rowCt, colCt);
       if (llvm::ConstantStruct *CS = dyn_cast<llvm::ConstantStruct>(C)) {
+        unsigned rowCt, colCt;
+        hlsl::GetHLSLMatRowColCount(E->getType(), rowCt, colCt);
         llvm::ConstantArray *CA = dyn_cast<llvm::ConstantArray>(CS->getOperand(0));
         SmallVector<llvm::Constant *, 4> Rows(rowCt);
         for (unsigned i = 0; i < rowCt; i++) {
           SmallVector<llvm::Constant*, 4> Elts(colCt);
-          if (llvm::ConstantDataVector *CDV = dyn_cast<llvm::ConstantDataVector>(CA->getOperand(i))) {
-            for (unsigned j = 0; j < colCt; j++)
-              Elts[j] = CDV->getElementAsConstant(j);
-          } else {
-            llvm::ConstantVector *CV = cast<llvm::ConstantVector>(CA->getOperand(i));
-            for (unsigned j = 0; j < colCt; j++)
-              Elts[j] = CV->getOperand(j);
-          }
+          ExtractConstantValueElems(CA->getOperand(i), Elts, colCt);
           Rows[i] = llvm::ConstantVector::get(Elts);
         }
+
+        // Create truncated matrix
+        llvm::StructType *ST =
+          cast<llvm::StructType>(CGM.getTypes().ConvertType(E->getType()));
         llvm::Constant *Mat = llvm::ConstantArray::get(
             cast<llvm::ArrayType>(ST->getElementType(0)), Rows);
         return llvm::ConstantStruct::get(ST, Mat);
       }
+      else if (llvm::ConstantAggregateZero *CAZ = dyn_cast<llvm::ConstantAggregateZero>(C)) {
+        llvm::Constant *destVal = llvm::Constant::getNullValue(destType);
+        return destVal;
+      }
     }
     // HLSL Change Ends.
     }

+ 151 - 21
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -179,8 +179,8 @@ private:
                               QualType Type, QualType SrcType,
                               llvm::Type *Ty);
 
-  void EmitHLSLRootSignature(CodeGenFunction &CGF, HLSLRootSignatureAttr *RSA,
-                             llvm::Function *Fn) override;
+  void EmitHLSLRootSignature(HLSLRootSignatureAttr *RSA,
+                             Function *Fn, DxilFunctionProps &props);
 
   void CheckParameterAnnotation(SourceLocation SLoc,
                                 const DxilParameterAnnotation &paramInfo,
@@ -204,6 +204,7 @@ private:
 
   // Type annotation related.
   unsigned ConstructStructAnnotation(DxilStructAnnotation *annotation,
+                                     DxilPayloadAnnotation* payloadAnnotation,
                                      const RecordDecl *RD,
                                      DxilTypeSystem &dxilTypeSys);
   unsigned AddTypeAnnotation(QualType Ty, DxilTypeSystem &dxilTypeSys,
@@ -920,6 +921,7 @@ static unsigned AlignBaseOffset(QualType Ty, unsigned baseOffset,
 }
 
 unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annotation,
+                                      DxilPayloadAnnotation* payloadAnnotation,
                                       const RecordDecl *RD,
                                       DxilTypeSystem &dxilTypeSys) {
   unsigned fieldIdx = 0;
@@ -992,6 +994,9 @@ unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annota
 
     unsigned CBufferOffset = offset;
 
+    DxilFieldAnnotation &fieldAnnotation = annotation->GetFieldAnnotation(fieldIdx++);
+    ConstructFieldAttributedAnnotation(fieldAnnotation, fieldTy, bDefaultRowMajor);
+
     // Try to get info from fieldDecl.
     for (const hlsl::UnusualAnnotation *it :
          fieldDecl->getUnusualAnnotations()) {
@@ -1016,6 +1021,21 @@ unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annota
         Diags.Report(it->Loc, DiagID);
         return 0;
       } break;
+      case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier: {
+        // Forward payload access qualifiers to fieldAnnotation. 
+        if (payloadAnnotation) {
+          const hlsl::PayloadAccessAnnotation *annotation =
+              cast<hlsl::PayloadAccessAnnotation>(it);
+          DxilPayloadFieldAnnotation &payloadFieldAnnotation =
+              payloadAnnotation->GetFieldAnnotation(fieldIdx - 1);
+          payloadFieldAnnotation.SetCompType(
+              fieldAnnotation.GetCompType().GetKind());
+          for (auto stage : annotation->ShaderStages) {
+            payloadFieldAnnotation.AddPayloadFieldQualifier(
+                stage, annotation->qualifier);
+          }
+        }
+      } break;
       default:
         llvm_unreachable("only semantic for input/output");
         break;
@@ -1029,9 +1049,6 @@ unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annota
     // Update offset.
     offset += size;
     
-    DxilFieldAnnotation &fieldAnnotation = annotation->GetFieldAnnotation(fieldIdx++);
-
-    ConstructFieldAttributedAnnotation(fieldAnnotation, fieldTy, bDefaultRowMajor);
     ConstructFieldInterpolation(fieldAnnotation, fieldDecl);
     if (fieldDecl->hasAttr<HLSLPreciseAttr>())
       fieldAnnotation.SetPrecise();
@@ -1068,6 +1085,65 @@ static unsigned GetNumTemplateArgsForRecordDecl(const RecordDecl *RD) {
   return 0;
 }
 
+// Decides whether payload access-qualifier annotations should be built for
+// the record Decl.
+//
+// Returns false when Decl does not carry the ray payload attribute, when
+// payload access qualifiers are disabled in Options (a warning is reported
+// via ReportOnce in that case), or when at least one field has neither a
+// payload access qualifier nor a payload struct type (errors are reported
+// for each such field and for the record). Returns true otherwise.
+//
+// A field that is both of payload struct type and explicitly qualified is
+// diagnosed as an error, but that alone does not make this function return
+// false. Model is not used by this function.
+static bool ValidatePayloadDecl(const RecordDecl *Decl,
+                                const ShaderModel &Model,
+                                DiagnosticsEngine &Diag,
+                                const CodeGenOptions &Options) {
+  // Already checked in Sema, this is not a payload.
+  if (!Decl->hasAttr<HLSLRayPayloadAttr>())
+    return false;
+
+  // If we have a payload, warn about the qualifiers being dropped.
+  if (!Options.HLSLEnablePayloadAccessQualifiers) {
+    Diag.ReportOnce(Decl->getLocation(), diag::warn_hlsl_payload_qualifer_dropped);
+    return false;
+  }
+
+  // Check that all fields have a payload qualifier.
+  bool everyFieldQualified = true;
+  for (FieldDecl *FD : Decl->fields()) {
+    bool hasQualifier = false;
+    for (UnusualAnnotation *UA : FD->getUnusualAnnotations()) {
+      if (isa<hlsl::PayloadAccessAnnotation>(UA))
+        hasQualifier = true;
+    }
+
+    // A field whose type is itself a [payload] struct takes its qualifiers
+    // from that struct, so it must not be qualified again on the field.
+    const RecordDecl *FieldRD = FD->getType()->getAsCXXRecordDecl();
+    const bool fieldIsPayload =
+        FieldRD && FieldRD->hasAttr<HLSLRayPayloadAttr>();
+
+    if (fieldIsPayload) {
+      if (hasQualifier) {
+        Diag.Report(FD->getLocation(),
+                    diag::err_payload_fields_is_payload_and_overqualified)
+            << FD->getName();
+        continue;
+      }
+      // The nested payload struct counts as qualified.
+      hasQualifier = true;
+    }
+
+    if (!hasQualifier) {
+      Diag.Report(FD->getLocation(),
+                  diag::err_payload_fields_not_qualified)
+          << FD->getName();
+      everyFieldQualified = false;
+    }
+  }
+
+  if (!everyFieldQualified) {
+    Diag.Report(Decl->getLocation(), diag::err_not_all_payload_fields_qualified)
+        << Decl->getName();
+    return false;
+  }
+
+  return true;
+}
+
 // Return the size for constant buffer of each decl.
 unsigned CGMSHLSLRuntime::AddTypeAnnotation(QualType Ty,
                                             DxilTypeSystem &dxilTypeSys,
@@ -1108,8 +1184,10 @@ unsigned CGMSHLSLRuntime::AddTypeAnnotation(QualType Ty,
     }
     DxilStructAnnotation *annotation = dxilTypeSys.AddStructAnnotation(ST,
       GetNumTemplateArgsForRecordDecl(RT->getDecl()));
-
-    return ConstructStructAnnotation(annotation, RD, dxilTypeSys);
+    DxilPayloadAnnotation *payloadAnnotation = nullptr;
+    if (ValidatePayloadDecl(RT->getDecl(), *m_pHLModule->GetShaderModel(), CGM.getDiags(), CGM.getCodeGenOpts()))
+      payloadAnnotation = dxilTypeSys.AddPayloadAnnotation(ST);
+    return ConstructStructAnnotation(annotation, payloadAnnotation, RD, dxilTypeSys);
   } else if (const RecordType *RT = dyn_cast<RecordType>(paramTy)) {
     // For this pointer.
     RecordDecl *RD = RT->getDecl();
@@ -1121,8 +1199,10 @@ unsigned CGMSHLSLRuntime::AddTypeAnnotation(QualType Ty,
     }
     DxilStructAnnotation *annotation = dxilTypeSys.AddStructAnnotation(ST,
       GetNumTemplateArgsForRecordDecl(RT->getDecl()));
-
-    return ConstructStructAnnotation(annotation, RD, dxilTypeSys);
+    DxilPayloadAnnotation* payloadAnnotation = nullptr;
+    if (ValidatePayloadDecl(RT->getDecl(), *m_pHLModule->GetShaderModel(), CGM.getDiags(), CGM.getCodeGenOpts()))
+         payloadAnnotation = dxilTypeSys.AddPayloadAnnotation(ST);
+    return ConstructStructAnnotation(annotation, payloadAnnotation, RD, dxilTypeSys);
   } else if (IsHLSLResourceType(Ty)) {
     // Save result type info.
     AddTypeAnnotation(GetHLSLResourceResultType(Ty), dxilTypeSys, arrayEltSize);
@@ -1731,6 +1811,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
   bool hasOutVertices = false;
   bool hasOutPrimitives = false;
   bool hasInPayload = false;
+  bool rayShaderHaveErrors = false;
   for (; ArgNo < F->arg_size(); ++ArgNo, ++ParmIdx, ++ArgIt) {
     DxilParameterAnnotation &paramAnnotation =
         FuncAnnotation->GetParameterAnnotation(ArgNo);
@@ -2071,27 +2152,31 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           DiagnosticsEngine::Error, "parameters are not allowed for %0 shader"))
             << (funcProps->shaderKind == DXIL::ShaderKind::RayGeneration ?
                 "raygeneration" : "intersection");
-        break;
+        rayShaderHaveErrors = true;
       case DXIL::ShaderKind::AnyHit:
       case DXIL::ShaderKind::ClosestHit:
         if (0 == ArgNo && dxilInputQ != DxilParamInputQual::Inout) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             "ray payload parameter must be inout"));
+          rayShaderHaveErrors = true;
         } else if (1 == ArgNo && dxilInputQ != DxilParamInputQual::In) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             "intersection attributes parameter must be in"));
+          rayShaderHaveErrors = true;
         } else if (ArgNo > 1) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             "too many parameters, expected payload and attributes parameters only."));
+          rayShaderHaveErrors = true;
         }
         if (ArgNo < 2) {
           if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
             Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
               DiagnosticsEngine::Error,
               "payload and attribute structures must be user defined types with only numeric contents."));
+            rayShaderHaveErrors = true;
           } else {
             DataLayout DL(&this->TheModule);
             unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
@@ -2107,16 +2192,19 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             "only one parameter (ray payload) allowed for miss shader"));
+          rayShaderHaveErrors = true;
         } else if (dxilInputQ != DxilParamInputQual::Inout) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             "ray payload parameter must be declared inout"));
+          rayShaderHaveErrors = true;
         }
         if (ArgNo < 1) {
           if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
             Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
               DiagnosticsEngine::Error,
               "ray payload parameter must be a user defined type with only numeric contents."));
+            rayShaderHaveErrors = true;
           } else {
             DataLayout DL(&this->TheModule);
             unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
@@ -2129,16 +2217,19 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             "only one parameter allowed for callable shader"));
+          rayShaderHaveErrors = true;
         } else if (dxilInputQ != DxilParamInputQual::Inout) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             "callable parameter must be declared inout"));
+          rayShaderHaveErrors = true;
         }
         if (ArgNo < 1) {
           if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
             Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
               DiagnosticsEngine::Error,
               "callable parameter must be a user defined type with only numeric contents."));
+            rayShaderHaveErrors = true;
           } else {
             DataLayout DL(&this->TheModule);
             unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
@@ -2188,6 +2279,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           Diags.getCustomDiagID(DiagnosticsEngine::Error,
             "shader must include inout parameter structure.");
         Diags.Report(FD->getLocation(), DiagID);
+        rayShaderHaveErrors = true;
       }
     }
     if (bNeedsAttributes &&
@@ -2195,9 +2287,17 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
       Diags.Report(FD->getLocation(), Diags.getCustomDiagID(
         DiagnosticsEngine::Error,
         "shader must include attributes structure parameter."));
+      rayShaderHaveErrors = true;
     }
   }
 
+  // If we encountered an error while verifying ray tracing shader
+  // signatures, stop here. Otherwise we risk triggering unhandled
+  // behaviour, e.g., DXC crashes when the payload is declared as a
+  // matrix<float...> type.
+  if(rayShaderHaveErrors)
+      return;
+
   // Type annotation for parameters and return type.
   DxilTypeSystem &dxilTypeSys = m_pHLModule->GetTypeSystem();
   unsigned arrayEltSize = 0;
@@ -2225,6 +2325,12 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
     }
   }
 
+  // Only parse root signature for entry function.
+  if (HLSLRootSignatureAttr *RSA = FD->getAttr<HLSLRootSignatureAttr>()) {
+    if (isExportedEntry || isEntry)
+      EmitHLSLRootSignature(RSA, F, *funcProps);
+  }
+
   // Only add functionProps when exist.
   if (isExportedEntry || isEntry)
     m_pHLModule->AddDxilFunctionProps(F, funcProps);
@@ -2715,6 +2821,10 @@ static void InitFromUnusualAnnotations(DxilResourceBase &Resource, NamedDecl &De
     case hlsl::UnusualAnnotation::UA_ConstantPacking:
       // Should be handled by front-end
       llvm_unreachable("packoffset on resource");
+      break;    
+    case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier:
+      // Should be handled by front-end
+      llvm_unreachable("payload qualifier on resource");
       break;
     default:
       llvm_unreachable("unknown UnusualAnnotation on resource");
@@ -3090,6 +3200,12 @@ bool CGMSHLSLRuntime::SetUAVSRV(SourceLocation loc,
 
     uint32_t strideInBytes = dataLayout.getTypeAllocSize(retTy);
     hlslRes->SetElementStride(strideInBytes);
+    if (kind == hlsl::DxilResource::Kind::StructuredBuffer) {
+      if (StructType* ST = dyn_cast<StructType>(retTy)) {
+        const StructLayout* SL = dataLayout.getStructLayout(ST);
+        hlslRes->SetBaseAlignLog2(Log2_32(SL->getAlignment()));
+      }
+    }
   }
   if (HasHLSLGloballyCoherent(QualTy)) {
     hlslRes->SetGloballyCoherent(true);
@@ -3273,6 +3389,9 @@ void CGMSHLSLRuntime::AddConstant(VarDecl *constDecl, HLCBuffer &CB) {
     }
     case hlsl::UnusualAnnotation::UA_SemanticDecl:
       // skip semantic on constant
+      break;    
+    case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier:
+      // skip payload qualifiers on constant
       break;
     }
   }
@@ -5631,22 +5750,33 @@ void CGMSHLSLRuntime::EmitHLSLFlatConversion(CodeGenFunction &CGF,
   }
 }
 
+// Compiles the root signature string named by RSA (reporting problems
+// through the module's diagnostics engine at the attribute's location) and,
+// if the resulting handle is not empty, stores its serialized form:
+//  - when not compiling a library (m_bIsLib is false), on the HL module;
+//  - when compiling a library and props is not a ray shader, on props;
+//  - when compiling a library and props is a ray shader, nothing is stored
+//    and an error is reported instead.
+// Fn is not referenced by the new implementation.
-void CGMSHLSLRuntime::EmitHLSLRootSignature(CodeGenFunction &CGF,
-                                            HLSLRootSignatureAttr *RSA,
-                                            Function *Fn) {
-  // Only parse root signature for entry function.
-  if (Fn != Entry.Func)
-    return;
-
+void CGMSHLSLRuntime::EmitHLSLRootSignature(HLSLRootSignatureAttr *RSA,
+                                            Function *Fn,
+                                            DxilFunctionProps &props) {
   StringRef StrRef = RSA->getSignatureName();
-  DiagnosticsEngine &Diags = CGF.getContext().getDiagnostics();
+  DiagnosticsEngine &Diags = CGM.getDiags();
   SourceLocation SLoc = RSA->getLocation();
   RootSignatureHandle RootSigHandle;
-  clang::CompileRootSignature(StrRef, Diags, SLoc, rootSigVer, DxilRootSignatureCompilationFlags::GlobalRootSignature, &RootSigHandle);
+  clang::CompileRootSignature(
+      StrRef, Diags, SLoc, rootSigVer,
+      DxilRootSignatureCompilationFlags::GlobalRootSignature, &RootSigHandle);
   if (!RootSigHandle.IsEmpty()) {
     RootSigHandle.EnsureSerializedAvailable();
-    m_pHLModule->SetSerializedRootSignature(RootSigHandle.GetSerializedBytes(),
-                                            RootSigHandle.GetSerializedSize());
+    if (!m_bIsLib) {
+      m_pHLModule->SetSerializedRootSignature(
+          RootSigHandle.GetSerializedBytes(),
+          RootSigHandle.GetSerializedSize());
+    } else {
+      if (!props.IsRay()) {
+        props.SetSerializedRootSignature(RootSigHandle.GetSerializedBytes(),
+                                         RootSigHandle.GetSerializedSize());
+      } else {
+        unsigned DiagID = Diags.getCustomDiagID(
+            DiagnosticsEngine::Error, "root signature attribute not supported "
+                                      "for raytracing entry functions");
+        Diags.Report(RSA->getLocation(), DiagID);
+      }
+    }
   }
 }
 

+ 92 - 0
tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp

@@ -1460,6 +1460,10 @@ typedef APInt(__cdecl *IntBinaryEvalFuncType)(const APInt &, const APInt &);
 typedef float(__cdecl *FloatBinaryEvalFuncType)(float, float);
 typedef double(__cdecl *DoubleBinaryEvalFuncType)(double, double);
 
+// Evaluator signatures for three-operand intrinsics, one per scalar kind
+// (integer, float, double); used by EvalTernaryIntrinsic.
+typedef APInt(__cdecl *IntTernaryEvalFuncType)(const APInt &, const APInt &, const APInt &);
+typedef float(__cdecl *FloatTernaryEvalFuncType)(float, float, float);
+typedef double(__cdecl *DoubleTernaryEvalFuncType)(double, double, double);
+
 Value *EvalUnaryIntrinsic(ConstantFP *fpV, FloatUnaryEvalFuncType floatEvalFunc,
                           DoubleUnaryEvalFuncType doubleEvalFunc) {
   llvm::Type *Ty = fpV->getType();
@@ -1510,6 +1514,45 @@ Value *EvalBinaryIntrinsic(Constant *cV0, Constant *cV1,
   return Result;
 }
 
+// Evaluates a three-operand intrinsic on scalar constant operands and returns
+// the folded constant. The type of cV0 selects the evaluator: doubleEvalFunc
+// for double, floatEvalFunc for float, otherwise intEvalFunc (the type must
+// then be an integer type and intEvalFunc must be non-null).
+Value *EvalTernaryIntrinsic(Constant *cV0, Constant *cV1, Constant *cV2,
+                             FloatTernaryEvalFuncType floatEvalFunc,
+                             DoubleTernaryEvalFuncType doubleEvalFunc,
+                             IntTernaryEvalFuncType intEvalFunc) {
+  llvm::Type *Ty = cV0->getType();
+
+  if (Ty->isDoubleTy()) {
+    ConstantFP *op0 = cast<ConstantFP>(cV0);
+    ConstantFP *op1 = cast<ConstantFP>(cV1);
+    ConstantFP *op2 = cast<ConstantFP>(cV2);
+    const double a = op0->getValueAPF().convertToDouble();
+    const double b = op1->getValueAPF().convertToDouble();
+    const double c = op2->getValueAPF().convertToDouble();
+    return ConstantFP::get(Ty, doubleEvalFunc(a, b, c));
+  }
+
+  if (Ty->isFloatTy()) {
+    ConstantFP *op0 = cast<ConstantFP>(cV0);
+    ConstantFP *op1 = cast<ConstantFP>(cV1);
+    ConstantFP *op2 = cast<ConstantFP>(cV2);
+    const float a = op0->getValueAPF().convertToFloat();
+    const float b = op1->getValueAPF().convertToFloat();
+    const float c = op2->getValueAPF().convertToFloat();
+    return ConstantFP::get(Ty, floatEvalFunc(a, b, c));
+  }
+
+  // Remaining case: integer operands.
+  DXASSERT_NOMSG(Ty->isIntegerTy());
+  DXASSERT_NOMSG(intEvalFunc);
+  ConstantInt *op0 = cast<ConstantInt>(cV0);
+  ConstantInt *op1 = cast<ConstantInt>(cV1);
+  ConstantInt *op2 = cast<ConstantInt>(cV2);
+  const APInt &a = op0->getValue();
+  const APInt &b = op1->getValue();
+  const APInt &c = op2->getValue();
+  return ConstantInt::get(Ty, intEvalFunc(a, b, c));
+}
+
 Value *EvalUnaryIntrinsic(CallInst *CI, FloatUnaryEvalFuncType floatEvalFunc,
                           DoubleUnaryEvalFuncType doubleEvalFunc) {
   Value *V = CI->getArgOperand(0);
@@ -1566,6 +1609,43 @@ Value *EvalBinaryIntrinsic(CallInst *CI, FloatBinaryEvalFuncType floatEvalFunc,
   return Result;
 }
 
+// Constant-folds a call to a three-operand intrinsic whose arguments are all
+// constants. For a vector result the evaluators are applied per element and
+// the result is rebuilt with insertelement; for a scalar result they are
+// applied once. All uses of CI are replaced with the folded value and CI is
+// erased, so the caller must not touch CI afterwards.
+//   floatEvalFunc / doubleEvalFunc - evaluators for float / double operands.
+//   intEvalFunc                    - evaluator for integer operands; may be
+//                                    left null only if the call never has
+//                                    integer operands.
+// Returns the value that replaced CI.
+Value *EvalTernaryIntrinsic(CallInst *CI, FloatTernaryEvalFuncType floatEvalFunc,
+                             DoubleTernaryEvalFuncType doubleEvalFunc,
+                             IntTernaryEvalFuncType intEvalFunc = nullptr) {
+  Value *V0 = CI->getArgOperand(0);
+  Value *V1 = CI->getArgOperand(1);
+  Value *V2 = CI->getArgOperand(2);
+  llvm::Type *Ty = CI->getType();
+  Value *Result = nullptr;
+  if (llvm::VectorType *VT = dyn_cast<llvm::VectorType>(Ty)) {
+    Result = UndefValue::get(Ty);
+    Constant *CV0 = cast<Constant>(V0);
+    Constant *CV1 = cast<Constant>(V1);
+    Constant *CV2 = cast<Constant>(V2);
+    IRBuilder<> Builder(CI);
+    for (unsigned i = 0; i < VT->getNumElements(); i++) {
+      Constant *cV0 = cast<Constant>(CV0->getAggregateElement(i));
+      Constant *cV1 = cast<Constant>(CV1->getAggregateElement(i));
+      Constant *cV2 = cast<Constant>(CV2->getAggregateElement(i));
+      Value *EltResult = EvalTernaryIntrinsic(cV0, cV1, cV2, floatEvalFunc,
+                                             doubleEvalFunc, intEvalFunc);
+      Result = Builder.CreateInsertElement(Result, EltResult, i);
+    }
+  } else {
+    Constant *cV0 = cast<Constant>(V0);
+    Constant *cV1 = cast<Constant>(V1);
+    Constant *cV2 = cast<Constant>(V2);
+    Result = EvalTernaryIntrinsic(cV0, cV1, cV2, floatEvalFunc, doubleEvalFunc,
+                                 intEvalFunc);
+  }
+  // Replace the call with the folded value, then remove it exactly once.
+  CI->replaceAllUsesWith(Result);
+  CI->eraseFromParent();
+  return Result;
+}
+
 void SimpleTransformForHLDXIRInst(Instruction *I, SmallInstSet &deadInsts) {
 
   unsigned opcode = I->getOpcode();
@@ -1789,6 +1869,18 @@ Value *TryEvalIntrinsic(CallInst *CI, IntrinsicOp intriOp,
     CI->eraseFromParent();
     return cNan;
   } break;
+  case IntrinsicOp::IOP_clamp: {
+    auto clampF = [](float a, float b, float c) {
+      return a < b ? b : a > c ? c : a;
+    };
+    auto clampD = [](double a, double b, double c) {
+      return a < b ? b : a > c ? c : a;
+    };
+    auto clampI = [](const APInt &a, const APInt &b, const APInt &c) -> APInt {
+      return a.slt(b) ? b : a.sgt(c) ? c : a;
+    };
+    return EvalTernaryIntrinsic(CI, clampF, clampD, clampI);
+  } break;
   default:
     return nullptr;
   }

+ 0 - 3
tools/clang/lib/CodeGen/CGHLSLRuntime.h

@@ -120,9 +120,6 @@ public:
                                    clang::QualType SrcTy,
                                    llvm::Value *DestPtr,
                                    clang::QualType DestTy) = 0;
-  virtual void EmitHLSLRootSignature(CodeGenFunction &CGF,
-                                     clang::HLSLRootSignatureAttr *RSA,
-                                     llvm::Function *Fn) = 0;
   virtual llvm::Value *EmitHLSLLiteralCast(CodeGenFunction &CGF, llvm::Value *Src, clang::QualType SrcType,
                                                clang::QualType DstType) = 0;
 

+ 1 - 3
tools/clang/lib/CodeGen/CodeGenAction.cpp

@@ -556,10 +556,8 @@ BackendConsumer::DxilDiagHandler(const llvm::DiagnosticInfoDxil &D) {
   }
   FullSourceLoc Loc(DILoc, SourceMgr);
 
-  // If no location information is available, prompt for debug flag
-  // and add function name to give some information
+  // If no location information is available, add function name
   if (Loc.isInvalid()) {
-    Message += " Use /Zi for source location.";
     auto *DiagClient = dynamic_cast<TextDiagnosticPrinter*>(Diags.getClient());
     auto *func = D.getFunction();
     if (DiagClient && func)

+ 0 - 6
tools/clang/lib/CodeGen/CodeGenFunction.cpp

@@ -866,12 +866,6 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
   FunctionArgList Args;
   QualType ResTy = FD->getReturnType();
 
-  // HLSL Change Start - emit root signature associated with function
-  if (HLSLRootSignatureAttr *RSA = FD->getAttr<HLSLRootSignatureAttr>()) {
-    CGM.getHLSLRuntime().EmitHLSLRootSignature(*this, RSA, Fn);
-  }
-  // HLSL Change Ends - emit root signature associated with function
-
   CurGD = GD;
   const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
   if (MD && MD->isInstance()) {

+ 73 - 3
tools/clang/lib/Parse/ParseDecl.cpp

@@ -27,8 +27,10 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "dxc/Support/Global.h"    // HLSL Change
-#include "clang/Sema/SemaHLSL.h"   // HLSL Change
+#include "dxc/Support/Global.h"       // HLSL Change
+#include "clang/Sema/SemaHLSL.h"      // HLSL Change
+#include "dxc/DXIL/DxilShaderModel.h" // HLSL Change
+#include "dxc/DXIL/DxilConstants.h"   // HLSL Change
 
 using namespace clang;
 
@@ -359,7 +361,70 @@ bool Parser::MaybeParseHLSLAttributes(std::vector<hlsl::UnusualAnnotation *> &ta
       return false;
     }
 
-    if (NextToken().is(tok::kw_register)) {
+    bool identifierIsPayloadAnnotation = false;
+    if (NextToken().is(tok::identifier)) {
+        StringRef identifier = NextToken().getIdentifierInfo()->getName();
+        identifierIsPayloadAnnotation = identifier == "read" || identifier == "write";
+    }
+
+    if (identifierIsPayloadAnnotation) {
+      hlsl::PayloadAccessAnnotation mod;
+
+      if (NextToken().getIdentifierInfo()->getName() == "read")
+          mod.qualifier = hlsl::DXIL::PayloadAccessQualifier::Read;
+      else
+          mod.qualifier = hlsl::DXIL::PayloadAccessQualifier::Write;
+
+      // : read/write ( shader stage *[,shader stage])
+      ConsumeToken(); // consume the colon.
+
+      mod.Loc = Tok.getLocation();
+      ConsumeToken(); // consume the read/write identifier
+      if (ExpectAndConsume(tok::l_paren, diag::err_expected_lparen_after,
+                           "payload access qualifier")) {
+        return true;
+      }
+
+      while(Tok.is(tok::identifier)) {
+        hlsl::DXIL::PayloadAccessShaderStage stage = hlsl::DXIL::PayloadAccessShaderStage::Invalid;
+        const char *stagePtr = Tok.getIdentifierInfo()->getName().data();
+        StringRef shaderStage(stagePtr);
+        if (shaderStage != "caller" && shaderStage != "anyhit" &&
+            shaderStage != "closesthit" && shaderStage != "miss") {
+          Diag(Tok.getLocation(),
+               diag::err_hlsl_payload_access_qualifier_unsupported_shader)
+              << shaderStage;
+          return true;
+        }
+
+        if (shaderStage == "caller") {
+          stage = hlsl::DXIL::PayloadAccessShaderStage::Caller;
+        } else if (shaderStage == "closesthit") {
+          stage = hlsl::DXIL::PayloadAccessShaderStage::Closesthit;
+        } else if (shaderStage == "miss") {
+          stage = hlsl::DXIL::PayloadAccessShaderStage::Miss;
+        } else if (shaderStage == "anyhit") {
+          stage = hlsl::DXIL::PayloadAccessShaderStage::Anyhit;
+        } 
+
+        mod.ShaderStages.push_back(stage);
+        ConsumeToken(); // consume shader type
+
+        if (Tok.is(tok::comma)) // check if we have a list of shader types
+          ConsumeToken();
+
+      } while (Tok.is(tok::identifier));
+
+      if (ExpectAndConsume(tok::r_paren, diag::err_expected_rparen_after,
+                           "payload access qualifier")) {
+        return true;
+      }
+
+      if (mod.ShaderStages.empty())
+          mod.qualifier = hlsl::DXIL::PayloadAccessQualifier::NoAccess;
+
+      target.push_back(new (context) hlsl::PayloadAccessAnnotation(mod));
+    }else if (NextToken().is(tok::kw_register)) {
       hlsl::RegisterAssignment r;
 
       // : register ([shader_profile], Type#[subcomponent] [,spaceX])
@@ -551,6 +616,10 @@ bool Parser::MaybeParseHLSLAttributes(std::vector<hlsl::UnusualAnnotation *> &ta
       ConsumeToken(); // consume colon.
 
       StringRef semanticName = Tok.getIdentifierInfo()->getName();
+      if (semanticName.equals("VFACE")) {
+        Diag(Tok.getLocation(), diag::warn_unsupported_target_attribute)
+            << semanticName;
+      }
       hlsl::SemanticDecl *pUA = new (context) hlsl::SemanticDecl(semanticName);
       pUA->Loc = Tok.getLocation();
       ConsumeToken(); // consume semantic
@@ -562,6 +631,7 @@ bool Parser::MaybeParseHLSLAttributes(std::vector<hlsl::UnusualAnnotation *> &ta
       return false;
     }
   }
+  return true;
 }
 // HLSL Change Ends
 

+ 16 - 3
tools/clang/lib/SPIRV/AstTypeProbe.cpp

@@ -265,6 +265,17 @@ bool isSubpassInputMS(QualType type) {
   return false;
 }
 
+// Reports whether `type` is an array type, as determined by
+// getAsArrayTypeUnsafe(). On success, the element type is written to
+// *elemType and the size obtained from hlsl::GetArraySize(type) is written to
+// *elemCount; either output pointer may be null to skip that result. Nothing
+// is written when the function returns false.
+bool isArrayType(QualType type, QualType *elemType, uint32_t *elemCount) {
+  const auto *asArray = type->getAsArrayTypeUnsafe();
+  if (!asArray)
+    return false;
+
+  if (elemType != nullptr)
+    *elemType = asArray->getElementType();
+  if (elemCount != nullptr)
+    *elemCount = hlsl::GetArraySize(type);
+  return true;
+}
+
 bool isConstantBuffer(clang::QualType type) {
   // Strip outer arrayness first
   while (type->isArrayType())
@@ -1051,12 +1062,14 @@ bool isRelaxedPrecisionType(QualType type, const SpirvCodeGenOptions &opts) {
         }
   }
 
-  // Vector & Matrix types could use relaxed precision based on their element
-  // type.
+  // Vector, Matrix and Array types could use relaxed precision based on their
+  // element type.
   {
     QualType elemType = {};
-    if (isVectorType(type, &elemType) || isMxNMatrix(type, &elemType))
+    if (isVectorType(type, &elemType) || isMxNMatrix(type, &elemType) ||
+        isArrayType(type, &elemType)) {
       return isRelaxedPrecisionType(elemType, opts);
+    }
   }
 
   // Images with RelaxedPrecision sampled type.

+ 4 - 4
tools/clang/lib/SPIRV/CapabilityVisitor.cpp

@@ -350,10 +350,10 @@ bool CapabilityVisitor::visit(SpirvDecoration *decor) {
                    "SV_Barycentrics", loc);
       break;
     }
-    case spv::BuiltIn::FragSizeEXT: {
-      addExtension(Extension::EXT_fragment_invocation_density, "SV_ShadingRate",
-                   loc);
-      addCapability(spv::Capability::FragmentDensityEXT);
+    case spv::BuiltIn::ShadingRateKHR:
+    case spv::BuiltIn::PrimitiveShadingRateKHR: {
+      addExtension(Extension::KHR_fragment_shading_rate, "SV_ShadingRate", loc);
+      addCapability(spv::Capability::FragmentShadingRateKHR);
       break;
     }
     default:

+ 18 - 28
tools/clang/lib/SPIRV/DeclResultIdMapper.cpp

@@ -2221,8 +2221,6 @@ bool DeclResultIdMapper::createStageVars(
     // * SV_DispatchThreadID, SV_GroupThreadID, and SV_GroupID are allowed to be
     //   uint, uint2, or uint3, but the corresponding builtins
     //   (GlobalInvocationId, LocalInvocationId, WorkgroupId) must be a uint3.
-    // * SV_ShadingRate is a uint value, but the builtin it corresponds to is a
-    //   int2.
 
     if (glPerVertex.tryToAccess(sigPointKind, semanticKind,
                                 semanticToUse->index, invocationId, value,
@@ -2264,9 +2262,6 @@ bool DeclResultIdMapper::createStageVars(
           hlsl::IsHLSLVecType(type) ? hlsl::GetHLSLVecElementType(type) : type,
           3);
       break;
-    case hlsl::Semantic::Kind::ShadingRate:
-      evalType = astContext.getExtVectorType(astContext.IntTy, 2);
-      break;
     default:
       // Only the semantic kinds mentioned above are handled.
       break;
@@ -2505,25 +2500,6 @@ bool DeclResultIdMapper::createStageVars(
               astContext.getExtVectorType(srcVecElemType, 2), *value, *value,
               {0, 1}, thisSemantic.loc);
       }
-      // Special handling of SV_ShadingRate, which is a bitpacked enum value,
-      // but SPIR-V's FragSizeEXT uses an int2. We build the enum value from
-      // the separate axis values.
-      else if (semanticKind == hlsl::Semantic::Kind::ShadingRate) {
-        // From the D3D12 functional spec for Variable-Rate Shading.
-        // #define D3D12_MAKE_COARSE_SHADING_RATE(x,y) ((x) << 2 | (y))
-        const auto x = spvBuilder.createCompositeExtract(
-            astContext.IntTy, *value, {0}, thisSemantic.loc);
-        const auto y = spvBuilder.createCompositeExtract(
-            astContext.IntTy, *value, {1}, thisSemantic.loc);
-        const auto constTwo =
-            spvBuilder.getConstantInt(astContext.IntTy, llvm::APInt(32, 2));
-        *value = spvBuilder.createBinaryOp(
-            spv::Op::OpBitwiseOr, astContext.UnsignedIntTy,
-            spvBuilder.createBinaryOp(spv::Op::OpShiftLeftLogical,
-                                      astContext.IntTy, x, constTwo,
-                                      thisSemantic.loc),
-            y, thisSemantic.loc);
-      }
 
       // Reciprocate SV_Position.w if requested
       if (semanticKind == hlsl::Semantic::Kind::Position)
@@ -3472,16 +3448,30 @@ SpirvVariable *DeclResultIdMapper::createSpirvStageVar(
   }
   // According to DXIL spec, the ShadingRate SV can only be used by GSOut,
   // VSOut, or PSIn. According to Vulkan spec, the FragSizeEXT BuiltIn can only
-  // be used as PSIn.
+  // be used as VSOut, GSOut, MSOut or PSIn.
   case hlsl::Semantic::Kind::ShadingRate: {
+    QualType checkType = type->getAs<ReferenceType>()
+                             ? type->getAs<ReferenceType>()->getPointeeType()
+                             : type;
+    QualType scalarTy;
+    if (!isScalarType(checkType, &scalarTy) || !scalarTy->isIntegerType()) {
+      emitError("semantic ShadingRate must be interger scalar type", srcLoc);
+    }
+
     switch (sigPointKind) {
     case hlsl::SigPoint::Kind::PSIn:
       stageVar->setIsSpirvBuiltin();
-      return spvBuilder.addStageBuiltinVar(type, sc, BuiltIn::FragSizeEXT,
+      return spvBuilder.addStageBuiltinVar(type, sc, BuiltIn::ShadingRateKHR,
                                            isPrecise, srcLoc);
+    case hlsl::SigPoint::Kind::VSOut:
+    case hlsl::SigPoint::Kind::GSOut:
+    case hlsl::SigPoint::Kind::MSOut:
+      stageVar->setIsSpirvBuiltin();
+      return spvBuilder.addStageBuiltinVar(
+          type, sc, BuiltIn::PrimitiveShadingRateKHR, isPrecise, srcLoc);
     default:
-      emitError("semantic ShadingRate currently unsupported in non-PS shader"
-                " stages",
+      emitError("semantic ShadingRate must be used only for PSIn, VSOut, "
+                "GSOut, MSOut",
                 srcLoc);
       break;
     }

+ 4 - 0
tools/clang/lib/SPIRV/FeatureManager.cpp

@@ -140,6 +140,8 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) {
       .Case("SPV_NV_ray_tracing", Extension::NV_ray_tracing)
       .Case("SPV_NV_mesh_shader", Extension::NV_mesh_shader)
       .Case("SPV_KHR_ray_query", Extension::KHR_ray_query)
+      .Case("SPV_KHR_fragment_shading_rate",
+            Extension::KHR_fragment_shading_rate)
       .Default(Extension::Unknown);
 }
 
@@ -189,6 +191,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) {
     return "SPV_NV_mesh_shader";
   case Extension::KHR_ray_query:
     return "SPV_KHR_ray_query";
+  case Extension::KHR_fragment_shading_rate:
+    return "SPV_KHR_fragment_shading_rate";
   default:
     break;
   }

Niektoré súbory nie sú zobrazené, pretože je v týchto rozdielových dátach zmenené mnoho súborov