Explorar o código

Merge of branch 'master' of https://github.com/Microsoft/DirectXShaderCompiler into merge-master-hlsl-2021

Helena Kotas hai 4 anos
pai
achega
af088513ab
Modificáronse 100 ficheiros con 4921 adicións e 688 borrados
  1. 4 5
      .travis.yml
  2. 5 1
      CMakeLists.txt
  3. 3 3
      README.md
  4. 2 2
      appveyor.yml
  5. 3 4
      cmake/modules/FindD3D12.cmake
  6. 4 3
      docs/DXIL.rst
  7. 8 2
      docs/SPIR-V.rst
  8. 1 1
      external/SPIRV-Headers
  9. 1 1
      external/SPIRV-Tools
  10. 23 1
      include/dxc/DXIL/DxilConstants.h
  11. 9 2
      include/dxc/DXIL/DxilFunctionProps.h
  12. 21 0
      include/dxc/DXIL/DxilMetadataHelper.h
  13. 4 0
      include/dxc/DXIL/DxilModule.h
  14. 4 0
      include/dxc/DXIL/DxilResource.h
  15. 3 2
      include/dxc/DXIL/DxilShaderModel.h
  16. 52 0
      include/dxc/DXIL/DxilTypeSystem.h
  17. 3 0
      include/dxc/DXIL/DxilUtil.h
  18. 0 19
      include/dxc/HLSL/DxilConvergent.h
  19. 5 1
      include/dxc/HLSL/DxilExportMap.h
  20. 11 3
      include/dxc/HLSL/DxilValidation.h
  21. 9 0
      include/dxc/Support/ErrorCodes.h
  22. 4 0
      include/dxc/Support/FileIOHelper.h
  23. 7 3
      include/dxc/Support/HLSLOptions.h
  24. 12 4
      include/dxc/Support/HLSLOptions.td
  25. 2634 0
      include/dxc/Support/d3d12TokenizedProgramFormat.hpp
  26. 5 0
      include/dxc/Test/DxcTestUtils.h
  27. 19 1
      include/dxc/dxcapi.h
  28. 29 0
      include/dxc/dxcerrors.h
  29. 1 0
      include/llvm/ADT/StringRef.h
  30. 2 0
      include/llvm/IR/BasicBlock.h
  31. 1 0
      include/llvm/IR/DebugInfo.h
  32. 2 0
      include/llvm/Option/OptTable.h
  33. 7 6
      include/llvm/Support/Casting.h
  34. 7 1
      include/llvm/Support/ErrorHandling.h
  35. 184 21
      lib/DXIL/DxilMetadataHelper.cpp
  36. 45 13
      lib/DXIL/DxilModule.cpp
  37. 8 0
      lib/DXIL/DxilResource.cpp
  38. 3 1
      lib/DXIL/DxilResourceProperties.cpp
  39. 2 1
      lib/DXIL/DxilShaderFlags.cpp
  40. 94 63
      lib/DXIL/DxilShaderModel.cpp
  41. 133 5
      lib/DXIL/DxilTypeSystem.cpp
  42. 22 1
      lib/DXIL/DxilUtil.cpp
  43. 8 0
      lib/DxcSupport/FileIOHelper.cpp
  44. 26 10
      lib/DxcSupport/HLSLOptions.cpp
  45. 5 11
      lib/DxilContainer/DxilContainerAssembler.cpp
  46. 3 53
      lib/DxilPIXPasses/DxilAddPixelHitInstrumentation.cpp
  47. 2 48
      lib/DxilPIXPasses/DxilDebugInstrumentation.cpp
  48. 6 19
      lib/DxilPIXPasses/DxilOutputColorBecomesConstant.cpp
  49. 3 49
      lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp
  50. 333 138
      lib/DxilPIXPasses/DxilShaderAccessTracking.cpp
  51. 146 8
      lib/DxilPIXPasses/PixPassHelpers.cpp
  52. 5 0
      lib/DxilPIXPasses/PixPassHelpers.h
  53. 1 1
      lib/HLSL/DxilCondenseResources.cpp
  54. 3 1
      lib/HLSL/DxilContainerReflection.cpp
  55. 2 12
      lib/HLSL/DxilConvergent.cpp
  56. 1 1
      lib/HLSL/DxilGenerationPass.cpp
  57. 35 1
      lib/HLSL/DxilLinker.cpp
  58. 10 4
      lib/HLSL/DxilPreparePasses.cpp
  59. 92 12
      lib/HLSL/DxilValidation.cpp
  60. 3 3
      lib/HLSL/HLMatrixLowerPass.cpp
  61. 1 3
      lib/HLSL/HLModule.cpp
  62. 3 4
      lib/HLSL/HLOperationLower.cpp
  63. 60 6
      lib/HLSL/WaveSensitivityAnalysis.cpp
  64. 12 0
      lib/IR/BasicBlock.cpp
  65. 15 0
      lib/IR/DebugInfo.cpp
  66. 2 5
      lib/IR/DiagnosticInfo.cpp
  67. 0 2
      lib/MC/MCObjectStreamer.cpp
  68. 29 0
      lib/Option/OptTable.cpp
  69. 16 6
      lib/Support/ErrorHandling.cpp
  70. 1 1
      lib/Transforms/Scalar/LowerTypePasses.cpp
  71. 4 2
      lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
  72. 12 7
      lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
  73. 2 2
      lib/Transforms/Scalar/Scalarizer.cpp
  74. 5 0
      lib/Transforms/Utils/Local.cpp
  75. 2 2
      projects/dxilconv/include/Support/DXIncludes.h
  76. 1 1
      projects/dxilconv/lib/ShaderBinary/ShaderBinaryIncludes.h
  77. 17 1
      tools/clang/include/clang/AST/HlslTypes.h
  78. 5 0
      tools/clang/include/clang/Basic/Attr.td
  79. 33 0
      tools/clang/include/clang/Basic/Diagnostic.h
  80. 10 0
      tools/clang/include/clang/Basic/DiagnosticGroups.td
  81. 36 0
      tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
  82. 1 0
      tools/clang/include/clang/Basic/LangOptions.h
  83. 2 0
      tools/clang/include/clang/Frontend/CodeGenOptions.h
  84. 7 0
      tools/clang/include/clang/SPIRV/AstTypeProbe.h
  85. 1 0
      tools/clang/include/clang/SPIRV/FeatureManager.h
  86. 10 0
      tools/clang/include/clang/Sema/SemaHLSL.h
  87. 3 0
      tools/clang/lib/AST/ASTContextHLSL.cpp
  88. 20 0
      tools/clang/lib/AST/ASTDumper.cpp
  89. 17 0
      tools/clang/lib/AST/DeclPrinter.cpp
  90. 155 34
      tools/clang/lib/CodeGen/CGExprConstant.cpp
  91. 151 21
      tools/clang/lib/CodeGen/CGHLSLMS.cpp
  92. 92 0
      tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp
  93. 0 3
      tools/clang/lib/CodeGen/CGHLSLRuntime.h
  94. 1 3
      tools/clang/lib/CodeGen/CodeGenAction.cpp
  95. 0 6
      tools/clang/lib/CodeGen/CodeGenFunction.cpp
  96. 73 3
      tools/clang/lib/Parse/ParseDecl.cpp
  97. 16 3
      tools/clang/lib/SPIRV/AstTypeProbe.cpp
  98. 4 4
      tools/clang/lib/SPIRV/CapabilityVisitor.cpp
  99. 18 28
      tools/clang/lib/SPIRV/DeclResultIdMapper.cpp
  100. 4 0
      tools/clang/lib/SPIRV/FeatureManager.cpp

+ 4 - 5
.travis.yml

@@ -41,11 +41,10 @@ matrix:
   # Allow address sanitizer bot to fail for now. TODO(Remove this).
   # Allow address sanitizer bot to fail for now. TODO(Remove this).
   allow_failures:
   allow_failures:
     - os: linux
     - os: linux
-      # All linux timing out with empty log currently.
-      # compiler: clang
-      # env:
-      #   - DXC_BUILD_TYPE=Debug
-      #   - BUILD_CONFIG=ASAN
+      compiler: clang
+      env:
+        - DXC_BUILD_TYPE=Debug
+        - BUILD_CONFIG=ASAN
 
 
 cache:
 cache:
   apt: true
   apt: true

+ 5 - 1
CMakeLists.txt

@@ -100,7 +100,11 @@ option(HLSL_SUPPORT_QUERY_GIT_COMMIT_INFO "Supports querying Git commit info." O
 if ( HLSL_SUPPORT_QUERY_GIT_COMMIT_INFO )
 if ( HLSL_SUPPORT_QUERY_GIT_COMMIT_INFO )
   add_definitions(-DSUPPORT_QUERY_GIT_COMMIT_INFO)
   add_definitions(-DSUPPORT_QUERY_GIT_COMMIT_INFO)
 endif()
 endif()
-# HLSL Chnage Ends
+# adjust link option to enable debugging from kernel mode; not compatible with incremental linking
+if(NOT CMAKE_VERSION VERSION_LESS "3.13" AND WIN32)
+  add_link_options(/DEBUGTYPE:CV,FIXUP,PDATA /INCREMENTAL:NO)
+endif()
+# HLSL Change Ends
 
 
 # HLSL Change Starts - set flag for Appveyor CI
 # HLSL Change Starts - set flag for Appveyor CI
 if ( "$ENV{CI}" AND "$ENV{APPVEYOR}" )
 if ( "$ENV{CI}" AND "$ENV{APPVEYOR}" )

+ 3 - 3
README.md

@@ -1,6 +1,6 @@
 # DirectX Shader Compiler
 # DirectX Shader Compiler
 
 
-[![Build status](https://ci.appveyor.com/api/projects/status/oaf66n7w30xbrg38/branch/master?svg=true)](https://ci.appveyor.com/project/antiagainst/directxshadercompiler/branch/master)
+[![Build status](https://ci.appveyor.com/api/projects/status/6sx47j66g4dbyem9/branch/master?svg=true)](https://ci.appveyor.com/project/dnovillo/directxshadercompiler/branch/master)
 
 
 The DirectX Shader Compiler project includes a compiler and related tools used to compile High-Level Shader Language (HLSL) programs into DirectX Intermediate Language (DXIL) representation. Applications that make use of DirectX for graphics, games, and computation can use it to generate shader programs.
 The DirectX Shader Compiler project includes a compiler and related tools used to compile High-Level Shader Language (HLSL) programs into DirectX Intermediate Language (DXIL) representation. Applications that make use of DirectX for graphics, games, and computation can use it to generate shader programs.
 
 
@@ -10,8 +10,8 @@ For more information, see the [Wiki](https://github.com/microsoft/DirectXShaderC
 You can download the latest successful build's artifacts (built by Appveyor) for the master branch:
 You can download the latest successful build's artifacts (built by Appveyor) for the master branch:
 | Downloads |        |
 | Downloads |        |
 |-----------|--------|
 |-----------|--------|
-| Windows   | [⬇](https://ci.appveyor.com/api/projects/antiagainst/directxshadercompiler/artifacts/build%2FRelease%2Fdxc-artifacts.zip?branch=master&pr=false&job=image%3A%20Visual%20Studio%202017) |
-| Ubuntu    | [⬇](https://ci.appveyor.com/api/projects/antiagainst/directxshadercompiler/artifacts/build%2Fdxc-artifacts.tar.gz?branch=master&pr=false&job=image%3A%20Ubuntu) |
+| Windows   | [⬇](https://ci.appveyor.com/api/projects/dnovillo/directxshadercompiler/artifacts/build%2FRelease%2Fdxc-artifacts.zip?branch=master&pr=false&job=image%3A%20Visual%20Studio%202019) |
+| Ubuntu    | [⬇](https://ci.appveyor.com/api/projects/dnovillo/directxshadercompiler/artifacts/build%2Fdxc-artifacts.tar.gz?branch=master&pr=false&job=image%3A%20Ubuntu) |
 
 
 ## Features and Goals
 ## Features and Goals
 
 

+ 2 - 2
appveyor.yml

@@ -1,7 +1,7 @@
 version: 1.0.{build}
 version: 1.0.{build}
 
 
 image:
 image:
-  - Visual Studio 2017
+  - Visual Studio 2019
   - Ubuntu
   - Ubuntu
 
 
 platform: x64
 platform: x64
@@ -38,7 +38,7 @@ before_build:
 - cmd: call utils\hct\hctstart %HLSL_SRC_DIR% %HLSL_BLD_DIR%
 - cmd: call utils\hct\hctstart %HLSL_SRC_DIR% %HLSL_BLD_DIR%
 
 
 build_script:
 build_script:
-- cmd: call utils\hct\hctbuild -%PLATFORM% -%CONFIGURATION% -vs2017 -spirvtest
+- cmd: call utils\hct\hctbuild -%PLATFORM% -%CONFIGURATION% -show-cmake-log -spirvtest
 - sh: mkdir build && cd build
 - sh: mkdir build && cd build
 - sh: cmake .. -GNinja $(cat ../utils/cmake-predefined-config-params) -DSPIRV_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_FLAGS=-Werror
 - sh: cmake .. -GNinja $(cat ../utils/cmake-predefined-config-params) -DSPIRV_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_FLAGS=-Werror
 - sh: ninja
 - sh: ninja

+ 3 - 4
cmake/modules/FindD3D12.cmake

@@ -1,8 +1,7 @@
-# Find the win10 SDK path.
+# Find the Win10 SDK path.
 if ("$ENV{WIN10_SDK_PATH}$ENV{WIN10_SDK_VERSION}" STREQUAL "" )
 if ("$ENV{WIN10_SDK_PATH}$ENV{WIN10_SDK_VERSION}" STREQUAL "" )
-  get_filename_component(WIN10_SDK_PATH "[HKEY_LOCAL_MACHINE\\SOFTWARE\\WOW6432Node\\Microsoft\\Microsoft SDKs\\Windows\\v10.0;InstallationFolder]" ABSOLUTE CACHE)
-  get_filename_component(TEMP_WIN10_SDK_VERSION "[HKEY_LOCAL_MACHINE\\SOFTWARE\\WOW6432Node\\Microsoft\\Microsoft SDKs\\Windows\\v10.0;ProductVersion]" ABSOLUTE CACHE)
-  get_filename_component(WIN10_SDK_VERSION ${TEMP_WIN10_SDK_VERSION} NAME)
+  get_filename_component(WIN10_SDK_PATH "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows Kits\\Installed Roots;KitsRoot10]" ABSOLUTE CACHE)
+  set (WIN10_SDK_VERSION ${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
 elseif(TRUE)
 elseif(TRUE)
   set (WIN10_SDK_PATH $ENV{WIN10_SDK_PATH})
   set (WIN10_SDK_PATH $ENV{WIN10_SDK_PATH})
   set (WIN10_SDK_VERSION $ENV{WIN10_SDK_VERSION})
   set (WIN10_SDK_VERSION $ENV{WIN10_SDK_VERSION})

+ 4 - 3
docs/DXIL.rst

@@ -2969,7 +2969,7 @@ The set of validation rules that are known to hold for a DXIL program is identif
 ========================================= ========================================================================================================================================================================================================================================================================================================
 ========================================= ========================================================================================================================================================================================================================================================================================================
 Rule Code                                 Description
 Rule Code                                 Description
 ========================================= ========================================================================================================================================================================================================================================================================================================
 ========================================= ========================================================================================================================================================================================================================================================================================================
-BITCODE.VALID                             TODO - Module must be bitcode-valid
+BITCODE.VALID                             Module must be bitcode-valid
 CONTAINER.PARTINVALID                     DXIL Container must not contain unknown parts
 CONTAINER.PARTINVALID                     DXIL Container must not contain unknown parts
 CONTAINER.PARTMATCHES                     DXIL Container Parts must match Module
 CONTAINER.PARTMATCHES                     DXIL Container Parts must match Module
 CONTAINER.PARTMISSING                     DXIL Container requires certain parts, corresponding to module
 CONTAINER.PARTMISSING                     DXIL Container requires certain parts, corresponding to module
@@ -3096,7 +3096,7 @@ META.KNOWN                                Named metadata should be known
 META.MAXTESSFACTOR                        Hull Shader MaxTessFactor must be [%0..%1].  %2 specified.
 META.MAXTESSFACTOR                        Hull Shader MaxTessFactor must be [%0..%1].  %2 specified.
 META.NOENTRYPROPSFORENTRY                 Entry point %0 must have entry properties.
 META.NOENTRYPROPSFORENTRY                 Entry point %0 must have entry properties.
 META.NOSEMANTICOVERLAP                    Semantics must not overlap
 META.NOSEMANTICOVERLAP                    Semantics must not overlap
-META.REQUIRED                             TODO - Required metadata missing.
+META.REQUIRED                             Required metadata missing.
 META.SEMAKINDMATCHESNAME                  Semantic name must match system value, when defined.
 META.SEMAKINDMATCHESNAME                  Semantic name must match system value, when defined.
 META.SEMAKINDVALID                        Semantic kind must be valid
 META.SEMAKINDVALID                        Semantic kind must be valid
 META.SEMANTICCOMPTYPE                     %0 must be %1.
 META.SEMANTICCOMPTYPE                     %0 must be %1.
@@ -3120,7 +3120,8 @@ META.TEXTURETYPE                          elements of typed buffers and textures
 META.USED                                 All metadata must be used by dxil.
 META.USED                                 All metadata must be used by dxil.
 META.VALIDSAMPLERMODE                     Invalid sampler mode on sampler .
 META.VALIDSAMPLERMODE                     Invalid sampler mode on sampler .
 META.VALUERANGE                           Metadata value must be within range.
 META.VALUERANGE                           Metadata value must be within range.
-META.WELLFORMED                           TODO - Metadata must be well-formed in operand count and types.
+META.VERSIONSUPPORTED                     Version in metadata must be supported.
+META.WELLFORMED                           Metadata must be well-formed in operand count and types.
 SM.64BITRAWBUFFERLOADSTORE                i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3.
 SM.64BITRAWBUFFERLOADSTORE                i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3.
 SM.AMPLIFICATIONSHADERPAYLOADSIZE         For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes.
 SM.AMPLIFICATIONSHADERPAYLOADSIZE         For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes.
 SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes.
 SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes.

+ 8 - 2
docs/SPIR-V.rst

@@ -283,7 +283,7 @@ Supported extensions
 * SPV_KHR_shader_draw_parameters
 * SPV_KHR_shader_draw_parameters
 * SPV_EXT_descriptor_indexing
 * SPV_EXT_descriptor_indexing
 * SPV_EXT_fragment_fully_covered
 * SPV_EXT_fragment_fully_covered
-* SPV_EXT_fragment_invocation_density
+* SPV_KHR_fragment_shading_rate
 * SPV_EXT_shader_stencil_support
 * SPV_EXT_shader_stencil_support
 * SPV_AMD_shader_explicit_vertex_parameter
 * SPV_AMD_shader_explicit_vertex_parameter
 * SPV_GOOGLE_hlsl_functionality1
 * SPV_GOOGLE_hlsl_functionality1
@@ -1495,7 +1495,13 @@ some system-value (SV) semantic strings will be translated into SPIR-V
 |                           +-------------+----------------------------------------+-----------------------+-----------------------------+
 |                           +-------------+----------------------------------------+-----------------------+-----------------------------+
 |                           | MSIn        | ``ViewIndex``                          | N/A                   | ``MultiView``               |
 |                           | MSIn        | ``ViewIndex``                          | N/A                   | ``MultiView``               |
 +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
 +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
-| SV_ShadingRate            | PSIn        | ``FragSizeEXT``                        | N/A                   | ``FragmentDensityEXT``      |
+|                           | VSOut       | ``PrimitiveShadingRateKHR``            | N/A                   | ``FragmentShadingRate``     |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | GSOut       | ``PrimitiveShadingRateKHR``            | N/A                   | ``FragmentShadingRate``     |
+| SV_ShadingRate            +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | PSIn        | ``ShadingRateKHR``                     | N/A                   | ``FragmentShadingRate``     |
+|                           +-------------+----------------------------------------+-----------------------+-----------------------------+
+|                           | MSOut       | ``PrimitiveShadingRateKHR``            | N/A                   | ``FragmentShadingRate``     |
 +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
 +---------------------------+-------------+----------------------------------------+-----------------------+-----------------------------+
 
 
 For entities (function parameters, function return values, struct fields) with
 For entities (function parameters, function return values, struct fields) with

+ 1 - 1
external/SPIRV-Headers

@@ -1 +1 @@
-Subproject commit a3fdfe81465d57efc97cfd28ac6c8190fb31a6c8
+Subproject commit dafead1765f6c1a5f9f8a76387dcb2abe4e54acd

+ 1 - 1
external/SPIRV-Tools

@@ -1 +1 @@
-Subproject commit ef3290bbea35935ba8fd623970511ed9f045bbd7
+Subproject commit dc72924cb31cd9f3dbc3eb47e9d926cf641e3a07

+ 23 - 1
include/dxc/DXIL/DxilConstants.h

@@ -29,7 +29,7 @@ namespace DXIL {
   const unsigned kDxilMajor = 1;
   const unsigned kDxilMajor = 1;
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_dxil_version_minor()</py>*/
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_dxil_version_minor()</py>*/
   // VALRULE-TEXT:BEGIN
   // VALRULE-TEXT:BEGIN
-  const unsigned kDxilMinor = 6;
+  const unsigned kDxilMinor = 7;
   // VALRULE-TEXT:END
   // VALRULE-TEXT:END
 
 
   inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) {
   inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) {
@@ -1489,6 +1489,28 @@ namespace DXIL {
     CandidateProceduralPrimitive = 1,
     CandidateProceduralPrimitive = 1,
   };
   };
 
 
+  enum class PayloadAccessQualifier : uint32_t {
+    NoAccess = 0,
+    Read = 1,
+    Write = 2,
+    ReadWrite = 3
+  };
+
+  enum class PayloadAccessShaderStage : uint32_t {
+    Caller = 0,
+    Closesthit = 1,
+    Miss = 2,
+    Anyhit = 3, 
+    Invalid = 0xffffffffu
+  }; 
+
+  // Allocate 4 bits per shader stage:
+  //     bits 0-1 for payload access qualifiers
+  //     bits 2-3 reserved for future use
+  const uint32_t PayloadAccessQualifierBitsPerStage = 4;
+  const uint32_t PayloadAccessQualifierValidMaskPerStage = 3;
+  const uint32_t PayloadAccessQualifierValidMask = 0x00003333;
+
   inline bool IsValidHitGroupType(HitGroupType type) {
   inline bool IsValidHitGroupType(HitGroupType type) {
     return (type >= HitGroupType::Triangle && type < HitGroupType::LastEntry);
     return (type >= HitGroupType::Triangle && type < HitGroupType::LastEntry);
   }
   }

+ 9 - 2
include/dxc/DXIL/DxilFunctionProps.h

@@ -21,7 +21,9 @@ class Constant;
 namespace hlsl {
 namespace hlsl {
 struct DxilFunctionProps {
 struct DxilFunctionProps {
   DxilFunctionProps() {
   DxilFunctionProps() {
-    memset(this, 0, sizeof(DxilFunctionProps));
+    memset(&ShaderProps, 0, sizeof(ShaderProps));
+    shaderKind = DXIL::ShaderKind::Invalid;
+    waveSize = 0;
   }
   }
   union {
   union {
     // Compute shader.
     // Compute shader.
@@ -83,7 +85,12 @@ struct DxilFunctionProps {
   } ShaderProps;
   } ShaderProps;
   DXIL::ShaderKind shaderKind;
   DXIL::ShaderKind shaderKind;
   // WaveSize is currently allowed only on compute shaders, but could be supported on other shader types in the future
   // WaveSize is currently allowed only on compute shaders, but could be supported on other shader types in the future
-  unsigned waveSize; 
+  unsigned waveSize;
+  // Save root signature for lib profile entry.
+  std::vector<uint8_t> serializedRootSignature;
+  void SetSerializedRootSignature(const uint8_t *pData, unsigned size) {
+    serializedRootSignature.assign(pData, pData+size);
+  }
 
 
   // TODO: Should we have an unmangled name here for ray tracing shaders?
   // TODO: Should we have an unmangled name here for ray tracing shaders?
   bool IsPS() const     { return shaderKind == DXIL::ShaderKind::Pixel; }
   bool IsPS() const     { return shaderKind == DXIL::ShaderKind::Pixel; }

+ 21 - 0
include/dxc/DXIL/DxilMetadataHelper.h

@@ -32,6 +32,7 @@ class MDNode;
 class NamedMDNode;
 class NamedMDNode;
 class GlobalVariable;
 class GlobalVariable;
 class StringRef;
 class StringRef;
+class Type;
 }
 }
 
 
 namespace hlsl {
 namespace hlsl {
@@ -48,6 +49,8 @@ class DxilSampler;
 class DxilTypeSystem;
 class DxilTypeSystem;
 class DxilStructAnnotation;
 class DxilStructAnnotation;
 class DxilFieldAnnotation;
 class DxilFieldAnnotation;
+class DxilPayloadAnnotation;
+class DxilPayloadFieldAnnotation;
 class DxilTemplateArgAnnotation;
 class DxilTemplateArgAnnotation;
 class DxilFunctionAnnotation;
 class DxilFunctionAnnotation;
 class DxilParameterAnnotation;
 class DxilParameterAnnotation;
@@ -217,6 +220,10 @@ public:
   static const unsigned kDxilFieldAnnotationPreciseTag            = 8;
   static const unsigned kDxilFieldAnnotationPreciseTag            = 8;
   static const unsigned kDxilFieldAnnotationCBUsedTag             = 9;
   static const unsigned kDxilFieldAnnotationCBUsedTag             = 9;
 
 
+  // DXR Payload Annotations
+  static const unsigned kDxilPayloadAnnotationStructTag           = 0;
+  static const unsigned kDxilPayloadFieldAnnotationAccessTag      = 0;
+
   // StructAnnotation extended property tags (DXIL 1.5+ only, appended)
   // StructAnnotation extended property tags (DXIL 1.5+ only, appended)
   static const unsigned kDxilTemplateArgumentsTag                 = 0;  // Name for name-value list of extended struct properties
   static const unsigned kDxilTemplateArgumentsTag                 = 0;  // Name for name-value list of extended struct properties
   // TemplateArgument tags
   // TemplateArgument tags
@@ -249,6 +256,9 @@ public:
   static const char kDxilValidatorVersionMDName[];
   static const char kDxilValidatorVersionMDName[];
   // Validator version uses the same constants for fields as kDxilVersion*
   // Validator version uses the same constants for fields as kDxilVersion*
 
 
+  // DXR Payload Annotations metadata.
+  static const char kDxilDxrPayloadAnnotationsMDName[];
+
   // Extended shader property tags.
   // Extended shader property tags.
   static const unsigned kDxilShaderFlagsTag     = 0;
   static const unsigned kDxilShaderFlagsTag     = 0;
   static const unsigned kDxilGSStateTag         = 1;
   static const unsigned kDxilGSStateTag         = 1;
@@ -262,6 +272,7 @@ public:
   static const unsigned kDxilMSStateTag         = 9;
   static const unsigned kDxilMSStateTag         = 9;
   static const unsigned kDxilASStateTag         = 10;
   static const unsigned kDxilASStateTag         = 10;
   static const unsigned kDxilWaveSizeTag        = 11;
   static const unsigned kDxilWaveSizeTag        = 11;
+  static const unsigned kDxilEntryRootSigTag    = 12;
 
 
   // GSState.
   // GSState.
   static const unsigned kDxilGSStateNumFields               = 5;
   static const unsigned kDxilGSStateNumFields               = 5;
@@ -414,6 +425,16 @@ public:
   llvm::Metadata *EmitDxilTemplateArgAnnotation(const DxilTemplateArgAnnotation &annotation);
   llvm::Metadata *EmitDxilTemplateArgAnnotation(const DxilTemplateArgAnnotation &annotation);
   void LoadDxilTemplateArgAnnotation(const llvm::MDOperand &MDO, DxilTemplateArgAnnotation &annotation);
   void LoadDxilTemplateArgAnnotation(const llvm::MDOperand &MDO, DxilTemplateArgAnnotation &annotation);
 
 
+  // DXR Payload Annotations 
+  void EmitDxrPayloadAnnotations(DxilTypeSystem &TypeSystem);
+  llvm::Metadata *EmitDxrPayloadStructAnnotation(const DxilPayloadAnnotation& SA);
+  llvm::Metadata *EmitDxrPayloadFieldAnnotation(const DxilPayloadFieldAnnotation &FA, llvm::Type* fieldType);
+  void LoadDxrPayloadAnnotationNode(const llvm::MDTuple &MDT, DxilTypeSystem &TypeSystem);
+  void LoadDxrPayloadAnnotations(DxilTypeSystem &TypeSystem);
+  void LoadDxrPayloadFieldAnnoations(const llvm::MDOperand& MDO, DxilPayloadAnnotation& SA);
+  void LoadDxrPayloadFieldAnnoation(const llvm::MDOperand &MDO, DxilPayloadFieldAnnotation &FA);
+  void LoadDxrPayloadAccessQualifiers(const llvm::MDOperand &MDO, DxilPayloadFieldAnnotation &FA);
+
   // Function props.
   // Function props.
   llvm::MDTuple *EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props,
   llvm::MDTuple *EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props,
                                        const llvm::Function *F);
                                        const llvm::Function *F);

+ 4 - 0
include/dxc/DXIL/DxilModule.h

@@ -166,6 +166,7 @@ public:
 
 
   // DXIL type system.
   // DXIL type system.
   DxilTypeSystem &GetTypeSystem();
   DxilTypeSystem &GetTypeSystem();
+  const DxilTypeSystem &GetTypeSystem() const;
 
 
   /// Emit llvm.used array to make sure that optimizations do not remove unreferenced globals.
   /// Emit llvm.used array to make sure that optimizations do not remove unreferenced globals.
   void EmitLLVMUsed();
   void EmitLLVMUsed();
@@ -386,6 +387,9 @@ private:
   uint32_t m_IntermediateFlags;
   uint32_t m_IntermediateFlags;
   uint32_t m_AutoBindingSpace;
   uint32_t m_AutoBindingSpace;
 
 
+  // properties inferred from the DXILTypeSystem
+  bool m_bHasPayloadQualifiers;
+
   std::unique_ptr<DxilSubobjects> m_pSubobjects;
   std::unique_ptr<DxilSubobjects> m_pSubobjects;
 
 
   // m_bMetadataErrors is true if non-fatal metadata errors were encountered.
   // m_bMetadataErrors is true if non-fatal metadata errors were encountered.

+ 4 - 0
include/dxc/DXIL/DxilResource.h

@@ -45,6 +45,9 @@ public:
   unsigned GetElementStride() const;
   unsigned GetElementStride() const;
   void SetElementStride(unsigned ElemStride);
   void SetElementStride(unsigned ElemStride);
 
 
+  unsigned GetBaseAlignLog2() const;
+  void SetBaseAlignLog2(unsigned baseAlignLog2);
+
   DXIL::SamplerFeedbackType GetSamplerFeedbackType() const;
   DXIL::SamplerFeedbackType GetSamplerFeedbackType() const;
   void SetSamplerFeedbackType(DXIL::SamplerFeedbackType Value);
   void SetSamplerFeedbackType(DXIL::SamplerFeedbackType Value);
 
 
@@ -76,6 +79,7 @@ public:
 private:
 private:
   unsigned m_SampleCount;
   unsigned m_SampleCount;
   unsigned m_ElementStride; // in bytes
   unsigned m_ElementStride; // in bytes
+  unsigned m_baseAlignLog2 = 0; // worst-case alignment
   CompType m_CompType;
   CompType m_CompType;
   DXIL::SamplerFeedbackType m_SamplerFeedbackType;
   DXIL::SamplerFeedbackType m_SamplerFeedbackType;
   bool m_bGloballyCoherent;
   bool m_bGloballyCoherent;

+ 3 - 2
include/dxc/DXIL/DxilShaderModel.h

@@ -31,7 +31,7 @@ public:
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_highest_shader_model()</py>*/
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_highest_shader_model()</py>*/
   // VALRULE-TEXT:BEGIN
   // VALRULE-TEXT:BEGIN
   static const unsigned kHighestMajor = 6;
   static const unsigned kHighestMajor = 6;
-  static const unsigned kHighestMinor = 6;
+  static const unsigned kHighestMinor = 7;
   // VALRULE-TEXT:END
   // VALRULE-TEXT:END
   static const unsigned kOfflineMinor = 0xF;
   static const unsigned kOfflineMinor = 0xF;
 
 
@@ -68,6 +68,7 @@ public:
   bool IsSM64Plus() const { return IsSMAtLeast(6, 4); }
   bool IsSM64Plus() const { return IsSMAtLeast(6, 4); }
   bool IsSM65Plus() const { return IsSMAtLeast(6, 5); }
   bool IsSM65Plus() const { return IsSMAtLeast(6, 5); }
   bool IsSM66Plus() const { return IsSMAtLeast(6, 6); }
   bool IsSM66Plus() const { return IsSMAtLeast(6, 6); }
+  bool IsSM67Plus() const { return IsSMAtLeast(6, 7); }
   // VALRULE-TEXT:END
   // VALRULE-TEXT:END
   const char *GetName() const { return m_pszName; }
   const char *GetName() const { return m_pszName; }
   const char *GetKindName() const;
   const char *GetKindName() const;
@@ -97,7 +98,7 @@ private:
               bool m_bUAVs, bool m_bTypedUavs, unsigned m_UAVRegsLim);
               bool m_bUAVs, bool m_bTypedUavs, unsigned m_UAVRegsLim);
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_num_shader_models()</py>*/
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_num_shader_models()</py>*/
   // VALRULE-TEXT:BEGIN
   // VALRULE-TEXT:BEGIN
-  static const unsigned kNumShaderModels = 74;
+  static const unsigned kNumShaderModels = 83;
   // VALRULE-TEXT:END
   // VALRULE-TEXT:END
   static const ShaderModel ms_ShaderModels[kNumShaderModels];
   static const ShaderModel ms_ShaderModels[kNumShaderModels];
 
 

+ 52 - 0
include/dxc/DXIL/DxilTypeSystem.h

@@ -12,6 +12,7 @@
 #pragma once
 #pragma once
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/MapVector.h"
+#include "dxc/DXIL/DxilConstants.h"
 #include "dxc/DXIL/DxilCompType.h"
 #include "dxc/DXIL/DxilCompType.h"
 #include "dxc/DXIL/DxilInterpolationMode.h"
 #include "dxc/DXIL/DxilInterpolationMode.h"
 
 
@@ -140,6 +141,46 @@ private:
 };
 };
 
 
 
 
+/// Use this class to represent type annotation for a DXR payload field: a component type plus a bitmask of per-shader-stage access qualifiers.
+class DxilPayloadFieldAnnotation {
+public:
+
+  static unsigned GetBitOffsetForShaderStage(DXIL::PayloadAccessShaderStage shaderStage); // static; NOTE(review): presumably the stage's bit position within the qualifier mask - confirm in the .cpp
+
+  DxilPayloadFieldAnnotation() = default; // members take their in-class defaults (m_bitmask starts at 0)
+
+  bool HasCompType() const;
+  const CompType &GetCompType() const;
+  void SetCompType(CompType::Kind kind); // takes a CompType::Kind, while GetCompType returns a const CompType reference
+
+  uint32_t GetPayloadFieldQualifierMask() const;
+  void SetPayloadFieldQualifierMask(uint32_t fieldBitmask); // NOTE(review): mask is uint32_t here but m_bitmask is declared unsigned
+  void AddPayloadFieldQualifier(DXIL::PayloadAccessShaderStage shaderStage, DXIL::PayloadAccessQualifier qualifier);
+  DXIL::PayloadAccessQualifier GetPayloadFieldQualifier(DXIL::PayloadAccessShaderStage shaderStage) const;
+  bool HasAnnotations() const; // NOTE(review): presumably true when any qualifier has been recorded - confirm
+
+private:
+  CompType m_CompType;
+  unsigned m_bitmask = 0; // zero by default; NOTE(review): presumably backs the qualifier-mask accessors - confirm
+};
+
+/// Use this class to represent DXR payload structures.
+/// Holds the annotated struct type together with a vector of
+/// DxilPayloadFieldAnnotation entries (presumably one per struct field -
+/// confirm against the code that populates it). DxilTypeSystem is a friend
+/// and may access the private members directly.
+class DxilPayloadAnnotation {
+  friend class DxilTypeSystem;
+
+public:
+  /// Number of field annotations held by this payload annotation.
+  unsigned GetNumFields() const;
+  /// Mutable / read-only access to the annotation at index FieldIdx.
+  /// NOTE(review): bounds behavior is defined in the .cpp - confirm before
+  /// passing an unchecked index.
+  DxilPayloadFieldAnnotation &GetFieldAnnotation(unsigned FieldIdx);
+  const DxilPayloadFieldAnnotation &GetFieldAnnotation(unsigned FieldIdx) const;
+  /// Struct type this annotation describes; null until SetStructType is called.
+  const llvm::StructType *GetStructType() const;
+  void SetStructType(const llvm::StructType *Ty);
+
+private:
+  // Initialized to null so that a default-constructed annotation never exposes
+  // an indeterminate pointer through GetStructType().
+  const llvm::StructType *m_pStructType = nullptr;
+  std::vector<DxilPayloadFieldAnnotation> m_FieldAnnotations;
+};
+
+
 enum class DxilParamInputQual {
 enum class DxilParamInputQual {
   In,
   In,
   Out,
   Out,
@@ -192,6 +233,7 @@ private:
 class DxilTypeSystem {
 class DxilTypeSystem {
 public:
 public:
   using StructAnnotationMap = llvm::MapVector<const llvm::StructType *, std::unique_ptr<DxilStructAnnotation> >;
   using StructAnnotationMap = llvm::MapVector<const llvm::StructType *, std::unique_ptr<DxilStructAnnotation> >;
+  using PayloadAnnotationMap = llvm::MapVector<const llvm::StructType *, std::unique_ptr<DxilPayloadAnnotation> >;
   using FunctionAnnotationMap = llvm::MapVector<const llvm::Function *, std::unique_ptr<DxilFunctionAnnotation> >;
   using FunctionAnnotationMap = llvm::MapVector<const llvm::Function *, std::unique_ptr<DxilFunctionAnnotation> >;
 
 
   DxilTypeSystem(llvm::Module *pModule);
   DxilTypeSystem(llvm::Module *pModule);
@@ -202,6 +244,15 @@ public:
   void EraseStructAnnotation(const llvm::StructType *pStructType);
   void EraseStructAnnotation(const llvm::StructType *pStructType);
 
 
   StructAnnotationMap &GetStructAnnotationMap();
   StructAnnotationMap &GetStructAnnotationMap();
+  const StructAnnotationMap &GetStructAnnotationMap() const;
+
+  DxilPayloadAnnotation *AddPayloadAnnotation(const llvm::StructType *pStructType);
+  DxilPayloadAnnotation *GetPayloadAnnotation(const llvm::StructType *pStructType);
+  const DxilPayloadAnnotation *GetPayloadAnnotation(const llvm::StructType *pStructType) const;
+  void ErasePayloadAnnotation(const llvm::StructType *pStructType);
+
+  PayloadAnnotationMap &GetPayloadAnnotationMap();
+  const PayloadAnnotationMap &GetPayloadAnnotationMap() const;
 
 
   DxilFunctionAnnotation *AddFunctionAnnotation(const llvm::Function *pFunction);
   DxilFunctionAnnotation *AddFunctionAnnotation(const llvm::Function *pFunction);
   DxilFunctionAnnotation *GetFunctionAnnotation(const llvm::Function *pFunction);
   DxilFunctionAnnotation *GetFunctionAnnotation(const llvm::Function *pFunction);
@@ -227,6 +278,7 @@ public:
 private:
 private:
   llvm::Module *m_pModule;
   llvm::Module *m_pModule;
   StructAnnotationMap m_StructAnnotations;
   StructAnnotationMap m_StructAnnotations;
+  PayloadAnnotationMap m_PayloadAnnotations;
   FunctionAnnotationMap m_FunctionAnnotations;
   FunctionAnnotationMap m_FunctionAnnotations;
 
 
   DXIL::LowPrecisionMode m_LowPrecisionMode;
   DXIL::LowPrecisionMode m_LowPrecisionMode;

+ 3 - 0
include/dxc/DXIL/DxilUtil.h

@@ -152,6 +152,9 @@ namespace dxilutil {
 
 
   void ReplaceRawBufferLoad64Bit(llvm::Function *F, llvm::Type *EltTy, hlsl::OP *hlslOP);
   void ReplaceRawBufferLoad64Bit(llvm::Function *F, llvm::Type *EltTy, hlsl::OP *hlslOP);
   void ReplaceRawBufferStore64Bit(llvm::Function *F, llvm::Type *ETy, hlsl::OP *hlslOP);
   void ReplaceRawBufferStore64Bit(llvm::Function *F, llvm::Type *ETy, hlsl::OP *hlslOP);
+
+  bool IsConvergentMarker(llvm::Value *V);
+  llvm::Value *GetConvergentSource(llvm::Value *V);
 }
 }
 
 
 }
 }

+ 0 - 19
include/dxc/HLSL/DxilConvergent.h

@@ -1,19 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//                                                                           //
-// DxilConvergent.h                                                          //
-// Copyright (C) Microsoft Corporation. All rights reserved.                 //
-// This file is distributed under the University of Illinois Open Source     //
-// License. See LICENSE.TXT for details.                                     //
-//                                                                           //
-///////////////////////////////////////////////////////////////////////////////
-#pragma once
-
-namespace llvm {
-  class Value;
-  class Function;
-}
-
-namespace hlsl {
-  bool IsConvergentMarker(llvm::Value *V);
-  llvm::Value *GetConvergentSource(llvm::Value *V);
-}

+ 5 - 1
include/dxc/HLSL/DxilExportMap.h

@@ -38,10 +38,13 @@ namespace dxilutil {
     typedef ExportMapByString::iterator iterator;
     typedef ExportMapByString::iterator iterator;
     typedef ExportMapByString::const_iterator const_iterator;
     typedef ExportMapByString::const_iterator const_iterator;
 
 
-    ExportMap() {}
+    ExportMap():m_ExportShadersOnly(false) {}
     void clear();
     void clear();
     bool empty() const;
     bool empty() const;
 
 
+    void setExportShadersOnly(bool v) { m_ExportShadersOnly = v; }
+    bool isExportShadersOnly() const { return m_ExportShadersOnly; }
+
     // Iterate export map by string name
     // Iterate export map by string name
     iterator begin() { return m_ExportMap.begin(); }
     iterator begin() { return m_ExportMap.begin(); }
     const_iterator begin() const { return m_ExportMap.begin(); }
     const_iterator begin() const { return m_ExportMap.begin(); }
@@ -100,6 +103,7 @@ namespace dxilutil {
     NameSet m_ExportNames;
     NameSet m_ExportNames;
     NameSet m_NameCollisions;
     NameSet m_NameCollisions;
     NameSet m_UnusedExports;
     NameSet m_UnusedExports;
+    bool    m_ExportShadersOnly;
   };
   };
 }
 }
 
 

+ 11 - 3
include/dxc/HLSL/DxilValidation.h

@@ -31,7 +31,7 @@ namespace hlsl {
 // Known validation rules
 // Known validation rules
 enum class ValidationRule : unsigned {
 enum class ValidationRule : unsigned {
   // Bitcode
   // Bitcode
-  BitcodeValid, // TODO - Module must be bitcode-valid
+  BitcodeValid, // Module must be bitcode-valid
 
 
   // Container
   // Container
   ContainerPartInvalid, // DXIL Container must not contain unknown parts
   ContainerPartInvalid, // DXIL Container must not contain unknown parts
@@ -162,7 +162,7 @@ enum class ValidationRule : unsigned {
   MetaMaxTessFactor, // Hull Shader MaxTessFactor must be [%0..%1].  %2 specified.
   MetaMaxTessFactor, // Hull Shader MaxTessFactor must be [%0..%1].  %2 specified.
   MetaNoEntryPropsForEntry, // Entry point %0 must have entry properties.
   MetaNoEntryPropsForEntry, // Entry point %0 must have entry properties.
   MetaNoSemanticOverlap, // Semantics must not overlap
   MetaNoSemanticOverlap, // Semantics must not overlap
-  MetaRequired, // TODO - Required metadata missing.
+  MetaRequired, // Required metadata missing.
   MetaSemaKindMatchesName, // Semantic name must match system value, when defined.
   MetaSemaKindMatchesName, // Semantic name must match system value, when defined.
   MetaSemaKindValid, // Semantic kind must be valid
   MetaSemaKindValid, // Semantic kind must be valid
   MetaSemanticCompType, // %0 must be %1.
   MetaSemanticCompType, // %0 must be %1.
@@ -186,7 +186,8 @@ enum class ValidationRule : unsigned {
   MetaUsed, // All metadata must be used by dxil.
   MetaUsed, // All metadata must be used by dxil.
   MetaValidSamplerMode, // Invalid sampler mode on sampler .
   MetaValidSamplerMode, // Invalid sampler mode on sampler .
   MetaValueRange, // Metadata value must be within range.
   MetaValueRange, // Metadata value must be within range.
-  MetaWellFormed, // TODO - Metadata must be well-formed in operand count and types.
+  MetaVersionSupported, // Version in metadata must be supported.
+  MetaWellFormed, // Metadata must be well-formed in operand count and types.
 
 
   // Program flow
   // Program flow
   FlowDeadLoop, // Loop must have break.
   FlowDeadLoop, // Loop must have break.
@@ -347,6 +348,13 @@ HRESULT ValidateDxilContainer(_In_reads_bytes_(ContainerSize) const void *pConta
                               _In_ uint32_t ContainerSize,
                               _In_ uint32_t ContainerSize,
                               _In_ llvm::raw_ostream &DiagStream);
                               _In_ llvm::raw_ostream &DiagStream);
 
 
+// Full container validation, including ValidateDxilModule, with debug module
+HRESULT ValidateDxilContainer(_In_reads_bytes_(ContainerSize) const void *pContainer,
+                              _In_ uint32_t ContainerSize,
+                              const void *pOptDebugBitcode,
+                              uint32_t OptDebugBitcodeSize,
+                              _In_ llvm::raw_ostream &DiagStream);
+
 class PrintDiagnosticContext {
 class PrintDiagnosticContext {
 private:
 private:
   llvm::DiagnosticPrinter &m_Printer;
   llvm::DiagnosticPrinter &m_Printer;

+ 9 - 0
include/dxc/Support/ErrorCodes.h

@@ -107,3 +107,12 @@
 
 
 // 0X80AA001A - Error in extension mechanism.
 // 0X80AA001A - Error in extension mechanism.
 #define DXC_E_EXTENSION_ERROR                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001A))
 #define DXC_E_EXTENSION_ERROR                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001A))
+
+// 0X80AA001B - LLVM Fatal Error
+#define DXC_E_LLVM_FATAL_ERROR                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001B))
+
+// 0X80AA001C - LLVM Unreachable code
+#define DXC_E_LLVM_UNREACHABLE                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001C))
+
+// 0X80AA001D - LLVM Cast Failure
+#define DXC_E_LLVM_CAST_ERROR                         DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001D))

+ 4 - 0
include/dxc/Support/FileIOHelper.h

@@ -190,6 +190,10 @@ HRESULT DxcCreateBlobWithEncodingFromPinned(
     _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
     _In_bytecount_(size) LPCVOID pText, UINT32 size, UINT32 codePage,
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) throw();
     _COM_Outptr_ IDxcBlobEncoding **pBlobEncoding) throw();
 
 
+HRESULT DxcCreateBlobFromPinned(
+    _In_bytecount_(size) LPCVOID pText, UINT32 size,
+    _COM_Outptr_ IDxcBlob **pBlob) throw();
+
 HRESULT
 HRESULT
 DxcCreateBlobWithEncodingFromStream(
 DxcCreateBlobWithEncodingFromStream(
     IStream *pStream, bool newInstanceAlways, UINT32 codePage,
     IStream *pStream, bool newInstanceAlways, UINT32 codePage,

+ 7 - 3
include/dxc/Support/HLSLOptions.h

@@ -141,6 +141,7 @@ public:
   bool DebugNameForBinary = false; // OPT_Zsb
   bool DebugNameForBinary = false; // OPT_Zsb
   bool DebugNameForSource = false; // OPT_Zss
   bool DebugNameForSource = false; // OPT_Zss
   bool DumpBin = false;        // OPT_dumpbin
   bool DumpBin = false;        // OPT_dumpbin
+  bool Link = false;        // OPT_link
   bool WarningAsError = false; // OPT__SLASH_WX
   bool WarningAsError = false; // OPT__SLASH_WX
   bool IEEEStrict = false;     // OPT_Gis
   bool IEEEStrict = false;     // OPT_Gis
   bool IgnoreLineDirectives = false; // OPT_ignore_line_directives
   bool IgnoreLineDirectives = false; // OPT_ignore_line_directives
@@ -172,9 +173,9 @@ public:
   bool RecompileFromBinary = false; // OPT _Recompile (Recompiling the DXBC binary file not .hlsl file)
   bool RecompileFromBinary = false; // OPT _Recompile (Recompiling the DXBC binary file not .hlsl file)
   bool StripDebug = false; // OPT Qstrip_debug
   bool StripDebug = false; // OPT Qstrip_debug
   bool EmbedDebug = false; // OPT Qembed_debug
   bool EmbedDebug = false; // OPT Qembed_debug
-  bool SourceInDebugModule = false; // OPT Qsource_in_debug_module
+  bool SourceInDebugModule = false; // OPT Zs -- NOTE(review): HLSLOptions.td still defines Qsource_in_debug_module and describes Zs as the small source-only PDB; confirm which flag actually sets this field
   bool SourceOnlyDebug = false; // OPT Qsource_only_debug
   bool SourceOnlyDebug = false; // OPT Qsource_only_debug
-  bool FullDebug = false; // OPT Qfull_debug
+  bool PdbInPrivate = false; // OPT Qpdb_in_private
   bool StripRootSignature = false; // OPT_Qstrip_rootsignature
   bool StripRootSignature = false; // OPT_Qstrip_rootsignature
   bool StripPrivate = false; // OPT_Qstrip_priv
   bool StripPrivate = false; // OPT_Qstrip_priv
   bool StripReflection = false; // OPT_Qstrip_reflect
   bool StripReflection = false; // OPT_Qstrip_reflect
@@ -201,6 +202,8 @@ public:
   std::map<std::string, std::string> DxcOptimizationSelects; // OPT_opt_select
   std::map<std::string, std::string> DxcOptimizationSelects; // OPT_opt_select
 
 
   bool PrintAfterAll; // OPT_print_after_all
   bool PrintAfterAll; // OPT_print_after_all
+  bool EnablePayloadQualifiers = false; // OPT_enable_payload_qualifiers
+  bool HandleExceptions = false; // OPT_disable_exception_handling
 
 
   // Rewriter Options
   // Rewriter Options
   RewriterOpts RWOpt;
   RewriterOpts RWOpt;
@@ -211,7 +214,8 @@ public:
   bool IsLibraryProfile();
   bool IsLibraryProfile();
 
 
   // Helpers to clarify interpretation of flags for behavior in implementation
   // Helpers to clarify interpretation of flags for behavior in implementation
-  bool IsDebugInfoEnabled();    // Zi
+  bool GenerateFullDebugInfo(); // Zi
+  bool GeneratePDB();           // Zi or Zs
   bool EmbedDebugInfo();        // Qembed_debug
   bool EmbedDebugInfo();        // Qembed_debug
   bool EmbedPDBName();          // Zi or Fd
   bool EmbedPDBName();          // Zi or Fd
   bool DebugFileIsDirectory();  // Fd ends in '\\'
   bool DebugFileIsDirectory();  // Fd ends in '\\'

+ 12 - 4
include/dxc/Support/HLSLOptions.td

@@ -279,6 +279,12 @@ def disable_lifetime_markers : Flag<["-", "/"], "disable-lifetime-markers">, Gro
   HelpText<"Disable generation of lifetime markers where they would be otherwise (6.6+)">;
   HelpText<"Disable generation of lifetime markers where they would be otherwise (6.6+)">;
 def enable_templates: Flag<["-", "/"], "enable-templates">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
 def enable_templates: Flag<["-", "/"], "enable-templates">, Group<hlslcomp_Group>, Flags<[CoreOption]>,
   HelpText<"Enable template support for HLSL.">;
   HelpText<"Enable template support for HLSL.">;
+def enable_payload_qualifiers : Flag<["-", "/"], "enable-payload-qualifiers">, Group<hlslcomp_Group>, Flags<[CoreOption, RewriteOption, DriverOption]>,
+  HelpText<"Enables support for payload access qualifiers for raytracing payloads in SM 6.6.">;
+def disable_payload_qualifiers : Flag<["-", "/"], "disable-payload-qualifiers">, Group<hlslcomp_Group>, Flags<[CoreOption, RewriteOption, DriverOption]>,
+  HelpText<"Disables support for payload access qualifiers for raytracing payloads in SM 6.7.">;
+def disable_exception_handling : Flag<["-", "/"], "disable-exception-handling">, Group<hlslcomp_Group>, Flags<[DriverOption, HelpHidden]>,
+  HelpText<"Disable dxc handling of exceptions">;
 
 
 // Used with API only
 // Used with API only
 def skip_serialization : Flag<["-", "/"], "skip-serialization">, Group<hlslcore_Group>, Flags<[CoreOption, HelpHidden]>,
 def skip_serialization : Flag<["-", "/"], "skip-serialization">, Group<hlslcore_Group>, Flags<[CoreOption, HelpHidden]>,
@@ -340,7 +346,7 @@ def Oconfig : CommaJoined<["-"], "Oconfig=">, Group<spirv_Group>, Flags<[CoreOpt
 def target_profile : JoinedOrSeparate<["-", "/"], "T">, Flags<[CoreOption]>, Group<hlslcomp_Group>, MetaVarName<"<profile>">,
 def target_profile : JoinedOrSeparate<["-", "/"], "T">, Flags<[CoreOption]>, Group<hlslcomp_Group>, MetaVarName<"<profile>">,
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_target_profiles()</py>*/
   /* <py::lines('VALRULE-TEXT')>hctdb_instrhelp.get_target_profiles()</py>*/
   // VALRULE-TEXT:BEGIN
   // VALRULE-TEXT:BEGIN
-  HelpText<"Set target profile. \n\t<profile>: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, ps_6_5, ps_6_6, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, vs_6_5, vs_6_6, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, gs_6_5, gs_6_6, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, hs_6_5, hs_6_6, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, ds_6_5, ds_6_6, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, \n\t\t lib_6_1, lib_6_2, lib_6_3, lib_6_4, lib_6_5, lib_6_6, \n\t\t ms_6_5, ms_6_6, \n\t\t as_6_5, as_6_6, \n\t\t ">;
+  HelpText<"Set target profile. \n\t<profile>: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, ps_6_5, ps_6_6, ps_6_7, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, vs_6_5, vs_6_6, vs_6_7, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, gs_6_5, gs_6_6, gs_6_7, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, hs_6_5, hs_6_6, hs_6_7, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, ds_6_5, ds_6_6, ds_6_7, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, cs_6_7, \n\t\t lib_6_1, lib_6_2, lib_6_3, lib_6_4, lib_6_5, lib_6_6, lib_6_7, \n\t\t ms_6_5, ms_6_6, ms_6_7, \n\t\t as_6_5, as_6_6, as_6_7, \n\t\t ">;
   // VALRULE-TEXT:END
   // VALRULE-TEXT:END
 def entrypoint :  JoinedOrSeparate<["-", "/"], "E">, Flags<[CoreOption, RewriteOption]>, Group<hlslcomp_Group>,
 def entrypoint :  JoinedOrSeparate<["-", "/"], "E">, Flags<[CoreOption, RewriteOption]>, Group<hlslcomp_Group>,
   HelpText<"Entry point name">;
   HelpText<"Entry point name">;
@@ -403,6 +409,8 @@ def P : Separate<["-", "/"], "P">, Flags<[CoreOption, DriverOption]>, Group<hlsl
 
 
 def dumpbin : Flag<["-", "/"], "dumpbin">, Flags<[DriverOption]>, Group<hlslutil_Group>,
 def dumpbin : Flag<["-", "/"], "dumpbin">, Flags<[DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Load a binary file rather than compiling">;
   HelpText<"Load a binary file rather than compiling">;
+def link : Flag<["-", "/"], "link">, Flags<[DriverOption]>, Group<hlslutil_Group>,
+  HelpText<"Link list of libraries provided in <inputs> argument separated by ';'">;
 def Qstrip_reflect : Flag<["-", "/"], "Qstrip_reflect">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>,
 def Qstrip_reflect : Flag<["-", "/"], "Qstrip_reflect">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>,
   HelpText<"Strip reflection data from shader bytecode  (must be used with /Fo <file>)">;
   HelpText<"Strip reflection data from shader bytecode  (must be used with /Fo <file>)">;
 def Qstrip_debug : Flag<["-", "/"], "Qstrip_debug">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>,
 def Qstrip_debug : Flag<["-", "/"], "Qstrip_debug">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>,
@@ -413,10 +421,10 @@ def Qstrip_priv : Flag<["-", "/"], "Qstrip_priv">, Flags<[DriverOption]>, Group<
   HelpText<"Strip private data from shader bytecode  (must be used with /Fo <file>)">;
   HelpText<"Strip private data from shader bytecode  (must be used with /Fo <file>)">;
 def Qsource_in_debug_module : Flag<["-", "/"], "Qsource_in_debug_module">, Flags<[CoreOption, HelpHidden]>, Group<hlslutil_Group>,
 def Qsource_in_debug_module : Flag<["-", "/"], "Qsource_in_debug_module">, Flags<[CoreOption, HelpHidden]>, Group<hlslutil_Group>,
   HelpText<"Generate old PDB format.">;
   HelpText<"Generate old PDB format.">;
-def Qsource_only_debug : Flag<["-", "/"], "Qsource_only_debug">, Flags<[CoreOption, HelpHidden]>, Group<hlslutil_Group>,
+def Zs : Flag<["-", "/"], "Zs">, Flags<[CoreOption]>, Group<hlslutil_Group>,
   HelpText<"Generate small PDB with just sources and compile options.">;
   HelpText<"Generate small PDB with just sources and compile options.">;
-def Qfull_debug : Flag<["-", "/"], "Qfull_debug">, Flags<[CoreOption, HelpHidden]>, Group<hlslutil_Group>,
-  HelpText<"Generate full debug info for PDB.">;
+def Qpdb_in_private : Flag<["-", "/"], "Qpdb_in_private">, Flags<[CoreOption, HelpHidden]>, Group<hlslutil_Group>,
+  HelpText<"Store PDB in private user data.">;
 
 
 def Qstrip_rootsignature : Flag<["-", "/"], "Qstrip_rootsignature">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>, HelpText<"Strip root signature data from shader bytecode  (must be used with /Fo <file>)">;
 def Qstrip_rootsignature : Flag<["-", "/"], "Qstrip_rootsignature">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>, HelpText<"Strip root signature data from shader bytecode  (must be used with /Fo <file>)">;
 def setrootsignature     : JoinedOrSeparate<["-", "/"], "setrootsignature">,     MetaVarName<"<file>">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>, HelpText<"Attach root signature to shader bytecode">;
 def setrootsignature     : JoinedOrSeparate<["-", "/"], "setrootsignature">,     MetaVarName<"<file>">, Flags<[CoreOption, DriverOption]>, Group<hlslutil_Group>, HelpText<"Attach root signature to shader bytecode">;

+ 2634 - 0
include/dxc/Support/d3d12TokenizedProgramFormat.hpp

@@ -0,0 +1,2634 @@
+#pragma once
+
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// d3d12TokenizedProgramFormat.hpp                                           //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides declarations for the DirectX Tokenized Program Format.           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+// ----------------------------------------------------------------------------
+//
+// High Level Goals
+//
+// - Serve as the runtime/DDI representation for all D3D11 tokenized code,
+//   for all classes of programs, including pixel program, vertex program,
+//   geometry program, etc.
+//
+// - Any information that HLSL needs to give to drivers is encoded in
+//   this token format in some form.
+//
+// - Enable common tools and source code for managing all tokenizable
+//   program formats.
+//
+// - Support extensible token definitions, allowing full customizations for
+//   specific program classes, while maintaining general conventions for all
+//   program models.
+//
+// - Binary backwards compatible with D3D10.  Any token name that was originally
+//   defined with "D3D10" in it is unchanged; D3D11 only adds new tokens.
+//
+// ----------------------------------------------------------------------------
+//
+// Low Level Feature Summary
+//
+// - DWORD based tokens always, for simplicity
+// - Opcode token is generally a single DWORD, though there is a bit indicating
+//   if extended information (extra DWORD(s)) is present
+// - Operand tokens are a completely self contained, extensible format,
+//   with scalar and 4-vector data types as first class citizens, but
+//   allowance for extension to n-component vectors.
+// - Initial operand token identifies register type, register file
+//   structure/dimensionality and mode of indexing for each dimension,
+//   and choice of component selection mechanism (i.e. mask vs. swizzle etc).
+// - Optional additional extended operand tokens can define things like
+//   modifiers (which are not needed by default).
+// - Operand's immediate index value(s), if needed, appear as subsequent DWORD
+//   values, and if relative addressing is specified, an additional completely
+//   self contained operand definition appears nested in the token sequence.
+//
+// ----------------------------------------------------------------------------
+
+#include <winapifamily.h>
+
+#pragma region Application Family
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_GAMES)
+
+// ----------------------------------------------------------------------------
+// Version Token (VerTok)
+//
+// [03:00] minor version number (0-15)
+// [07:04] major version number (0-15)
+// [15:08] not covered by the version masks defined below
+// [31:16] D3D10_SB_TOKENIZED_PROGRAM_TYPE
+//
+// ----------------------------------------------------------------------------
+
+typedef enum D3D10_SB_TOKENIZED_PROGRAM_TYPE // carried in bits [31:16] of the version token (see D3D10_SB_TOKENIZED_PROGRAM_TYPE_MASK / _SHIFT)
+{
+    D3D10_SB_PIXEL_SHADER       = 0,
+    D3D10_SB_VERTEX_SHADER      = 1,
+    D3D10_SB_GEOMETRY_SHADER    = 2,
+    
+    // D3D11 Shaders
+    D3D11_SB_HULL_SHADER        = 3,
+    D3D11_SB_DOMAIN_SHADER      = 4,
+    D3D11_SB_COMPUTE_SHADER     = 5,
+
+    // Subset of D3D12 Shaders where this field is referenced by runtime
+    // Entries from 6-12 are unique to state objects 
+    // (e.g. library, callable and raytracing shaders)
+    // and therefore have no enumerator in this list.
+    D3D12_SB_MESH_SHADER        = 13,
+    D3D12_SB_AMPLIFICATION_SHADER = 14,
+
+    D3D11_SB_RESERVED0          = 0xFFF0
+} D3D10_SB_TOKENIZED_PROGRAM_TYPE;
+
+#define D3D10_SB_TOKENIZED_PROGRAM_TYPE_MASK  0xffff0000
+#define D3D10_SB_TOKENIZED_PROGRAM_TYPE_SHIFT 16
+
+// DECODER MACRO: Retrieve program type from version token
+#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_TYPE(VerTok) ((D3D10_SB_TOKENIZED_PROGRAM_TYPE)(((VerTok)&D3D10_SB_TOKENIZED_PROGRAM_TYPE_MASK)>>D3D10_SB_TOKENIZED_PROGRAM_TYPE_SHIFT))
+
+#define D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_MASK  0x000000f0
+#define D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_SHIFT 4
+#define D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION_MASK  0x0000000f
+
+// DECODER MACRO: Retrieve major version # from version token
+#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION(VerTok) (((VerTok)&D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_MASK)>>D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_SHIFT)
+// DECODER MACRO: Retrieve minor version # from version token
+#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION(VerTok) ((VerTok)&D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION_MASK)
+
+// ENCODER MACRO: Create complete VerTok
+#define ENCODE_D3D10_SB_TOKENIZED_PROGRAM_VERSION_TOKEN(ProgType,MajorVer,MinorVer) ((((ProgType)<<D3D10_SB_TOKENIZED_PROGRAM_TYPE_SHIFT)&D3D10_SB_TOKENIZED_PROGRAM_TYPE_MASK)|\
+                                                                               ((((MajorVer)<<D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_SHIFT)&D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION_MASK))|\
+                                                                               ((MinorVer)&D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION_MASK))
+
+// ----------------------------------------------------------------------------
+// Length Token (LenTok)
+//
+// Always follows VerTok
+//
+// [31:00] Unsigned integer count of number of
+//              DWORDs in program code, including version
+//              and length tokens.  So the minimum value
+//              is 0x00000002 (if an empty program is ever
+//              valid).
+//
+// ----------------------------------------------------------------------------
+
+// DECODER MACRO: Retrieve program length
+#define DECODE_D3D10_SB_TOKENIZED_PROGRAM_LENGTH(LenTok) (LenTok)
+// ENCODER MACRO: Create complete LenTok
+#define ENCODE_D3D10_SB_TOKENIZED_PROGRAM_LENGTH(Length) (Length)
+#define MAX_D3D10_SB_TOKENIZED_PROGRAM_LENGTH (0xffffffff)
+
+// ----------------------------------------------------------------------------
+// Opcode Format (OpcodeToken0)
+//
+// [10:00] D3D10_SB_OPCODE_TYPE
+// if( [10:00] == D3D10_SB_OPCODE_CUSTOMDATA )
+// {
+//    Token starts a custom-data block.  See "Custom-Data Block Format".
+// }
+// else // standard opcode token
+// {
+//    [23:11] Opcode-Specific Controls
+//    [30:24] Instruction length in DWORDs including the opcode token.
+//    [31]    0 normally. 1 if extended opcode definition, meaning next DWORD
+//            contains extended opcode token.
+// }
+//
+// ----------------------------------------------------------------------------
+
+typedef enum D3D10_SB_OPCODE_TYPE {
+    D3D10_SB_OPCODE_ADD          ,
+    D3D10_SB_OPCODE_AND          ,
+    D3D10_SB_OPCODE_BREAK        ,
+    D3D10_SB_OPCODE_BREAKC       ,
+    D3D10_SB_OPCODE_CALL         ,
+    D3D10_SB_OPCODE_CALLC        ,
+    D3D10_SB_OPCODE_CASE         ,
+    D3D10_SB_OPCODE_CONTINUE     ,
+    D3D10_SB_OPCODE_CONTINUEC    ,
+    D3D10_SB_OPCODE_CUT          ,
+    D3D10_SB_OPCODE_DEFAULT      ,
+    D3D10_SB_OPCODE_DERIV_RTX    ,
+    D3D10_SB_OPCODE_DERIV_RTY    ,
+    D3D10_SB_OPCODE_DISCARD      ,
+    D3D10_SB_OPCODE_DIV          ,
+    D3D10_SB_OPCODE_DP2          ,
+    D3D10_SB_OPCODE_DP3          ,
+    D3D10_SB_OPCODE_DP4          ,
+    D3D10_SB_OPCODE_ELSE         ,
+    D3D10_SB_OPCODE_EMIT         ,
+    D3D10_SB_OPCODE_EMITTHENCUT  ,
+    D3D10_SB_OPCODE_ENDIF        ,
+    D3D10_SB_OPCODE_ENDLOOP      ,
+    D3D10_SB_OPCODE_ENDSWITCH    ,
+    D3D10_SB_OPCODE_EQ           ,
+    D3D10_SB_OPCODE_EXP          ,
+    D3D10_SB_OPCODE_FRC          ,
+    D3D10_SB_OPCODE_FTOI         ,
+    D3D10_SB_OPCODE_FTOU         ,
+    D3D10_SB_OPCODE_GE           ,
+    D3D10_SB_OPCODE_IADD         ,
+    D3D10_SB_OPCODE_IF           ,
+    D3D10_SB_OPCODE_IEQ          ,
+    D3D10_SB_OPCODE_IGE          ,
+    D3D10_SB_OPCODE_ILT          ,
+    D3D10_SB_OPCODE_IMAD         ,
+    D3D10_SB_OPCODE_IMAX         ,
+    D3D10_SB_OPCODE_IMIN         ,
+    D3D10_SB_OPCODE_IMUL         ,
+    D3D10_SB_OPCODE_INE          ,
+    D3D10_SB_OPCODE_INEG         ,
+    D3D10_SB_OPCODE_ISHL         ,
+    D3D10_SB_OPCODE_ISHR         ,
+    D3D10_SB_OPCODE_ITOF         ,
+    D3D10_SB_OPCODE_LABEL        ,
+    D3D10_SB_OPCODE_LD           ,
+    D3D10_SB_OPCODE_LD_MS        ,
+    D3D10_SB_OPCODE_LOG          ,
+    D3D10_SB_OPCODE_LOOP         ,
+    D3D10_SB_OPCODE_LT           ,
+    D3D10_SB_OPCODE_MAD          ,
+    D3D10_SB_OPCODE_MIN          ,
+    D3D10_SB_OPCODE_MAX          ,
+    D3D10_SB_OPCODE_CUSTOMDATA   ,
+    D3D10_SB_OPCODE_MOV          ,
+    D3D10_SB_OPCODE_MOVC         ,
+    D3D10_SB_OPCODE_MUL          ,
+    D3D10_SB_OPCODE_NE           ,
+    D3D10_SB_OPCODE_NOP          ,
+    D3D10_SB_OPCODE_NOT          ,
+    D3D10_SB_OPCODE_OR           ,
+    D3D10_SB_OPCODE_RESINFO      ,
+    D3D10_SB_OPCODE_RET          ,
+    D3D10_SB_OPCODE_RETC         ,
+    D3D10_SB_OPCODE_ROUND_NE     ,
+    D3D10_SB_OPCODE_ROUND_NI     ,
+    D3D10_SB_OPCODE_ROUND_PI     ,
+    D3D10_SB_OPCODE_ROUND_Z      ,
+    D3D10_SB_OPCODE_RSQ          ,
+    D3D10_SB_OPCODE_SAMPLE       ,
+    D3D10_SB_OPCODE_SAMPLE_C     ,
+    D3D10_SB_OPCODE_SAMPLE_C_LZ  ,
+    D3D10_SB_OPCODE_SAMPLE_L     ,
+    D3D10_SB_OPCODE_SAMPLE_D     ,
+    D3D10_SB_OPCODE_SAMPLE_B     ,
+    D3D10_SB_OPCODE_SQRT         ,
+    D3D10_SB_OPCODE_SWITCH       ,
+    D3D10_SB_OPCODE_SINCOS       ,
+    D3D10_SB_OPCODE_UDIV         ,
+    D3D10_SB_OPCODE_ULT          ,
+    D3D10_SB_OPCODE_UGE          ,
+    D3D10_SB_OPCODE_UMUL         ,
+    D3D10_SB_OPCODE_UMAD         ,
+    D3D10_SB_OPCODE_UMAX         ,
+    D3D10_SB_OPCODE_UMIN         ,
+    D3D10_SB_OPCODE_USHR         ,
+    D3D10_SB_OPCODE_UTOF         ,
+    D3D10_SB_OPCODE_XOR          ,
+    D3D10_SB_OPCODE_DCL_RESOURCE                     , // DCL* opcodes have
+    D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER              , // custom operand formats.
+    D3D10_SB_OPCODE_DCL_SAMPLER                      ,
+    D3D10_SB_OPCODE_DCL_INDEX_RANGE                  ,
+    D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY ,
+    D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE           ,
+    D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT      ,
+    D3D10_SB_OPCODE_DCL_INPUT                        ,
+    D3D10_SB_OPCODE_DCL_INPUT_SGV                    ,
+    D3D10_SB_OPCODE_DCL_INPUT_SIV                    ,
+    D3D10_SB_OPCODE_DCL_INPUT_PS                     ,
+    D3D10_SB_OPCODE_DCL_INPUT_PS_SGV                 ,
+    D3D10_SB_OPCODE_DCL_INPUT_PS_SIV                 ,
+    D3D10_SB_OPCODE_DCL_OUTPUT                       ,
+    D3D10_SB_OPCODE_DCL_OUTPUT_SGV                   ,
+    D3D10_SB_OPCODE_DCL_OUTPUT_SIV                   ,
+    D3D10_SB_OPCODE_DCL_TEMPS                        ,
+    D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP               ,
+    D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS                 ,
+
+// -----------------------------------------------
+
+    // This marks the end of D3D10.0 opcodes
+    D3D10_SB_OPCODE_RESERVED0,
+    
+// ---------- DX 10.1 op codes---------------------
+
+    D3D10_1_SB_OPCODE_LOD,
+    D3D10_1_SB_OPCODE_GATHER4,
+    D3D10_1_SB_OPCODE_SAMPLE_POS,
+    D3D10_1_SB_OPCODE_SAMPLE_INFO,
+
+// -----------------------------------------------
+
+    // This marks the end of D3D10.1 opcodes
+    D3D10_1_SB_OPCODE_RESERVED1,
+
+// ---------- DX 11 op codes---------------------
+    D3D11_SB_OPCODE_HS_DECLS                         , // token marks beginning of HS sub-shader
+    D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE           , // token marks beginning of HS sub-shader
+    D3D11_SB_OPCODE_HS_FORK_PHASE                    , // token marks beginning of HS sub-shader
+    D3D11_SB_OPCODE_HS_JOIN_PHASE                    , // token marks beginning of HS sub-shader
+
+    D3D11_SB_OPCODE_EMIT_STREAM                      ,
+    D3D11_SB_OPCODE_CUT_STREAM                       ,
+    D3D11_SB_OPCODE_EMITTHENCUT_STREAM               ,
+    D3D11_SB_OPCODE_INTERFACE_CALL                   ,
+
+    D3D11_SB_OPCODE_BUFINFO                          ,
+    D3D11_SB_OPCODE_DERIV_RTX_COARSE                 ,
+    D3D11_SB_OPCODE_DERIV_RTX_FINE                   ,
+    D3D11_SB_OPCODE_DERIV_RTY_COARSE                 ,
+    D3D11_SB_OPCODE_DERIV_RTY_FINE                   ,
+    D3D11_SB_OPCODE_GATHER4_C                        ,
+    D3D11_SB_OPCODE_GATHER4_PO                       ,
+    D3D11_SB_OPCODE_GATHER4_PO_C                     ,
+    D3D11_SB_OPCODE_RCP                              ,
+    D3D11_SB_OPCODE_F32TOF16                         ,
+    D3D11_SB_OPCODE_F16TOF32                         ,
+    D3D11_SB_OPCODE_UADDC                            ,
+    D3D11_SB_OPCODE_USUBB                            ,
+    D3D11_SB_OPCODE_COUNTBITS                        ,
+    D3D11_SB_OPCODE_FIRSTBIT_HI                      ,
+    D3D11_SB_OPCODE_FIRSTBIT_LO                      ,
+    D3D11_SB_OPCODE_FIRSTBIT_SHI                     ,
+    D3D11_SB_OPCODE_UBFE                             ,
+    D3D11_SB_OPCODE_IBFE                             ,
+    D3D11_SB_OPCODE_BFI                              ,
+    D3D11_SB_OPCODE_BFREV                            ,
+    D3D11_SB_OPCODE_SWAPC                            ,
+
+    D3D11_SB_OPCODE_DCL_STREAM                       ,
+    D3D11_SB_OPCODE_DCL_FUNCTION_BODY                ,
+    D3D11_SB_OPCODE_DCL_FUNCTION_TABLE               ,
+    D3D11_SB_OPCODE_DCL_INTERFACE                    ,
+    
+    D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT    ,
+    D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT   ,
+    D3D11_SB_OPCODE_DCL_TESS_DOMAIN                  ,
+    D3D11_SB_OPCODE_DCL_TESS_PARTITIONING            ,
+    D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE        ,
+    D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR            ,
+    D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT ,
+    D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT ,
+
+    D3D11_SB_OPCODE_DCL_THREAD_GROUP                 ,
+    D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED  ,
+    D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW    ,
+    D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED,
+    D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW,
+    D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED,
+    D3D11_SB_OPCODE_DCL_RESOURCE_RAW                 ,
+    D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED          ,
+    D3D11_SB_OPCODE_LD_UAV_TYPED                     ,
+    D3D11_SB_OPCODE_STORE_UAV_TYPED                  ,
+    D3D11_SB_OPCODE_LD_RAW                           ,
+    D3D11_SB_OPCODE_STORE_RAW                        ,
+    D3D11_SB_OPCODE_LD_STRUCTURED                    ,
+    D3D11_SB_OPCODE_STORE_STRUCTURED                 ,
+    D3D11_SB_OPCODE_ATOMIC_AND                       ,
+    D3D11_SB_OPCODE_ATOMIC_OR                        ,
+    D3D11_SB_OPCODE_ATOMIC_XOR                       ,
+    D3D11_SB_OPCODE_ATOMIC_CMP_STORE                 ,
+    D3D11_SB_OPCODE_ATOMIC_IADD                      ,
+    D3D11_SB_OPCODE_ATOMIC_IMAX                      ,
+    D3D11_SB_OPCODE_ATOMIC_IMIN                      ,
+    D3D11_SB_OPCODE_ATOMIC_UMAX                      ,
+    D3D11_SB_OPCODE_ATOMIC_UMIN                      ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_ALLOC                 ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_CONSUME               ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_IADD                  ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_AND                   ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_OR                    ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_XOR                   ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_EXCH                  ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH              ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_IMAX                  ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_IMIN                  ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_UMAX                  ,
+    D3D11_SB_OPCODE_IMM_ATOMIC_UMIN                  ,   
+    D3D11_SB_OPCODE_SYNC                             ,
+    
+    D3D11_SB_OPCODE_DADD                             ,
+    D3D11_SB_OPCODE_DMAX                             ,
+    D3D11_SB_OPCODE_DMIN                             ,
+    D3D11_SB_OPCODE_DMUL                             ,
+    D3D11_SB_OPCODE_DEQ                              ,
+    D3D11_SB_OPCODE_DGE                              ,
+    D3D11_SB_OPCODE_DLT                              ,
+    D3D11_SB_OPCODE_DNE                              ,
+    D3D11_SB_OPCODE_DMOV                             ,
+    D3D11_SB_OPCODE_DMOVC                            ,
+    D3D11_SB_OPCODE_DTOF                             ,
+    D3D11_SB_OPCODE_FTOD                             ,
+
+    D3D11_SB_OPCODE_EVAL_SNAPPED                     ,
+    D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX                ,
+    D3D11_SB_OPCODE_EVAL_CENTROID                    ,
+    
+    D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT            ,
+
+    D3D11_SB_OPCODE_ABORT                            ,
+    D3D11_SB_OPCODE_DEBUG_BREAK                      ,
+
+// -----------------------------------------------
+
+    // This marks the end of D3D11.0 opcodes
+    D3D11_SB_OPCODE_RESERVED0,
+
+    D3D11_1_SB_OPCODE_DDIV,
+    D3D11_1_SB_OPCODE_DFMA,
+    D3D11_1_SB_OPCODE_DRCP,
+
+    D3D11_1_SB_OPCODE_MSAD,
+
+    D3D11_1_SB_OPCODE_DTOI,
+    D3D11_1_SB_OPCODE_DTOU,
+    D3D11_1_SB_OPCODE_ITOD,
+    D3D11_1_SB_OPCODE_UTOD,
+
+// -----------------------------------------------
+
+    // This marks the end of D3D11.1 opcodes
+    D3D11_1_SB_OPCODE_RESERVED0,
+
+    D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK,
+
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK,
+    D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK,
+
+    D3DWDDM1_3_SB_OPCODE_CHECK_ACCESS_FULLY_MAPPED,
+
+// -----------------------------------------------
+
+    // This marks the end of WDDM 1.3 opcodes
+    D3DWDDM1_3_SB_OPCODE_RESERVED0,
+
+    D3D10_SB_NUM_OPCODES                                     // Should be the last entry
+} D3D10_SB_OPCODE_TYPE;
+
+// Bits [10:00] of OpcodeToken0 hold the opcode (D3D10_SB_OPCODE_TYPE).
+#define D3D10_SB_OPCODE_TYPE_MASK 0x000007ff
+
+// DECODER MACRO: Extract the opcode from OpcodeToken0 as a
+// D3D10_SB_OPCODE_TYPE value.
+#define DECODE_D3D10_SB_OPCODE_TYPE(Token0) \
+    ((D3D10_SB_OPCODE_TYPE)((Token0) & D3D10_SB_OPCODE_TYPE_MASK))
+
+// ENCODER MACRO: Produce the opcode-type bits of OpcodeToken0 from an
+// opcode value; bits outside the opcode field are dropped.
+#define ENCODE_D3D10_SB_OPCODE_TYPE(Opcode) \
+    ((Opcode) & D3D10_SB_OPCODE_TYPE_MASK)
+
+// Bits [30:24] of OpcodeToken0 hold the instruction length, counted in
+// DWORDs and including the opcode token(s) themselves.
+#define D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_SHIFT 24
+#define D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_MASK  0x7f000000
+
+// Largest length the 7-bit field can carry.
+#define MAX_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH 127
+
+// DECODER MACRO: Read the instruction length (1-127 DWORDs, opcode
+// token(s) included) from OpcodeToken0.
+#define DECODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(Token0) \
+    (((Token0) & D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_MASK) >> \
+     D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_SHIFT)
+
+// ENCODER MACRO: Build the instruction-length portion of OpcodeToken0
+// from a DWORD count (opcode token(s) included). Valid range is 1-127.
+#define ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(DwordCount) \
+    (((DwordCount) << D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_SHIFT) & \
+     D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH_MASK)
+
+// Saturate flag: one bit (bit 13) inside the opcode specific control
+// range of OpcodeToken0.
+#define D3D10_SB_INSTRUCTION_SATURATE_MASK 0x00002000
+
+// DECODER MACRO: Non-zero when OpcodeToken0 asks for the instruction
+// result to be saturated to [0..1]. The value is the masked bit itself,
+// not a normalized 0/1.
+#define DECODE_IS_D3D10_SB_INSTRUCTION_SATURATE_ENABLED(Token0) \
+    ((Token0) & D3D10_SB_INSTRUCTION_SATURATE_MASK)
+
+// ENCODER MACRO: Yield the saturate bit for OpcodeToken0 when bSat is
+// non-zero, and 0 otherwise.
+#define ENCODE_D3D10_SB_INSTRUCTION_SATURATE(bSat) \
+    (((bSat) != 0) ? D3D10_SB_INSTRUCTION_SATURATE_MASK : 0)
+
+// Which boolean test a conditional instruction performs (for example the
+// if_z / if_nz forms of "if"). Stored in bit 18, inside the opcode specific
+// control range of OpcodeToken0.
+typedef enum D3D10_SB_INSTRUCTION_TEST_BOOLEAN
+{
+    D3D10_SB_INSTRUCTION_TEST_ZERO    = 0,
+    D3D10_SB_INSTRUCTION_TEST_NONZERO = 1
+} D3D10_SB_INSTRUCTION_TEST_BOOLEAN;
+
+#define D3D10_SB_INSTRUCTION_TEST_BOOLEAN_SHIFT 18
+#define D3D10_SB_INSTRUCTION_TEST_BOOLEAN_MASK  0x00040000
+
+// DECODER MACRO: For an OpcodeToken0 that requires either a zero or a
+// non-zero test, report which test was chosen.
+#define DECODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(Token0) \
+    ((D3D10_SB_INSTRUCTION_TEST_BOOLEAN) \
+     (((Token0) & D3D10_SB_INSTRUCTION_TEST_BOOLEAN_MASK) >> \
+      D3D10_SB_INSTRUCTION_TEST_BOOLEAN_SHIFT))
+
+// ENCODER MACRO: Place "zero" or "nonzero" into the opcode specific
+// control range of OpcodeToken0.
+#define ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(Test) \
+    (((Test) << D3D10_SB_INSTRUCTION_TEST_BOOLEAN_SHIFT) & \
+     D3D10_SB_INSTRUCTION_TEST_BOOLEAN_MASK)
+
+// Precise-value mask: bits [22:19] of OpcodeToken0, part of the opcode
+// specific control range. One bit per output channel; for instructions with
+// several output operands the bit applies to that component of each operand.
+// NOTE(review): the original note refers to "D3D10_SB_COMPONENT_NAME" for the
+// channel order - presumably D3D10_SB_4_COMPONENT_NAME (bit 0 = x ... bit 3 =
+// w); confirm against the consumers of this field.
+#define D3D11_SB_INSTRUCTION_PRECISE_VALUES_SHIFT 19
+#define D3D11_SB_INSTRUCTION_PRECISE_VALUES_MASK  0x00780000
+
+// DECODER MACRO: Pull the 4-bit per-channel precise mask out of
+// OpcodeToken0; the result is right-aligned in bits [3:0].
+#define DECODE_D3D11_SB_INSTRUCTION_PRECISE_VALUES(Token0) \
+    ((((Token0) & D3D11_SB_INSTRUCTION_PRECISE_VALUES_MASK) >> \
+      D3D11_SB_INSTRUCTION_PRECISE_VALUES_SHIFT))
+
+// ENCODER MACRO: Move a right-aligned 4-bit per-channel mask into the
+// precise-value field of OpcodeToken0. Bits above the low four are dropped.
+#define ENCODE_D3D11_SB_INSTRUCTION_PRECISE_VALUES(ChannelBits) \
+    (((ChannelBits) << D3D11_SB_INSTRUCTION_PRECISE_VALUES_SHIFT) & \
+     D3D11_SB_INSTRUCTION_PRECISE_VALUES_MASK)
+
+// Return type selector for the resinfo instruction. Stored in bits [12:11]
+// of OpcodeToken0 (opcode specific control range).
+typedef enum D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE
+{
+    D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT    = 0,
+    D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT = 1,
+    D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT     = 2
+} D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE;
+
+#define D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_SHIFT 11
+#define D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_MASK  0x00001800
+
+// DECODER MACRO: Given the OpcodeToken0 of a resinfo instruction, obtain
+// its return type.
+#define DECODE_D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE(Token0) \
+    ((D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE) \
+     (((Token0) & D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_MASK) >> \
+      D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_SHIFT))
+
+// ENCODER MACRO: Place the resinfo return type into the opcode specific
+// control range of OpcodeToken0.
+#define ENCODE_D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE(RetType) \
+    (((RetType) << D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_SHIFT) & \
+     D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE_MASK)
+
+// Flags of the sync instruction. Each flag is one bit of OpcodeToken0
+// (bits 11 through 14); D3D11_SB_SYNC_FLAGS_MASK covers all four.
+#define D3D11_SB_SYNC_THREADS_IN_GROUP                    0x00000800
+#define D3D11_SB_SYNC_THREAD_GROUP_SHARED_MEMORY          0x00001000
+#define D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP  0x00002000
+#define D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL 0x00004000
+#define D3D11_SB_SYNC_FLAGS_MASK                          0x00007800
+
+// DECODER MACRO: Read the sync instruction flags from OpcodeToken0. The
+// flags are returned in place (not shifted), so they can be compared with
+// the D3D11_SB_SYNC_* constants directly.
+#define DECODE_D3D11_SB_SYNC_FLAGS(OpcodeToken0) \
+    ((OpcodeToken0) & D3D11_SB_SYNC_FLAGS_MASK)
+
+// ENCODER MACRO: Given a set of sync instruction flags or'd together,
+// produce their contribution to OpcodeToken0.
+#define ENCODE_D3D11_SB_SYNC_FLAGS(SyncFlags) \
+    ((SyncFlags) & D3D11_SB_SYNC_FLAGS_MASK)
+
+// Bit 31 of an opcode token flags that an extended opcode token follows.
+#define D3D10_SB_OPCODE_EXTENDED_SHIFT 31
+#define D3D10_SB_OPCODE_EXTENDED_MASK  0x80000000
+
+// DECODER MACRO: Yields 1 when the opcode is extended by an additional
+// opcode token, 0 otherwise.
+#define DECODE_IS_D3D10_SB_OPCODE_EXTENDED(Token0) \
+    (((Token0) & D3D10_SB_OPCODE_EXTENDED_MASK) >> \
+     D3D10_SB_OPCODE_EXTENDED_SHIFT)
+
+// ENCODER MACRO: Record in OpcodeToken0 whether the opcode is extended by
+// an additional opcode token (any non-zero bExtended sets the bit).
+#define ENCODE_D3D10_SB_OPCODE_EXTENDED(bExtended) \
+    (((bExtended) != 0) ? D3D10_SB_OPCODE_EXTENDED_MASK : 0)
+
+// ----------------------------------------------------------------------------
+// Extended Opcode Format (OpcodeToken1)
+//
+// If bit31 of an opcode token is set, the
+// opcode has an additional extended opcode token DWORD
+// directly following OpcodeToken0.  Other tokens
+// expected for the opcode, such as the operand
+// token(s) always follow
+// OpcodeToken0 AND OpcodeToken1..n (extended
+// opcode tokens, if present).
+//
+// [05:00] D3D10_SB_EXTENDED_OPCODE_TYPE
+// [30:06] if([05:00] == D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS)
+//         {
+//              This custom opcode contains controls for SAMPLE.
+//              [08:06] Ignored, 0.
+//              [12:09] U texel immediate offset (4 bit 2's comp) (0 default)
+//              [16:13] V texel immediate offset (4 bit 2's comp) (0 default)
+//              [20:17] W texel immediate offset (4 bit 2's comp) (0 default)
+//              [30:21] Ignored, 0.
+//         }
+//         else if( [05:00] == D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM )
+//         {
+//              [10:06] D3D10_SB_RESOURCE_DIMENSION
+//              [22:11] When dimension is D3D11_SB_RESOURCE_DIMENSION_STRUCTURED_BUFFER this holds the buffer stride, otherwise 0
+//              [30:23] Ignored, 0.
+//         }
+//         else if( [05:00] == D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE )
+//         {
+//              [09:06] D3D10_SB_RESOURCE_RETURN_TYPE for component X
+//              [13:10] D3D10_SB_RESOURCE_RETURN_TYPE for component Y
+//              [17:14] D3D10_SB_RESOURCE_RETURN_TYPE for component Z
+//              [21:18] D3D10_SB_RESOURCE_RETURN_TYPE for component W
+//              [30:22] Ignored, 0.
+//         }
+//         else
+//         {
+//              [30:06] Ignored, 0.
+//         }
+// [31]    0 normally. 1 there is another extended opcode.  Any number
+//         of extended opcode tokens can be chained.  It is possible that some extended
+//         opcode tokens could include multiple DWORDS - that is defined
+//         on a case by case basis.
+//
+// ----------------------------------------------------------------------------
+// Kind of an extended opcode token; stored in bits [05:00] of the token
+// and used to decide how the remaining bits are interpreted.
+typedef enum D3D10_SB_EXTENDED_OPCODE_TYPE
+{
+    D3D10_SB_EXTENDED_OPCODE_EMPTY                = 0,
+    D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS      = 1,
+    D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM         = 2,
+    D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE = 3,
+} D3D10_SB_EXTENDED_OPCODE_TYPE;
+
+#define D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES 3
+
+#define D3D10_SB_EXTENDED_OPCODE_TYPE_MASK 0x0000003f
+
+// DECODER MACRO: Given an extended opcode token (OpcodeToken1), determine
+// which D3D10_SB_EXTENDED_OPCODE_TYPE it is, so that the rest of the
+// token's contents can be interpreted.
+#define DECODE_D3D10_SB_EXTENDED_OPCODE_TYPE(Token1) \
+    ((D3D10_SB_EXTENDED_OPCODE_TYPE)((Token1) & D3D10_SB_EXTENDED_OPCODE_TYPE_MASK))
+
+// ENCODER MACRO: Produce the type bits of an extended opcode token
+// (OpcodeToken1).
+#define ENCODE_D3D10_SB_EXTENDED_OPCODE_TYPE(ExtType) \
+    ((ExtType) & D3D10_SB_EXTENDED_OPCODE_TYPE_MASK)
+
+// Coordinate selector for the immediate texel address offsets carried by a
+// D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS extended opcode token.
+typedef enum D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD
+{
+    D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_U        = 0,
+    D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_V        = 1,
+    D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_W        = 2,
+} D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD;
+#define D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD_MASK (3)
+// Each coordinate owns a 4-bit field: U at bit 9, V at bit 13, W at bit 17.
+#define D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord) (9+4*((Coord)&D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD_MASK))
+#define D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_MASK(Coord) (0x0000000f<<D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord))
+
+// DECODER MACRO: Given an extended opcode token
+// (OpcodeToken1), and extended token type ==
+// D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS, extract the immediate
+// texel address offset field for u/v/w
+// (D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD).
+// The result is the raw 4-bit field, right-aligned in bits [3:0]. This
+// macro does NOT sign extend: the field holds a 4 bit 2's complement
+// value, so the caller must map 8..15 to -8..-1 itself.
+#define DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(Coord,OpcodeToken1) ((((OpcodeToken1)&D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_MASK(Coord))>>(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord))))
+
+// ENCODER MACRO: Store the immediate texel address offset
+// for U or V or W Coord (D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_COORD) in an
+// extended opcode token (OpcodeToken1) that has extended opcode
+// type == D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS (opcode type encoded separately)
+// A 2's complement number is expected as input, from which the LSB 4 bits are extracted.
+// The low 4 bits are isolated BEFORE shifting so that a negative offset is
+// never left-shifted (undefined behavior in C and in C++ before C++20).
+#define ENCODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(Coord,ImmediateOffset) ((((ImmediateOffset)&0x0000000f)<<D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord))&D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_MASK(Coord))
+
+// Bits [10:06] of a D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM token hold the
+// resource dimension.
+#define D3D11_SB_EXTENDED_RESOURCE_DIMENSION_SHIFT 6
+#define D3D11_SB_EXTENDED_RESOURCE_DIMENSION_MASK  0x000007C0
+
+// DECODER MACRO: Given an extended resource declaration token
+// (D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM), obtain the resource dimension
+// as a D3D10_SB_RESOURCE_DIMENSION value.
+#define DECODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION(TokenN) \
+    ((D3D10_SB_RESOURCE_DIMENSION) \
+     (((TokenN) & D3D11_SB_EXTENDED_RESOURCE_DIMENSION_MASK) >> \
+      D3D11_SB_EXTENDED_RESOURCE_DIMENSION_SHIFT))
+
+// ENCODER MACRO: Place a resource dimension (D3D10_SB_RESOURCE_DIMENSION
+// enum) into an extended resource declaration token
+// (D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM).
+#define ENCODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION(Dim) \
+    (((Dim) << D3D11_SB_EXTENDED_RESOURCE_DIMENSION_SHIFT) & \
+     D3D11_SB_EXTENDED_RESOURCE_DIMENSION_MASK)
+
+// Bits [22:11] of a D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM token hold the
+// structure stride (a 12-bit unsigned integer).
+#define D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_SHIFT 11
+#define D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_MASK  0x007FF800
+
+// DECODER MACRO: Given an extended resource declaration token for a
+// structured buffer (D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM), obtain the
+// structure stride (12-bit unsigned integer).
+#define DECODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE(TokenN) \
+    (((TokenN) & D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_MASK) >> \
+     D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_SHIFT)
+
+// ENCODER MACRO: Place a structure stride (12-bit unsigned integer) into
+// an extended resource declaration token
+// (D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM). Strides that do not fit in 12
+// bits lose their upper bits.
+#define ENCODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE(ByteStride) \
+    (((ByteStride) << D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_SHIFT) & \
+     D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE_MASK)
+
+// Each component's return type is a 4-bit field; in an extended token of type
+// D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE the field for component 0
+// starts at bit 6 and each further component follows 4 bits higher.
+#define D3D10_SB_RESOURCE_RETURN_TYPE_MASK    0x0000000f
+#define D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS 0x00000004
+#define D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE_SHIFT 6
+
+// DECODER MACRO: Get the resource return type for component (0-3) from
+// an extended resource declaration token (D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE)
+// Component is parenthesized so that expression arguments (e.g. i+1)
+// produce the intended shift amount.
+#define DECODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(OpcodeTokenN, Component) \
+    ((D3D10_SB_RESOURCE_RETURN_TYPE)(((OpcodeTokenN) >> \
+    ((Component) * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS + D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE_SHIFT))&D3D10_SB_RESOURCE_RETURN_TYPE_MASK))
+
+// ENCODER MACRO: Generate a resource return type for a component in an extended
+// resource declaration token (D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE)
+// Only the low 4 bits of ReturnType are kept. Component is parenthesized
+// for the same reason as in the decoder.
+#define ENCODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(ReturnType, Component) \
+    (((ReturnType)&D3D10_SB_RESOURCE_RETURN_TYPE_MASK) << ((Component) * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS + D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE_SHIFT))
+
+// ----------------------------------------------------------------------------
+// Custom-Data Block Format
+//
+// DWORD 0 (CustomDataDescTok):
+// [10:00] == D3D10_SB_OPCODE_CUSTOMDATA
+// [31:11] == D3D10_SB_CUSTOMDATA_CLASS
+//
+// DWORD 1: 
+//          32-bit unsigned integer count of number
+//          of DWORDs in custom-data block,
+//          including DWORD 0 and DWORD 1.
+//          So the minimum value is 0x00000002,
+//          meaning empty custom-data.
+//
+// Layout of custom-data contents, for the various meta-data classes,
+// not defined in this file.
+//
+// ----------------------------------------------------------------------------
+
+// Class of a custom-data block; stored in bits [31:11] of the first DWORD
+// of the block (CustomDataDescTok).
+typedef enum D3D10_SB_CUSTOMDATA_CLASS
+{
+    D3D10_SB_CUSTOMDATA_COMMENT = 0,
+    D3D10_SB_CUSTOMDATA_DEBUGINFO,
+    D3D10_SB_CUSTOMDATA_OPAQUE,
+    D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER,
+    D3D11_SB_CUSTOMDATA_SHADER_MESSAGE,
+    D3D11_SB_CUSTOMDATA_SHADER_CLIP_PLANE_CONSTANT_MAPPINGS_FOR_DX9,
+} D3D10_SB_CUSTOMDATA_CLASS;
+
+#define D3D10_SB_CUSTOMDATA_CLASS_SHIFT 11
+#define D3D10_SB_CUSTOMDATA_CLASS_MASK  0xfffff800
+
+// DECODER MACRO: Determine which class of custom-data is present. How the
+// contents of the block are laid out depends on that class.
+#define DECODE_D3D10_SB_CUSTOMDATA_CLASS(DescTok) \
+    ((D3D10_SB_CUSTOMDATA_CLASS) \
+     (((DescTok) & D3D10_SB_CUSTOMDATA_CLASS_MASK) >> \
+      D3D10_SB_CUSTOMDATA_CLASS_SHIFT))
+
+// ENCODER MACRO: Build the complete CustomDataDescTok: the
+// D3D10_SB_OPCODE_CUSTOMDATA opcode bits or'd with the class bits.
+#define ENCODE_D3D10_SB_CUSTOMDATA_CLASS(Class) \
+    (ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CUSTOMDATA) | \
+     (((Class) << D3D10_SB_CUSTOMDATA_CLASS_SHIFT) & \
+      D3D10_SB_CUSTOMDATA_CLASS_MASK))
+
+// ----------------------------------------------------------------------------
+// Instruction Operand Format (OperandToken0)
+//
+// [01:00] D3D10_SB_OPERAND_NUM_COMPONENTS
+// [11:02] Component Selection
+//         if([01:00] == D3D10_SB_OPERAND_0_COMPONENT)
+//              [11:02] = Ignored, 0
+//         else if([01:00] == D3D10_SB_OPERAND_1_COMPONENT)
+//              [11:02] = Ignored, 0
+//         else if([01:00] == D3D10_SB_OPERAND_4_COMPONENT)
+//         {
+//              [03:02] = D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE
+//              if([03:02] == D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE)
+//              {
+//                  [07:04] = D3D10_SB_OPERAND_4_COMPONENT_MASK
+//                  [11:08] = Ignored, 0
+//              }
+//              else if([03:02] == D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE)
+//              {
+//                  [11:04] = D3D10_SB_4_COMPONENT_SWIZZLE
+//              }
+//              else if([03:02] == D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE)
+//              {
+//                  [05:04] = D3D10_SB_4_COMPONENT_NAME
+//                  [11:06] = Ignored, 0
+//              }
+//         }
+//         else if([01:00] == D3D10_SB_OPERAND_N_COMPONENT)
+//         {
+//              Currently not defined.
+//         }
+// [19:12] D3D10_SB_OPERAND_TYPE
+// [21:20] D3D10_SB_OPERAND_INDEX_DIMENSION:
+//            Number of dimensions in the register
+//            file (NOT the # of dimensions in the
+//            individual register or memory
+//            resource being referenced).
+// [24:22] if( [21:20] >= D3D10_SB_OPERAND_INDEX_1D )
+//             D3D10_SB_OPERAND_INDEX_REPRESENTATION for first operand index
+//         else
+//             Ignored, 0
+// [27:25] if( [21:20] >= D3D10_SB_OPERAND_INDEX_2D )
+//             D3D10_SB_OPERAND_INDEX_REPRESENTATION for second operand index
+//         else
+//             Ignored, 0
+// [30:28] if( [21:20] == D3D10_SB_OPERAND_INDEX_3D )
+//             D3D10_SB_OPERAND_INDEX_REPRESENTATION for third operand index
+//         else
+//             Ignored, 0
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.
+//
+// ----------------------------------------------------------------------------
+
+// How many components the data vector referenced by an operand has.
+// Stored in bits [01:00] of OperandToken0.
+typedef enum D3D10_SB_OPERAND_NUM_COMPONENTS
+{
+    D3D10_SB_OPERAND_0_COMPONENT = 0,
+    D3D10_SB_OPERAND_1_COMPONENT = 1,
+    D3D10_SB_OPERAND_4_COMPONENT = 2,
+    D3D10_SB_OPERAND_N_COMPONENT = 3 // unused for now
+} D3D10_SB_OPERAND_NUM_COMPONENTS;
+
+#define D3D10_SB_OPERAND_NUM_COMPONENTS_MASK 0x00000003
+
+// DECODER MACRO: Read from OperandToken0 the number of components in the
+// data vector the operand refers to (D3D10_SB_OPERAND_NUM_COMPONENTS enum).
+#define DECODE_D3D10_SB_OPERAND_NUM_COMPONENTS(Token0) \
+    ((D3D10_SB_OPERAND_NUM_COMPONENTS)((Token0) & D3D10_SB_OPERAND_NUM_COMPONENTS_MASK))
+
+// ENCODER MACRO: Produce the OperandToken0 bits stating the number of
+// components in the data vector the operand refers to
+// (D3D10_SB_OPERAND_NUM_COMPONENTS enum).
+#define ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(Count) \
+    ((Count) & D3D10_SB_OPERAND_NUM_COMPONENTS_MASK)
+
+// How data is selected from a 4-component operand. Stored in bits [03:02]
+// of OperandToken0.
+typedef enum D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE
+{
+    D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE     = 0, // mask 4 components
+    D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE  = 1, // swizzle 4 components
+    D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE = 2, // select 1 of 4 components
+} D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE;
+
+#define D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_SHIFT 2
+#define D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_MASK  0x0000000c
+
+// DECODER MACRO: For an operand representing 4-component data, read from
+// OperandToken0 the method used to select data from the 4 components
+// (D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE).
+#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(Token0) \
+    ((D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE) \
+     (((Token0) & D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_MASK) >> \
+      D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_SHIFT))
+
+// ENCODER MACRO: For an operand representing 4-component data, produce the
+// OperandToken0 bits for a D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE value.
+#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(Mode) \
+    (((Mode) << D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_SHIFT) & \
+     D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE_MASK)
+
+// Index of one component of a 4-component vector. The R/G/B/A names are
+// aliases with the same values as X/Y/Z/W.
+typedef enum D3D10_SB_4_COMPONENT_NAME
+{
+    // positional names
+    D3D10_SB_4_COMPONENT_X = 0,
+    D3D10_SB_4_COMPONENT_Y = 1,
+    D3D10_SB_4_COMPONENT_Z = 2,
+    D3D10_SB_4_COMPONENT_W = 3,
+    // color aliases
+    D3D10_SB_4_COMPONENT_R = 0,
+    D3D10_SB_4_COMPONENT_G = 1,
+    D3D10_SB_4_COMPONENT_B = 2,
+    D3D10_SB_4_COMPONENT_A = 3
+} D3D10_SB_4_COMPONENT_NAME;
+
+// A component name fits in two bits.
+#define D3D10_SB_4_COMPONENT_NAME_MASK 3
+
+// MACROS FOR USE IN D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE:
+
+// The component mask occupies bits [07:04] of OperandToken0, one bit per
+// component (x at bit 4 ... w at bit 7). R/G/B/A are aliases of X/Y/Z/W.
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK   0x000000f0
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT  4
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_X      0x00000010
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_Y      0x00000020
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_Z      0x00000040
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_W      0x00000080
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_R      D3D10_SB_OPERAND_4_COMPONENT_MASK_X
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_G      D3D10_SB_OPERAND_4_COMPONENT_MASK_Y
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_B      D3D10_SB_OPERAND_4_COMPONENT_MASK_Z
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_A      D3D10_SB_OPERAND_4_COMPONENT_MASK_W
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK_ALL    D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK
+
+// DECODER MACRO: When 4 component selection mode is
+// D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE, this macro
+// extracts from OperandToken0 the 4 component (xyzw) mask,
+// as a field of D3D10_SB_OPERAND_4_COMPONENT_MASK_[X|Y|Z|W] flags.
+// The flags are returned in place (not shifted down).
+// Alternatively, the D3D10_SB_OPERAND_4_COMPONENT_MASK_[X|Y|Z|W] masks
+// can be tested on OperandToken0 directly, without this macro.
+#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(OperandToken0) ((OperandToken0)&D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK)
+
+// ENCODER MACRO: Given a set of
+// D3D10_SB_OPERAND_4_COMPONENT_MASK_[X|Y|Z|W] values
+// or'd together, encode them in OperandToken0.
+#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(ComponentMask) ((ComponentMask)&D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK)
+
+// ENCODER/DECODER MACRO: Given a D3D10_SB_4_COMPONENT_NAME,
+// generate the 4-component mask for it.
+// This can be used in loops that build masks or read masks.
+// Alternatively, the D3D10_SB_OPERAND_4_COMPONENT_MASK_[X|Y|Z|W] masks
+// can be used directly, without this macro.
+// ComponentName is parenthesized so that expression arguments with
+// low-precedence operators (e.g. i & 3) are evaluated before the addition.
+// Component indices above 3 yield 0.
+#define D3D10_SB_OPERAND_4_COMPONENT_MASK(ComponentName) ((1<<(D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT+(ComponentName)))&D3D10_SB_OPERAND_4_COMPONENT_MASK_MASK)
+
+// MACROS FOR USE IN D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE:
+
+// The swizzle occupies bits [11:04] of OperandToken0: four 2-bit
+// D3D10_SB_4_COMPONENT_NAME selectors, with the source for destination x
+// in the lowest pair and the source for destination w in the highest.
+#define D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SHIFT 4
+#define D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MASK  0x00000ff0
+
+// DECODER MACRO: When 4 component selection mode is
+// D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE, isolate the swizzle field of
+// OperandToken0. The field is returned in place (not shifted down), so it
+// can be passed on to
+// DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE.
+#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(Token0) \
+    ((Token0) & D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MASK)
+
+// DECODER MACRO: Pass a D3D10_SB_4_COMPONENT_NAME as "DestComp" to obtain,
+// from OperandToken0 or from a decoded swizzle, the source component
+// (D3D10_SB_4_COMPONENT_NAME enum) that feeds that destination component.
+#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE(Token0, DestComp) \
+    ((D3D10_SB_4_COMPONENT_NAME) \
+     (((Token0) >> (D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SHIFT + \
+                    2 * ((DestComp) & D3D10_SB_4_COMPONENT_NAME_MASK))) & \
+      D3D10_SB_4_COMPONENT_NAME_MASK))
+
+// ENCODER MACRO: Build a 4 component swizzle from four
+// D3D10_SB_4_COMPONENT_NAME source values, given for destination
+// components x, y, z, w in that order.
+#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(XSrc, YSrc, ZSrc, WSrc) \
+    ((((XSrc) & D3D10_SB_4_COMPONENT_NAME_MASK) | \
+      (((YSrc) & D3D10_SB_4_COMPONENT_NAME_MASK) << 2) | \
+      (((ZSrc) & D3D10_SB_4_COMPONENT_NAME_MASK) << 4) | \
+      (((WSrc) & D3D10_SB_4_COMPONENT_NAME_MASK) << 6)) \
+     << D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SHIFT)
+
+// ENCODER/DECODER MACROS: Various common swizzle patterns, already encoded
+// (identity swizzle, and replication of a single channel to all four)
+#define D3D10_SB_OPERAND_4_COMPONENT_NOSWIZZLE   ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_X,\
+                                                                                   D3D10_SB_4_COMPONENT_Y,\
+                                                                                   D3D10_SB_4_COMPONENT_Z,\
+                                                                                   D3D10_SB_4_COMPONENT_W)
+
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEX  ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_X,\
+                                                                                   D3D10_SB_4_COMPONENT_X,\
+                                                                                   D3D10_SB_4_COMPONENT_X,\
+                                                                                   D3D10_SB_4_COMPONENT_X)
+
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEY  ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_Y,\
+                                                                                   D3D10_SB_4_COMPONENT_Y,\
+                                                                                   D3D10_SB_4_COMPONENT_Y,\
+                                                                                   D3D10_SB_4_COMPONENT_Y)
+
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEZ  ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_Z,\
+                                                                                   D3D10_SB_4_COMPONENT_Z,\
+                                                                                   D3D10_SB_4_COMPONENT_Z,\
+                                                                                   D3D10_SB_4_COMPONENT_Z)
+
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEW  ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(D3D10_SB_4_COMPONENT_W,\
+                                                                                   D3D10_SB_4_COMPONENT_W,\
+                                                                                   D3D10_SB_4_COMPONENT_W,\
+                                                                                   D3D10_SB_4_COMPONENT_W)
+
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATERED    D3D10_SB_OPERAND_4_COMPONENT_REPLICATEX // R/G/B/A aliases of the X/Y/Z/W replicate swizzles
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEGREEN  D3D10_SB_OPERAND_4_COMPONENT_REPLICATEY
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEBLUE   D3D10_SB_OPERAND_4_COMPONENT_REPLICATEZ
+#define D3D10_SB_OPERAND_4_COMPONENT_REPLICATEALPHA  D3D10_SB_OPERAND_4_COMPONENT_REPLICATEW
+
+// MACROS FOR USE IN D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE:
+#define D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MASK   0x00000030 // bits [05:04] of OperandToken0
+#define D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_SHIFT  4
+
+// DECODER MACRO: When 4 component selection mode is
+// D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE, this macro
+// extracts from OperandToken0 a D3D10_SB_4_COMPONENT_NAME
+// which picks one of the 4 components.
+#define DECODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(OperandToken0) ((D3D10_SB_4_COMPONENT_NAME)(((OperandToken0)&D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MASK)>>D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_SHIFT))
+
+// ENCODER MACRO: Given a D3D10_SB_4_COMPONENT_NAME selecting
+// a single component for D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE,
+// encode it into OperandToken0 (bits outside the 2-bit field are dropped).
+#define ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(SelectedComp) (((SelectedComp)<<D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_SHIFT)&D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MASK)
+
+// MACROS FOR DETERMINING OPERAND TYPE:
+
+typedef enum D3D10_SB_OPERAND_TYPE
+{
+    D3D10_SB_OPERAND_TYPE_TEMP           = 0,  // Temporary Register File
+    D3D10_SB_OPERAND_TYPE_INPUT          = 1,  // General Input Register File
+    D3D10_SB_OPERAND_TYPE_OUTPUT         = 2,  // General Output Register File
+    D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP = 3,  // Temporary Register File (indexable)
+    D3D10_SB_OPERAND_TYPE_IMMEDIATE32    = 4,  // 32bit/component immediate value(s)
+                                          // If, for example, operand token bits
+                                          // [01:00]==D3D10_SB_OPERAND_4_COMPONENT,
+                                          // this means that the operand type
+                                          // D3D10_SB_OPERAND_TYPE_IMMEDIATE32
+                                          // results in 4 additional 32bit
+                                          // DWORDS present for the operand.
+    D3D10_SB_OPERAND_TYPE_IMMEDIATE64    = 5,  // 64bit/comp.imm.val(s)HI:LO
+    D3D10_SB_OPERAND_TYPE_SAMPLER        = 6,  // Reference to sampler state
+    D3D10_SB_OPERAND_TYPE_RESOURCE       = 7,  // Reference to memory resource (e.g. texture)
+    D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER= 8,  // Reference to constant buffer
+    D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER= 9,  // Reference to immediate constant buffer
+    D3D10_SB_OPERAND_TYPE_LABEL          = 10, // Label
+    D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID = 11, // Input primitive ID
+    D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH   = 12, // Output Depth
+    D3D10_SB_OPERAND_TYPE_NULL           = 13, // Null register, used to discard results of operations
+                                               // Operands below are new in DX 10.1
+    D3D10_SB_OPERAND_TYPE_RASTERIZER     = 14, // DX10.1 Rasterizer register, used to denote the depth/stencil and render target resources
+    D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15, // DX10.1 PS output MSAA coverage mask (scalar)
+                                               // Operands below are new in DX 11
+    D3D11_SB_OPERAND_TYPE_STREAM         = 16, // Reference to GS stream output resource
+    D3D11_SB_OPERAND_TYPE_FUNCTION_BODY  = 17, // Reference to a function definition
+    D3D11_SB_OPERAND_TYPE_FUNCTION_TABLE = 18, // Reference to a set of functions used by a class
+    D3D11_SB_OPERAND_TYPE_INTERFACE      = 19, // Reference to an interface
+    D3D11_SB_OPERAND_TYPE_FUNCTION_INPUT = 20, // Reference to an input parameter to a function
+    D3D11_SB_OPERAND_TYPE_FUNCTION_OUTPUT = 21, // Reference to an output parameter to a function
+    D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID = 22, // HS Control Point phase input saying which output control point ID this is
+    D3D11_SB_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID = 23, // HS Fork Phase input instance ID
+    D3D11_SB_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID = 24, // HS Join Phase input instance ID
+    D3D11_SB_OPERAND_TYPE_INPUT_CONTROL_POINT = 25, // HS Fork+Join, DS phase input control points (array of them)
+    D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT = 26, // HS Fork+Join phase output control points (array of them)
+    D3D11_SB_OPERAND_TYPE_INPUT_PATCH_CONSTANT = 27, // DS+HSJoin Input Patch Constants (array of them)
+    D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT = 28, // DS Input Domain point
+    D3D11_SB_OPERAND_TYPE_THIS_POINTER       = 29, // Reference to an interface this pointer
+    D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW = 30, // Reference to UAV u#
+    D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY = 31, // Reference to Thread Group Shared Memory g#
+    D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID = 32, // Compute Shader Thread ID
+    D3D11_SB_OPERAND_TYPE_INPUT_THREAD_GROUP_ID = 33, // Compute Shader Thread Group ID
+    D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP = 34, // Compute Shader Thread ID In Thread Group
+    D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK = 35, // Pixel shader coverage mask input
+    D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED = 36, // Compute Shader Thread ID In Group Flattened to a 1D value.
+    D3D11_SB_OPERAND_TYPE_INPUT_GS_INSTANCE_ID = 37, // Input GS instance ID
+    D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL = 38, // Output Depth, forced to be greater than or equal to current depth
+    D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL    = 39, // Output Depth, forced to be less than or equal to current depth
+    D3D11_SB_OPERAND_TYPE_CYCLE_COUNTER = 40, // Cycle counter
+    D3D11_SB_OPERAND_TYPE_OUTPUT_STENCIL_REF = 41, // DX11 PS output stencil reference (scalar)
+    D3D11_SB_OPERAND_TYPE_INNER_COVERAGE = 42, // DX11 PS input inner coverage (scalar)
+} D3D10_SB_OPERAND_TYPE;
+
+#define D3D10_SB_OPERAND_TYPE_MASK   0x000ff000 // bits [19:12] of OperandToken0
+#define D3D10_SB_OPERAND_TYPE_SHIFT  12
+
+// DECODER MACRO: Determine operand type (D3D10_SB_OPERAND_TYPE enum) from OperandToken0.
+#define DECODE_D3D10_SB_OPERAND_TYPE(OperandToken0) ((D3D10_SB_OPERAND_TYPE)(((OperandToken0)&D3D10_SB_OPERAND_TYPE_MASK)>>D3D10_SB_OPERAND_TYPE_SHIFT))
+
+// ENCODER MACRO: Store operand type (D3D10_SB_OPERAND_TYPE enum) in OperandToken0.
+#define ENCODE_D3D10_SB_OPERAND_TYPE(OperandType) (((OperandType)<<D3D10_SB_OPERAND_TYPE_SHIFT)&D3D10_SB_OPERAND_TYPE_MASK)
+
+typedef enum D3D10_SB_OPERAND_INDEX_DIMENSION
+{
+    D3D10_SB_OPERAND_INDEX_0D = 0, // e.g. Position
+    D3D10_SB_OPERAND_INDEX_1D = 1, // Most common.  e.g. Temp registers.
+    D3D10_SB_OPERAND_INDEX_2D = 2, // e.g. Geometry Program Input registers.
+    D3D10_SB_OPERAND_INDEX_3D = 3, // 3D rarely if ever used.
+} D3D10_SB_OPERAND_INDEX_DIMENSION;
+#define D3D10_SB_OPERAND_INDEX_DIMENSION_MASK  0x00300000 // bits [21:20] of OperandToken0
+#define D3D10_SB_OPERAND_INDEX_DIMENSION_SHIFT 20
+
+// DECODER MACRO: Determine operand index dimension
+// (D3D10_SB_OPERAND_INDEX_DIMENSION enum) from OperandToken0.
+#define DECODE_D3D10_SB_OPERAND_INDEX_DIMENSION(OperandToken0) ((D3D10_SB_OPERAND_INDEX_DIMENSION)(((OperandToken0)&D3D10_SB_OPERAND_INDEX_DIMENSION_MASK)>>D3D10_SB_OPERAND_INDEX_DIMENSION_SHIFT))
+
+// ENCODER MACRO: Store operand index dimension
+// (D3D10_SB_OPERAND_INDEX_DIMENSION enum) in OperandToken0.
+#define ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(OperandIndexDim) (((OperandIndexDim)<<D3D10_SB_OPERAND_INDEX_DIMENSION_SHIFT)&D3D10_SB_OPERAND_INDEX_DIMENSION_MASK)
+
+typedef enum D3D10_SB_OPERAND_INDEX_REPRESENTATION
+{
+    D3D10_SB_OPERAND_INDEX_IMMEDIATE32               = 0, // Extra DWORD
+    D3D10_SB_OPERAND_INDEX_IMMEDIATE64               = 1, // 2 Extra DWORDs
+                                                     //   (HI32:LO32)
+    D3D10_SB_OPERAND_INDEX_RELATIVE                  = 2, // Extra operand
+    D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, // Extra DWORD followed by
+                                                     //   extra operand
+    D3D10_SB_OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4, // 2 Extra DWORDS
+                                                     //   (HI32:LO32) followed
+                                                     //   by extra operand
+} D3D10_SB_OPERAND_INDEX_REPRESENTATION;
+#define D3D10_SB_OPERAND_INDEX_REPRESENTATION_SHIFT(Dim) (22+3*((Dim)&3)) // fields start at bit 22, spaced 3 bits apart per index dimension
+#define D3D10_SB_OPERAND_INDEX_REPRESENTATION_MASK(Dim) (0x3<<D3D10_SB_OPERAND_INDEX_REPRESENTATION_SHIFT(Dim)) // NOTE(review): 2-bit mask, but fields are spaced 3 bits apart and the enum reaches 4, so IMMEDIATE64_PLUS_RELATIVE encodes as 0 -- confirm whether 0x7 was intended
+
+// DECODER MACRO: Determine from OperandToken0 what representation
+// an operand index is provided as (D3D10_SB_OPERAND_INDEX_REPRESENTATION enum),
+// for index dimension [0], [1] or [2], depending on D3D10_SB_OPERAND_INDEX_DIMENSION.
+#define DECODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(Dim,OperandToken0) ((D3D10_SB_OPERAND_INDEX_REPRESENTATION)(((OperandToken0)&D3D10_SB_OPERAND_INDEX_REPRESENTATION_MASK(Dim))>>D3D10_SB_OPERAND_INDEX_REPRESENTATION_SHIFT(Dim)))
+
+// ENCODER MACRO: Store in OperandToken0 what representation
+// an operand index is provided as (D3D10_SB_OPERAND_INDEX_REPRESENTATION enum),
+// for index dimension [0], [1] or [2], depending on D3D10_SB_OPERAND_INDEX_DIMENSION.
+#define ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(Dim,IndexRepresentation) (((IndexRepresentation)<<D3D10_SB_OPERAND_INDEX_REPRESENTATION_SHIFT(Dim))&D3D10_SB_OPERAND_INDEX_REPRESENTATION_MASK(Dim))
+
+#define D3D10_SB_OPERAND_EXTENDED_MASK  0x80000000 // bit [31] of OperandToken0
+#define D3D10_SB_OPERAND_EXTENDED_SHIFT 31
+
+// DECODER MACRO: Determine if the operand is extended
+// by an additional operand token.
+#define DECODE_IS_D3D10_SB_OPERAND_EXTENDED(OperandToken0) (((OperandToken0)&D3D10_SB_OPERAND_EXTENDED_MASK)>>D3D10_SB_OPERAND_EXTENDED_SHIFT)
+
+// ENCODER MACRO: Store in OperandToken0 whether the operand is extended
+// by an additional operand token (any non-zero bExtended sets the bit).
+#define ENCODE_D3D10_SB_OPERAND_EXTENDED(bExtended) (((bExtended)!=0)?D3D10_SB_OPERAND_EXTENDED_MASK:0)
+
+// ----------------------------------------------------------------------------
+// Extended Instruction Operand Format (OperandToken1)
+//
+// If bit31 of an operand token is set, the
+// operand has additional data in a second DWORD
+// directly following OperandToken0.  Other tokens
+// expected for the operand, such as immediate
+// values or relative address operands (full
+// operands in themselves) always follow
+// OperandToken0 AND OperandToken1..n (extended
+// operand tokens, if present).
+//
+// [05:00] D3D10_SB_EXTENDED_OPERAND_TYPE
+// [17:06] if([05:00] == D3D10_SB_EXTENDED_OPERAND_MODIFIER)
+//         {
+//              [13:06] D3D10_SB_OPERAND_MODIFIER
+//              [16:14] Min Precision: D3D11_SB_OPERAND_MIN_PRECISION
+//              [17:17] Non-uniform: D3D12_SB_OPERAND_NON_UNIFORM
+//         }
+//         else
+//         {
+//              [17:06] Ignored, 0.
+//         }
+// [30:18] Ignored, 0.
+// [31]    0 normally. 1 if second order extended operand definition,
+//         meaning next DWORD contains yet ANOTHER extended operand
+//         description. Currently no second order extensions defined.
+//         This would be useful if a particular extended operand does
+//         not have enough space to store the required information in
+//         a single token and so is extended further.
+//
+// ----------------------------------------------------------------------------
+
+typedef enum D3D10_SB_EXTENDED_OPERAND_TYPE
+{
+    D3D10_SB_EXTENDED_OPERAND_EMPTY            = 0, // Might be used if this
+                                               // enum is full and
+                                               // further extended opcode
+                                               // is needed.
+    D3D10_SB_EXTENDED_OPERAND_MODIFIER         = 1, // Token carries operand modifier fields (see D3D10_SB_OPERAND_MODIFIER)
+} D3D10_SB_EXTENDED_OPERAND_TYPE;
+#define D3D10_SB_EXTENDED_OPERAND_TYPE_MASK 0x0000003f // bits [05:00] of OperandToken1
+
+// DECODER MACRO: Given an extended operand
+// token (OperandToken1), figure out what type
+// of token it is (from D3D10_SB_EXTENDED_OPERAND_TYPE enum)
+// to be able to interpret the rest of the token's contents.
+#define DECODE_D3D10_SB_EXTENDED_OPERAND_TYPE(OperandToken1) ((D3D10_SB_EXTENDED_OPERAND_TYPE)((OperandToken1)&D3D10_SB_EXTENDED_OPERAND_TYPE_MASK))
+
+// ENCODER MACRO: Store extended operand token
+// type (D3D10_SB_EXTENDED_OPERAND_TYPE enum) in OperandToken1.
+#define ENCODE_D3D10_SB_EXTENDED_OPERAND_TYPE(ExtOperandType) ((ExtOperandType)&D3D10_SB_EXTENDED_OPERAND_TYPE_MASK)
+
+typedef enum D3D10_SB_OPERAND_MODIFIER
+{
+    D3D10_SB_OPERAND_MODIFIER_NONE     = 0, // Nop.  This is the implied
+                                             // default if the extended
+                                             // operand is not present for
+                                             // an operand for which source
+                                             // modifiers are meaningful
+    D3D10_SB_OPERAND_MODIFIER_NEG      = 1, // Negate
+    D3D10_SB_OPERAND_MODIFIER_ABS      = 2, // Absolute value, abs()
+    D3D10_SB_OPERAND_MODIFIER_ABSNEG   = 3, // -abs()
+} D3D10_SB_OPERAND_MODIFIER;
+#define D3D10_SB_OPERAND_MODIFIER_MASK  0x00003fc0 // bits [13:06] of OperandToken1
+#define D3D10_SB_OPERAND_MODIFIER_SHIFT 6
+
+// DECODER MACRO: Given a D3D10_SB_EXTENDED_OPERAND_MODIFIER
+// extended token (OperandToken1), determine the source modifier
+// (D3D10_SB_OPERAND_MODIFIER enum)
+#define DECODE_D3D10_SB_OPERAND_MODIFIER(OperandToken1) ((D3D10_SB_OPERAND_MODIFIER)(((OperandToken1)&D3D10_SB_OPERAND_MODIFIER_MASK)>>D3D10_SB_OPERAND_MODIFIER_SHIFT))
+
+// ENCODER MACRO: Generate a complete source modifier extended token
+// (OperandToken1), given D3D10_SB_OPERAND_MODIFIER enum (the
+// ext. operand type is also set to D3D10_SB_EXTENDED_OPERAND_MODIFIER,
+// and the double-extended bit is left clear).
+#define ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(SourceMod)  ((((SourceMod)<<D3D10_SB_OPERAND_MODIFIER_SHIFT)&D3D10_SB_OPERAND_MODIFIER_MASK)| \
+                                                                ENCODE_D3D10_SB_EXTENDED_OPERAND_TYPE(D3D10_SB_EXTENDED_OPERAND_MODIFIER) | \
+                                                                ENCODE_D3D10_SB_OPERAND_DOUBLE_EXTENDED(0))
+
+// Min precision specifier for source/dest operands.  This
+// fits in the extended operand token field. Implementations are free to
+// execute at higher precision than the min - details spec'ed elsewhere.
+// This is part of the opcode specific control range.
+typedef enum D3D11_SB_OPERAND_MIN_PRECISION
+{
+    D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT    = 0, // Default precision
+                                                       // for the shader model
+    D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_16   = 1, // Min 16 bit/component float
+    D3D11_SB_OPERAND_MIN_PRECISION_FLOAT_2_8  = 2, // Min 10(2.8)bit/comp. float
+    D3D11_SB_OPERAND_MIN_PRECISION_SINT_16    = 4, // Min 16 bit/comp. signed integer
+    D3D11_SB_OPERAND_MIN_PRECISION_UINT_16    = 5, // Min 16 bit/comp. unsigned integer
+} D3D11_SB_OPERAND_MIN_PRECISION;
+#define D3D11_SB_OPERAND_MIN_PRECISION_MASK  0x0001C000 // bits [16:14] of OperandToken1
+#define D3D11_SB_OPERAND_MIN_PRECISION_SHIFT 14
+
+// DECODER MACRO: For an OperandToken1 that can specify
+// a minimum precision for execution, find out what it is
+// (D3D11_SB_OPERAND_MIN_PRECISION enum).
+#define DECODE_D3D11_SB_OPERAND_MIN_PRECISION(OperandToken1) ((D3D11_SB_OPERAND_MIN_PRECISION)(((OperandToken1)& D3D11_SB_OPERAND_MIN_PRECISION_MASK)>> D3D11_SB_OPERAND_MIN_PRECISION_SHIFT))
+
+// ENCODER MACRO: Encode minimum precision for execution
+// into the extended operand token, OperandToken1
+#define ENCODE_D3D11_SB_OPERAND_MIN_PRECISION(MinPrecision) (((MinPrecision)<< D3D11_SB_OPERAND_MIN_PRECISION_SHIFT)& D3D11_SB_OPERAND_MIN_PRECISION_MASK)
+
+
+// Non-uniform extended operand modifier (single-bit flag).
+#define D3D12_SB_OPERAND_NON_UNIFORM_MASK  0x00020000 // bit [17] of OperandToken1
+#define D3D12_SB_OPERAND_NON_UNIFORM_SHIFT 17
+
+// DECODER MACRO: For an OperandToken1 that can specify a non-uniform operand, extract the non-uniform bit (0 or 1)
+#define DECODE_D3D12_SB_OPERAND_NON_UNIFORM(OperandToken1) (((OperandToken1)& D3D12_SB_OPERAND_NON_UNIFORM_MASK)>> D3D12_SB_OPERAND_NON_UNIFORM_SHIFT)
+
+// ENCODER MACRO: Encode non-uniform state into the extended operand token, OperandToken1 (only bit 0 of NonUniform survives the mask)
+#define ENCODE_D3D12_SB_OPERAND_NON_UNIFORM(NonUniform) (((NonUniform)<< D3D12_SB_OPERAND_NON_UNIFORM_SHIFT)& D3D12_SB_OPERAND_NON_UNIFORM_MASK)
+
+
+#define D3D10_SB_OPERAND_DOUBLE_EXTENDED_MASK  0x80000000 // bit [31] of OperandToken1
+#define D3D10_SB_OPERAND_DOUBLE_EXTENDED_SHIFT 31
+// DECODER MACRO: Determine if an extended operand token
+// (OperandToken1) is further extended by yet another token
+// (OperandToken2).  Currently there are no secondary
+// extended operand tokens.
+#define DECODE_IS_D3D10_SB_OPERAND_DOUBLE_EXTENDED(OperandToken1) (((OperandToken1)&D3D10_SB_OPERAND_DOUBLE_EXTENDED_MASK)>>D3D10_SB_OPERAND_DOUBLE_EXTENDED_SHIFT)
+
+// ENCODER MACRO: Store in OperandToken1 whether the operand is extended
+// by yet another operand token (any non-zero bExtended sets the bit).
+// Currently there are no secondary extended operand tokens.
+#define ENCODE_D3D10_SB_OPERAND_DOUBLE_EXTENDED(bExtended) (((bExtended)!=0)?D3D10_SB_OPERAND_DOUBLE_EXTENDED_MASK:0)
+
+// ----------------------------------------------------------------------------
+// Name Token (NameToken) (used in declaration statements)
+//
+// [15:00] D3D10_SB_NAME enumeration
+// [31:16] Reserved, 0
+//
+// ----------------------------------------------------------------------------
+#define D3D10_SB_NAME_MASK  0x0000ffff // bits [15:00] of NameToken
+
+// DECODER MACRO: Get the name (D3D10_SB_NAME enum) from NameToken
+#define DECODE_D3D10_SB_NAME(NameToken) ((D3D10_SB_NAME)((NameToken)&D3D10_SB_NAME_MASK))
+
+// ENCODER MACRO: Generate a complete NameToken given a D3D10_SB_NAME
+#define ENCODE_D3D10_SB_NAME(Name) ((Name)&D3D10_SB_NAME_MASK)
+
+//---------------------------------------------------------------------
+// Declaration Statements
+//
+// Declarations start with a standard opcode token,
+// having opcode type being D3D10_SB_OPCODE_DCL*.
+// Each particular declaration type has custom
+// operand token(s), described below.
+//---------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Global Flags Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS
+// [11:11] Refactoring allowed if bit set.
+// [12:12] Enable double precision float ops.
+// [13:13] Force early depth-stencil test.
+// [14:14] Enable RAW and structured buffers in non-CS 4.x shaders.
+// [15:15] Skip optimizations of shader IL when translating to native code
+// [16:16] Enable minimum-precision data types
+// [17:17] Enable 11.1 double-precision floating-point instruction extensions
+// [18:18] Enable 11.1 non-double instruction extensions
+// [19:19] All resources bound (D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND).
+// [23:20] Reserved for future flags.
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by no operands.
+//
+// ----------------------------------------------------------------------------
+#define D3D10_SB_GLOBAL_FLAG_REFACTORING_ALLOWED               (1<<11)
+#define D3D11_SB_GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS (1<<12)
+#define D3D11_SB_GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL         (1<<13)
+#define D3D11_SB_GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS (1<<14)
+#define D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION               (1<<15)
+#define D3D11_1_SB_GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION        (1<<16)
+#define D3D11_1_SB_GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS        (1<<17)
+#define D3D11_1_SB_GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS        (1<<18)
+#define D3D12_SB_GLOBAL_FLAG_ALL_RESOURCES_BOUND               (1<<19)
+
+#define D3D10_SB_GLOBAL_FLAGS_MASK  0x00fff800 // bits [23:11] of OpcodeToken0: the flags above plus the remaining higher bits
+
+// DECODER MACRO: Get global flags (still in their OpcodeToken0 bit positions, not shifted down)
+#define DECODE_D3D10_SB_GLOBAL_FLAGS(OpcodeToken0) ((OpcodeToken0)&D3D10_SB_GLOBAL_FLAGS_MASK)
+
+// ENCODER MACRO: Encode global flags (an OR of the *_GLOBAL_FLAG_* values above)
+#define ENCODE_D3D10_SB_GLOBAL_FLAGS(Flags) ((Flags)&D3D10_SB_GLOBAL_FLAGS_MASK)
+
+// ----------------------------------------------------------------------------
+// Resource Declaration (non multisampled)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_RESOURCE
+// [15:11] D3D10_SB_RESOURCE_DIMENSION
+// [23:16] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Models 4.0 through 5.0:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+//
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_3D,
+//     and the meaning of the index dimensions are as follows: (t<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of resources in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the t# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_2D, and the meaning of the index
+//     dimensions are as follows: (t<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of resource within space (may be dynamically indexed)
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+#define D3D10_SB_RESOURCE_DIMENSION_MASK  0x0000F800 // bits [15:11] of OpcodeToken0
+#define D3D10_SB_RESOURCE_DIMENSION_SHIFT 11
+
+// DECODER MACRO: Given a resource declaration token,
+// (OpcodeToken0), determine the resource dimension
+// (D3D10_SB_RESOURCE_DIMENSION enum)
+#define DECODE_D3D10_SB_RESOURCE_DIMENSION(OpcodeToken0) ((D3D10_SB_RESOURCE_DIMENSION)(((OpcodeToken0)&D3D10_SB_RESOURCE_DIMENSION_MASK)>>D3D10_SB_RESOURCE_DIMENSION_SHIFT))
+
+// ENCODER MACRO: Store resource dimension
+// (D3D10_SB_RESOURCE_DIMENSION enum) into
+// a resource declaration token (OpcodeToken0)
+#define ENCODE_D3D10_SB_RESOURCE_DIMENSION(ResourceDim) (((ResourceDim)<<D3D10_SB_RESOURCE_DIMENSION_SHIFT)&D3D10_SB_RESOURCE_DIMENSION_MASK)
+
+// ----------------------------------------------------------------------------
+// Resource Declaration (multisampled)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_RESOURCE (same opcode as non-multisampled case)
+// [15:11] D3D10_SB_RESOURCE_DIMENSION (must be TEXTURE2DMS or TEXTURE2DMSARRAY)
+// [22:16] Sample count 1...127.  0 is currently disallowed, though
+//         in future versions 0 could mean "configurable" sample count
+// [23:23] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Models 4.0 through 5.0:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+//
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_3D,
+//     and the meaning of the index dimensions are as follows: (t<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of resources in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the t# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_2D, and the meaning of the index
+//     dimensions are as follows: (t<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of resource within space (may be dynamically indexed)
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+
+// Use the same macros for encoding/decoding the resource dimension as the non-MSAA declaration.
+
+#define D3D10_SB_RESOURCE_SAMPLE_COUNT_MASK  0x07F0000
+#define D3D10_SB_RESOURCE_SAMPLE_COUNT_SHIFT 16
+
+// DECODER MACRO: Given a resource declaration token,
+// (OpcodeToken0), determine the resource sample count (1..127)
+#define DECODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(OpcodeToken0) ((UINT)(((OpcodeToken0)&D3D10_SB_RESOURCE_SAMPLE_COUNT_MASK)>>D3D10_SB_RESOURCE_SAMPLE_COUNT_SHIFT))
+
+// ENCODER MACRO: Store resource sample count up to 127 into
+// a resource declaration token (OpcodeToken0).  Counts above 127 are
+// clamped to 127.  SampleCount is parenthesized so that expression
+// arguments (e.g. "a | b") are compared and shifted as a whole; note that
+// the argument is evaluated twice, so it must not have side effects.
+#define ENCODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(SampleCount) ((((SampleCount) > 127 ? 127 : (SampleCount))<<D3D10_SB_RESOURCE_SAMPLE_COUNT_SHIFT)&D3D10_SB_RESOURCE_SAMPLE_COUNT_MASK)
+
+// ----------------------------------------------------------------------------
+// Resource Return Type Token (ResourceReturnTypeToken) (used in resource
+// declaration statements)
+//
+// [03:00] D3D10_SB_RESOURCE_RETURN_TYPE for component X
+// [07:04] D3D10_SB_RESOURCE_RETURN_TYPE for component Y
+// [11:08] D3D10_SB_RESOURCE_RETURN_TYPE for component Z
+// [15:12] D3D10_SB_RESOURCE_RETURN_TYPE for component W
+// [31:16] Reserved, 0
+//
+// ----------------------------------------------------------------------------
+// DECODER MACRO: Get the resource return type for component (0-3) from
+// ResourceReturnTypeToken.  Component is parenthesized so that expression
+// arguments (e.g. "i + 1") are scaled by the field width as a whole.
+#define DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(ResourceReturnTypeToken, Component) \
+    ((D3D10_SB_RESOURCE_RETURN_TYPE)(((ResourceReturnTypeToken) >> \
+    ((Component) * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS))&D3D10_SB_RESOURCE_RETURN_TYPE_MASK))
+
+// ENCODER MACRO: Generate a resource return type for a component (0-3),
+// positioned in that component's field of ResourceReturnTypeToken.
+#define ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnType, Component) \
+    (((ReturnType)&D3D10_SB_RESOURCE_RETURN_TYPE_MASK) << ((Component) * D3D10_SB_RESOURCE_RETURN_TYPE_NUMBITS))
+
+// ----------------------------------------------------------------------------
+// Sampler Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_SAMPLER
+// [14:11] D3D10_SB_SAMPLER_MODE
+// [23:15] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand on Shader Models 4.0 through 5.0:
+// (1) Operand starting with OperandToken0, defining which sampler
+//     (D3D10_SB_OPERAND_TYPE_SAMPLER) register # is being declared.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     s# register (D3D10_SB_OPERAND_TYPE_SAMPLER) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_3D,
+//     and the meaning of the index dimensions are as follows: (s<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of samplers in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the s# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_2D, and the meaning of the index
+//     dimensions are as follows: (s<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of sampler within space (may be dynamically indexed)
+// (2) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+typedef enum D3D10_SB_SAMPLER_MODE
+{
+    D3D10_SB_SAMPLER_MODE_DEFAULT      = 0,
+    D3D10_SB_SAMPLER_MODE_COMPARISON   = 1,
+    D3D10_SB_SAMPLER_MODE_MONO         = 2,
+} D3D10_SB_SAMPLER_MODE;
+
+#define D3D10_SB_SAMPLER_MODE_MASK  0x00007800 // bits [14:11] of OpcodeToken0
+#define D3D10_SB_SAMPLER_MODE_SHIFT 11
+
+// DECODER MACRO: Given a sampler declaration token (OpcodeToken0), determine the sampler mode (D3D10_SB_SAMPLER_MODE enum)
+#define DECODE_D3D10_SB_SAMPLER_MODE(OpcodeToken0) ((D3D10_SB_SAMPLER_MODE)(((OpcodeToken0)&D3D10_SB_SAMPLER_MODE_MASK)>>D3D10_SB_SAMPLER_MODE_SHIFT))
+
+// ENCODER MACRO: Store the sampler mode (D3D10_SB_SAMPLER_MODE enum) into a sampler declaration token (OpcodeToken0)
+#define ENCODE_D3D10_SB_SAMPLER_MODE(SamplerMode) (((SamplerMode)<<D3D10_SB_SAMPLER_MODE_SHIFT)&D3D10_SB_SAMPLER_MODE_MASK)
+
+// ----------------------------------------------------------------------------
+// Input Register Declaration (see separate declarations for Pixel Shaders)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared, 
+//     including writemask.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Input Register Declaration w/System Interpreted Value
+// (see separate declarations for Pixel Shaders)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT_SIV
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared,
+//     including writemask.  For Geometry Shaders, the input is 
+//     v[vertex][attribute], and this declaration is only for which register 
+//     on the attribute axis is being declared.  The vertex axis value must 
+//     be equal to the # of vertices in the current input primitive for the GS
+//     (i.e. 6 for triangle + adjacency).
+// (2) a System Interpreted Value Name (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Input Register Declaration w/System Generated Value
+// (available for all shaders incl. Pixel Shader, no interpolation mode needed)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT_SGV
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared,
+//     including writemask.
+// (2) a System Generated Value Name (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Pixel Shader Input Register Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT_PS
+// [14:11] D3D10_SB_INTERPOLATION_MODE
+// [23:15] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared,
+//     including writemask.
+//
+// ----------------------------------------------------------------------------
+// The interpolation mode occupies bits [14:11] of the pixel shader input
+// declaration's OpcodeToken0.
+#define D3D10_SB_INPUT_INTERPOLATION_MODE_SHIFT 11
+#define D3D10_SB_INPUT_INTERPOLATION_MODE_MASK  0x00007800
+
+// DECODER MACRO: Given a pixel shader input declaration token (OpcodeToken0),
+// determine the interpolation mode (D3D10_SB_INTERPOLATION_MODE enum)
+#define DECODE_D3D10_SB_INPUT_INTERPOLATION_MODE(OpcodeToken0) ((D3D10_SB_INTERPOLATION_MODE)(((OpcodeToken0) & D3D10_SB_INPUT_INTERPOLATION_MODE_MASK) >> D3D10_SB_INPUT_INTERPOLATION_MODE_SHIFT))
+
+// ENCODER MACRO: Store an interpolation mode (D3D10_SB_INTERPOLATION_MODE enum)
+// into the interpolation mode field of a declaration token (OpcodeToken0)
+#define ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(InterpolationMode) (((InterpolationMode) << D3D10_SB_INPUT_INTERPOLATION_MODE_SHIFT) & D3D10_SB_INPUT_INTERPOLATION_MODE_MASK)
+
+// ----------------------------------------------------------------------------
+// Pixel Shader Input Register Declaration w/System Interpreted Value
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT_PS_SIV
+// [14:11] D3D10_SB_INTERPOLATION_MODE
+// [23:15] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared.
+// (2) a System Interpreted Value Name (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Pixel Shader Input Register Declaration w/System Generated Value
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INPUT_PS_SGV
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) Operand, starting with OperandToken0, defining which input
+//     v# register (D3D10_SB_OPERAND_TYPE_INPUT) is being declared.
+// (2) a System Generated Value Name (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Output Register Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_OUTPUT
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand:
+// (1) Operand, starting with OperandToken0, defining which
+//     o# register (D3D10_SB_OPERAND_TYPE_OUTPUT) is being declared,
+//     including writemask.
+//     (in Pixel Shader, output can also be one of 
+//     D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH,
+//     D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL, or
+//     D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL )
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Output Register Declaration w/System Interpreted Value
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_OUTPUT_SIV
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     o# register (D3D10_SB_OPERAND_TYPE_OUTPUT) is being declared,
+//     including writemask.
+// (2) a System Interpreted Name token (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Output Register Declaration w/System Generated Value
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_OUTPUT_SGV
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     o# register (D3D10_SB_OPERAND_TYPE_OUTPUT) is being declared,
+//     including writemask.
+// (2) a System Generated Name token (NameToken)
+//
+// ----------------------------------------------------------------------------
+
+
+// ----------------------------------------------------------------------------
+// Input or Output Register Indexing Range Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INDEX_RANGE
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     input (v#) or output (o#) register is having its array indexing range
+//     declared, including writemask.  For Geometry Shader inputs, 
+//     it is assumed that the vertex axis is always fully indexable,
+//     and 0 must be specified as the vertex# in this declaration, so that 
+//     only a range of attributes is having its index range defined.
+//     
+// (2) a DWORD representing the count of registers starting from the one
+//     indicated in (1).
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Temp Register Declaration r0...r(n-1) 
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_TEMPS
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand:
+// (1) DWORD (unsigned int) indicating how many temps are being declared.  
+//     i.e. 5 means r0...r4 are declared.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Indexable Temp Register (x#[size]) Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 3 DWORDs:
+// (1) Register index (defines which x# register is declared)
+// (2) Number of registers in this register bank
+// (3) Number of components in the array (1-4). 1 means .x, 2 means .xy etc.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Constant Buffer Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER
+// [11]    D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN
+// [23:12] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand on Shader Model 4.0 through 5.0:
+// (1) Operand, starting with OperandToken0, defining which CB slot (cb#[size])
+//     is being declared. (operand type: D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER)
+//     The indexing dimension for the register must be 
+//     D3D10_SB_OPERAND_INDEX_DIMENSION_2D, where the first index specifies
+//     which cb#[] is being declared, and the second (array) index specifies the size 
+//     of the buffer, as a count of 32-bit*4 elements.  (As opposed to when the 
+//     cb#[] is used in shader instructions, and the array index represents which 
+//     location in the constant buffer is being referenced.)
+//     If the size is specified as 0, the CB size is not known (any size CB
+//     can be bound to the slot).
+//
+// The order of constant buffer declarations in a shader indicates their
+// relative priority from highest to lowest (hint to driver).
+// 
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) Operand, starting with OperandToken0, defining which CB range (ID and bounds)
+//     is being declared. (operand type: D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER)
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (cb<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of constant buffers in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the cb#[] is used in shader instructions: (cb<id>[<idx>][<loc>])
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of constant buffer within space (may be dynamically indexed)
+//       3 <loc>: location of vector within constant buffer being referenced,
+//          which may also be dynamically indexed, with no access pattern flag required.
+// (2) a DWORD indicating the size of the constant buffer as a count of 16-byte vectors.
+//     Each vector is 32-bit*4 elements == 128-bits == 16 bytes.
+//     If the size is specified as 0, the CB size is not known (any size CB
+//     can be bound to the slot).
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+
+// Access pattern flag stored in bit [11] of the constant buffer
+// declaration's OpcodeToken0.
+typedef enum D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN
+{
+    D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED = 0,
+    D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED = 1
+} D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN;
+
+// The access pattern is the single bit [11] of the constant buffer
+// declaration's OpcodeToken0.
+#define D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_SHIFT 11
+#define D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_MASK  0x00000800
+
+// DECODER MACRO: Given a constant buffer declaration token (OpcodeToken0),
+// determine whether the buffer is immediate- or dynamic-indexed
+// (D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN enum)
+#define DECODE_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(OpcodeToken0) ((D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN)(((OpcodeToken0) & D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_MASK) >> D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_SHIFT))
+
+// ENCODER MACRO: Store the access pattern
+// (D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN enum) into a constant buffer
+// declaration token (OpcodeToken0).
+// NOTE: the doubled D3D10_SB_ prefix is part of the existing macro name;
+// renaming it would break code that already uses it.
+#define ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(AccessPattern) (((AccessPattern) << D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_SHIFT) & D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN_MASK)
+
+// ----------------------------------------------------------------------------
+// Immediate Constant Buffer Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_CUSTOMDATA
+// [31:11] == D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER
+//
+// OpcodeToken0 is followed by:
+// (1) DWORD indicating length of declaration, including OpcodeToken0.
+//     This length must = 2(for OpcodeToken0 and 1) + a multiple of 4 
+//                                                    (# of immediate constants)
+// (2) Sequence of 4-tuples of DWORDs defining the Immediate Constant Buffer.
+//     The number of 4-tuples is (length above - 2) / 4
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Shader Message Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_CUSTOMDATA
+// [31:11] == D3D11_SB_CUSTOMDATA_SHADER_MESSAGE
+//
+// OpcodeToken0 is followed by:
+// (1) DWORD indicating length of declaration, including OpcodeToken0.
+// (2) DWORD (D3D11_SB_SHADER_MESSAGE_ID) indicating shader message or error.
+// (3) D3D11_SB_SHADER_MESSAGE_FORMAT indicating the convention for formatting the message.
+// (4) DWORD indicating the number of characters in the string without the terminator.
+// (5) DWORD indicating the number of operands.
+// (6) DWORD indicating length of operands.
+// (7) Encoded operands.
+// (8) String with trailing zero, padded to a multiple of DWORDs.
+//     The string is in the given format and the operands given should
+//     be used for argument substitutions when formatting.
+// ----------------------------------------------------------------------------
+
+// Message IDs carried in DWORD (2) of a shader message declaration:
+// either an informational message or an error.
+typedef enum D3D11_SB_SHADER_MESSAGE_ID
+{
+    D3D11_SB_SHADER_MESSAGE_ID_MESSAGE = 0x00200102,
+    D3D11_SB_SHADER_MESSAGE_ID_ERROR   = 0x00200103
+} D3D11_SB_SHADER_MESSAGE_ID;
+
+// Convention used to format the string of a shader message declaration.
+typedef enum D3D11_SB_SHADER_MESSAGE_FORMAT
+{
+    // Plain text string with no formatting; any operands are ignored.
+    D3D11_SB_SHADER_MESSAGE_FORMAT_ANSI_TEXT = 0,
+    // The string is a format string following C/C++ printf conventions.
+    D3D11_SB_SHADER_MESSAGE_FORMAT_ANSI_PRINTF = 1,
+} D3D11_SB_SHADER_MESSAGE_FORMAT;
+
+// ----------------------------------------------------------------------------
+// Shader Clip Plane Constant Mappings for DX9 hardware
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_CUSTOMDATA
+// [31:11] == D3D11_SB_CUSTOMDATA_SHADER_CLIP_PLANE_CONSTANT_MAPPINGS_FOR_DX9
+//
+// OpcodeToken0 is followed by:
+// (1) DWORD indicating length of declaration, including OpcodeToken0.
+// (2) DWORD indicating number of constant mappings (up to 6 mappings).
+// (3+) Constant mapping tables in following format.
+//
+// struct _Clip_Plane_Constant_Mapping
+// {
+//     WORD ConstantBufferIndex;  // cb[n]
+//     WORD StartConstantElement; // starting index of cb[n][m]
+//     WORD ConstantElementCount; // number of elements cb[n][m] ~ cb[n][m+l]
+//     WORD Reserved;             //
+// };
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Geometry Shader Input Primitive Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE
+// [16:11] D3D10_SB_PRIMITIVE [not D3D10_SB_PRIMITIVE_TOPOLOGY]
+// [23:17] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+
+// The input primitive occupies bits [16:11] of the declaration's OpcodeToken0.
+#define D3D10_SB_GS_INPUT_PRIMITIVE_SHIFT 11
+#define D3D10_SB_GS_INPUT_PRIMITIVE_MASK  0x0001f800
+
+// DECODER MACRO: Given a GS input primitive declaration token
+// (OpcodeToken0), determine the input primitive
+// (D3D10_SB_PRIMITIVE enum)
+#define DECODE_D3D10_SB_GS_INPUT_PRIMITIVE(OpcodeToken0) ((D3D10_SB_PRIMITIVE)(((OpcodeToken0) & D3D10_SB_GS_INPUT_PRIMITIVE_MASK) >> D3D10_SB_GS_INPUT_PRIMITIVE_SHIFT))
+
+// ENCODER MACRO: Store an input primitive
+// (D3D10_SB_PRIMITIVE enum) into the primitive field of
+// a GS input primitive declaration token (OpcodeToken0)
+#define ENCODE_D3D10_SB_GS_INPUT_PRIMITIVE(Prim) (((Prim) << D3D10_SB_GS_INPUT_PRIMITIVE_SHIFT) & D3D10_SB_GS_INPUT_PRIMITIVE_MASK)
+
+// ----------------------------------------------------------------------------
+// Geometry Shader Output Topology Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY
+// [16:11] D3D10_SB_PRIMITIVE_TOPOLOGY (field width as selected by
+//         D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_MASK, 0x0001f800)
+// [23:17] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+
+// The mask below selects bits [16:11] of the declaration's OpcodeToken0.
+#define D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_SHIFT 11
+#define D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_MASK  0x0001f800
+
+// DECODER MACRO: Given a GS output primitive topology declaration token
+// (OpcodeToken0), determine the primitive topology
+// (D3D10_SB_PRIMITIVE_TOPOLOGY enum)
+#define DECODE_D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY(OpcodeToken0) ((D3D10_SB_PRIMITIVE_TOPOLOGY)(((OpcodeToken0) & D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_MASK) >> D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_SHIFT))
+
+// ENCODER MACRO: Store a primitive topology
+// (D3D10_SB_PRIMITIVE_TOPOLOGY enum) into the topology field of
+// a GS output primitive topology declaration token (OpcodeToken0)
+#define ENCODE_D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY(PrimTopology) (((PrimTopology) << D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_SHIFT) & D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY_MASK)
+
+// ----------------------------------------------------------------------------
+// Geometry Shader Maximum Output Vertex Count Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by a DWORD representing the
+// maximum number of vertices that could be output
+// by the Geometry Shader.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Geometry Shader Instance Count Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by a UINT32 representing the
+// number of instances of the geometry shader program to execute.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: HS/DS Input Control Point Count
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT
+// [16:11] Control point count 
+// [23:17] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+// The input control point count occupies bits [16:11] of OpcodeToken0.
+#define D3D11_SB_INPUT_CONTROL_POINT_COUNT_SHIFT 11
+#define D3D11_SB_INPUT_CONTROL_POINT_COUNT_MASK  0x0001f800
+
+// DECODER MACRO: Given an input control point count declaration token
+// (OpcodeToken0), determine the control point count (as a UINT)
+#define DECODE_D3D11_SB_INPUT_CONTROL_POINT_COUNT(OpcodeToken0) ((UINT)(((OpcodeToken0) & D3D11_SB_INPUT_CONTROL_POINT_COUNT_MASK) >> D3D11_SB_INPUT_CONTROL_POINT_COUNT_SHIFT))
+
+// ENCODER MACRO: Store an input control point count into the count field
+// of a declaration token (OpcodeToken0); bits outside the mask are dropped
+#define ENCODE_D3D11_SB_INPUT_CONTROL_POINT_COUNT(Count) (((Count) << D3D11_SB_INPUT_CONTROL_POINT_COUNT_SHIFT) & D3D11_SB_INPUT_CONTROL_POINT_COUNT_MASK)
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: HS Output Control Point Count
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT
+// [16:11] Control point count 
+// [23:17] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+// The output control point count occupies bits [16:11] of OpcodeToken0.
+#define D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_SHIFT 11
+#define D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_MASK  0x0001f800
+
+// DECODER MACRO: Given an output control point count declaration token
+// (OpcodeToken0), determine the control point count (as a UINT)
+#define DECODE_D3D11_SB_OUTPUT_CONTROL_POINT_COUNT(OpcodeToken0) ((UINT)(((OpcodeToken0) & D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_MASK) >> D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_SHIFT))
+
+// ENCODER MACRO: Store an output control point count into the count field
+// of a declaration token (OpcodeToken0); bits outside the mask are dropped
+#define ENCODE_D3D11_SB_OUTPUT_CONTROL_POINT_COUNT(Count) (((Count) << D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_SHIFT) & D3D11_SB_OUTPUT_CONTROL_POINT_COUNT_MASK)
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: Tessellator Domain
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_TESS_DOMAIN
+// [12:11] Domain
+// [23:13] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+// Tessellator domain stored in bits [12:11] of the
+// D3D11_SB_OPCODE_DCL_TESS_DOMAIN OpcodeToken0.
+typedef enum D3D11_SB_TESSELLATOR_DOMAIN
+{
+    D3D11_SB_TESSELLATOR_DOMAIN_UNDEFINED = 0,
+    D3D11_SB_TESSELLATOR_DOMAIN_ISOLINE = 1,
+    D3D11_SB_TESSELLATOR_DOMAIN_TRI = 2,
+    D3D11_SB_TESSELLATOR_DOMAIN_QUAD = 3
+} D3D11_SB_TESSELLATOR_DOMAIN;
+
+// The tessellator domain occupies bits [12:11] of OpcodeToken0.
+#define D3D11_SB_TESS_DOMAIN_SHIFT 11
+#define D3D11_SB_TESS_DOMAIN_MASK  0x00001800
+
+// DECODER MACRO: Given a tessellator domain declaration token
+// (OpcodeToken0), determine the domain
+// (D3D11_SB_TESSELLATOR_DOMAIN enum)
+#define DECODE_D3D11_SB_TESS_DOMAIN(OpcodeToken0) ((D3D11_SB_TESSELLATOR_DOMAIN)(((OpcodeToken0) & D3D11_SB_TESS_DOMAIN_MASK) >> D3D11_SB_TESS_DOMAIN_SHIFT))
+
+// ENCODER MACRO: Store a tessellator domain
+// (D3D11_SB_TESSELLATOR_DOMAIN enum) into the domain field of
+// a tessellator domain declaration token (OpcodeToken0)
+#define ENCODE_D3D11_SB_TESS_DOMAIN(Domain) (((Domain) << D3D11_SB_TESS_DOMAIN_SHIFT) & D3D11_SB_TESS_DOMAIN_MASK)
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: Tessellator Partitioning
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_TESS_PARTITIONING
+// [13:11] Partitioning
+// [23:14] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+// Tessellator partitioning stored in bits [13:11] of the
+// D3D11_SB_OPCODE_DCL_TESS_PARTITIONING OpcodeToken0.
+typedef enum D3D11_SB_TESSELLATOR_PARTITIONING
+{
+    D3D11_SB_TESSELLATOR_PARTITIONING_UNDEFINED = 0,
+    D3D11_SB_TESSELLATOR_PARTITIONING_INTEGER = 1,
+    D3D11_SB_TESSELLATOR_PARTITIONING_POW2 = 2,
+    D3D11_SB_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
+    D3D11_SB_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4
+} D3D11_SB_TESSELLATOR_PARTITIONING;
+
+// The tessellator partitioning occupies bits [13:11] of OpcodeToken0.
+#define D3D11_SB_TESS_PARTITIONING_SHIFT 11
+#define D3D11_SB_TESS_PARTITIONING_MASK  0x00003800
+
+// DECODER MACRO: Given a tessellator partitioning declaration token
+// (OpcodeToken0), determine the partitioning
+// (D3D11_SB_TESSELLATOR_PARTITIONING enum)
+#define DECODE_D3D11_SB_TESS_PARTITIONING(OpcodeToken0) ((D3D11_SB_TESSELLATOR_PARTITIONING)(((OpcodeToken0) & D3D11_SB_TESS_PARTITIONING_MASK) >> D3D11_SB_TESS_PARTITIONING_SHIFT))
+
+// ENCODER MACRO: Store a tessellator partitioning
+// (D3D11_SB_TESSELLATOR_PARTITIONING enum) into the partitioning field of
+// a tessellator partitioning declaration token (OpcodeToken0)
+#define ENCODE_D3D11_SB_TESS_PARTITIONING(Partitioning) (((Partitioning) << D3D11_SB_TESS_PARTITIONING_SHIFT) & D3D11_SB_TESS_PARTITIONING_MASK)
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: Tessellator Output Primitive
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE
+// [13:11] Output Primitive
+// [23:14] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token. == 1
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// ----------------------------------------------------------------------------
+// Tessellator output primitive stored in bits [13:11] of the
+// D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE OpcodeToken0.
+typedef enum D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE
+{
+    D3D11_SB_TESSELLATOR_OUTPUT_UNDEFINED = 0,
+    D3D11_SB_TESSELLATOR_OUTPUT_POINT = 1,
+    D3D11_SB_TESSELLATOR_OUTPUT_LINE = 2,
+    D3D11_SB_TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
+    D3D11_SB_TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4
+} D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE;
+
+// The tessellator output primitive occupies bits [13:11] of OpcodeToken0.
+#define D3D11_SB_TESS_OUTPUT_PRIMITIVE_SHIFT 11
+#define D3D11_SB_TESS_OUTPUT_PRIMITIVE_MASK  0x00003800
+
+// DECODER MACRO: Given a tessellator output primitive declaration token
+// (OpcodeToken0), determine the output primitive
+// (D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE enum)
+#define DECODE_D3D11_SB_TESS_OUTPUT_PRIMITIVE(OpcodeToken0) ((D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE)(((OpcodeToken0) & D3D11_SB_TESS_OUTPUT_PRIMITIVE_MASK) >> D3D11_SB_TESS_OUTPUT_PRIMITIVE_SHIFT))
+
+// ENCODER MACRO: Store a tessellator output primitive
+// (D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE enum) into the primitive field of
+// a tessellator output primitive declaration token (OpcodeToken0)
+#define ENCODE_D3D11_SB_TESS_OUTPUT_PRIMITIVE(OutputPrimitive) (((OutputPrimitive) << D3D11_SB_TESS_OUTPUT_PRIMITIVE_SHIFT) & D3D11_SB_TESS_OUTPUT_PRIMITIVE_MASK)
+
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: Hull Shader Max Tessfactor
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by a float32 representing the
+// maximum TessFactor.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Hull Shader Declaration Phase: Hull Shader Fork Phase Instance Count
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by a UINT32 representing the
+// number of instances of the current fork phase program to execute.
+//
+// ----------------------------------------------------------------------------
+
+// Interpolation mode stored in bits [14:11] of the pixel shader input
+// declarations (D3D10_SB_OPCODE_DCL_INPUT_PS and _PS_SIV).
+typedef enum D3D10_SB_INTERPOLATION_MODE
+{
+    D3D10_SB_INTERPOLATION_UNDEFINED                     = 0,
+    D3D10_SB_INTERPOLATION_CONSTANT                      = 1,
+    D3D10_SB_INTERPOLATION_LINEAR                        = 2,
+    D3D10_SB_INTERPOLATION_LINEAR_CENTROID               = 3,
+    D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE          = 4,
+    D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
+    D3D10_SB_INTERPOLATION_LINEAR_SAMPLE                 = 6, // DX10.1
+    D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE   = 7, // DX10.1
+} D3D10_SB_INTERPOLATION_MODE;
+
+// Primitive topology used by the geometry shader output topology declaration.
+// Keep PRIMITIVE_TOPOLOGY values in sync with earlier DX versions (HW consumes values directly).
+typedef enum D3D10_SB_PRIMITIVE_TOPOLOGY
+{
+    D3D10_SB_PRIMITIVE_TOPOLOGY_UNDEFINED         = 0,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST         = 1,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_LINELIST          = 2,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP         = 3,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLELIST      = 4,
+    D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP     = 5,
+    // 6 is reserved for legacy triangle fans
+    // Adjacency values should be equal to (0x8 | non-adjacency):
+    D3D10_SB_PRIMITIVE_TOPOLOGY_LINELIST_ADJ      = 10, // 0x8 | LINELIST
+    D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ     = 11, // 0x8 | LINESTRIP
+    D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ  = 12, // 0x8 | TRIANGLELIST
+    D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13, // 0x8 | TRIANGLESTRIP
+} D3D10_SB_PRIMITIVE_TOPOLOGY;
+
+typedef enum D3D10_SB_PRIMITIVE
+{
+    D3D10_SB_PRIMITIVE_UNDEFINED = 0,
+    D3D10_SB_PRIMITIVE_POINT = 1,
+    D3D10_SB_PRIMITIVE_LINE = 2,
+    D3D10_SB_PRIMITIVE_TRIANGLE = 3,
+    // Adjacency values should be equal to (0x4 | non-adjacency), i.e. the non-adjacency value with bit 2 set:
+    D3D10_SB_PRIMITIVE_LINE_ADJ = 6, // 0x4 | LINE (2)
+    D3D10_SB_PRIMITIVE_TRIANGLE_ADJ = 7, // 0x4 | TRIANGLE (3)
+    D3D11_SB_PRIMITIVE_1_CONTROL_POINT_PATCH = 8, // Patch entries follow the pattern: N control points => value N + 7 (1 => 8 ... 32 => 39).
+    D3D11_SB_PRIMITIVE_2_CONTROL_POINT_PATCH = 9,
+    D3D11_SB_PRIMITIVE_3_CONTROL_POINT_PATCH = 10,
+    D3D11_SB_PRIMITIVE_4_CONTROL_POINT_PATCH = 11,
+    D3D11_SB_PRIMITIVE_5_CONTROL_POINT_PATCH = 12,
+    D3D11_SB_PRIMITIVE_6_CONTROL_POINT_PATCH = 13,
+    D3D11_SB_PRIMITIVE_7_CONTROL_POINT_PATCH = 14,
+    D3D11_SB_PRIMITIVE_8_CONTROL_POINT_PATCH = 15,
+    D3D11_SB_PRIMITIVE_9_CONTROL_POINT_PATCH = 16,
+    D3D11_SB_PRIMITIVE_10_CONTROL_POINT_PATCH = 17,
+    D3D11_SB_PRIMITIVE_11_CONTROL_POINT_PATCH = 18,
+    D3D11_SB_PRIMITIVE_12_CONTROL_POINT_PATCH = 19,
+    D3D11_SB_PRIMITIVE_13_CONTROL_POINT_PATCH = 20,
+    D3D11_SB_PRIMITIVE_14_CONTROL_POINT_PATCH = 21,
+    D3D11_SB_PRIMITIVE_15_CONTROL_POINT_PATCH = 22,
+    D3D11_SB_PRIMITIVE_16_CONTROL_POINT_PATCH = 23,
+    D3D11_SB_PRIMITIVE_17_CONTROL_POINT_PATCH = 24,
+    D3D11_SB_PRIMITIVE_18_CONTROL_POINT_PATCH = 25,
+    D3D11_SB_PRIMITIVE_19_CONTROL_POINT_PATCH = 26,
+    D3D11_SB_PRIMITIVE_20_CONTROL_POINT_PATCH = 27,
+    D3D11_SB_PRIMITIVE_21_CONTROL_POINT_PATCH = 28,
+    D3D11_SB_PRIMITIVE_22_CONTROL_POINT_PATCH = 29,
+    D3D11_SB_PRIMITIVE_23_CONTROL_POINT_PATCH = 30,
+    D3D11_SB_PRIMITIVE_24_CONTROL_POINT_PATCH = 31,
+    D3D11_SB_PRIMITIVE_25_CONTROL_POINT_PATCH = 32,
+    D3D11_SB_PRIMITIVE_26_CONTROL_POINT_PATCH = 33,
+    D3D11_SB_PRIMITIVE_27_CONTROL_POINT_PATCH = 34,
+    D3D11_SB_PRIMITIVE_28_CONTROL_POINT_PATCH = 35,
+    D3D11_SB_PRIMITIVE_29_CONTROL_POINT_PATCH = 36,
+    D3D11_SB_PRIMITIVE_30_CONTROL_POINT_PATCH = 37,
+    D3D11_SB_PRIMITIVE_31_CONTROL_POINT_PATCH = 38,
+    D3D11_SB_PRIMITIVE_32_CONTROL_POINT_PATCH = 39,
+} D3D10_SB_PRIMITIVE;
+
+typedef enum D3D10_SB_COMPONENT_MASK // One bit per component; the R/G/B/A names alias the X/Y/Z/W values.
+{
+    D3D10_SB_COMPONENT_MASK_X = 1,
+    D3D10_SB_COMPONENT_MASK_Y = 2,
+    D3D10_SB_COMPONENT_MASK_Z = 4,
+    D3D10_SB_COMPONENT_MASK_W = 8,
+    D3D10_SB_COMPONENT_MASK_R = 1, // same value as MASK_X
+    D3D10_SB_COMPONENT_MASK_G = 2, // same value as MASK_Y
+    D3D10_SB_COMPONENT_MASK_B = 4, // same value as MASK_Z
+    D3D10_SB_COMPONENT_MASK_A = 8, // same value as MASK_W
+    D3D10_SB_COMPONENT_MASK_ALL = 15, // X | Y | Z | W
+} D3D10_SB_COMPONENT_MASK;
+
+typedef enum D3D10_SB_NAME // Name identifiers; the D3D11_SB_/D3D12_SB_ entries continue the same numbering (11-25).
+{
+    D3D10_SB_NAME_UNDEFINED = 0,
+    D3D10_SB_NAME_POSITION = 1,
+    D3D10_SB_NAME_CLIP_DISTANCE = 2,
+    D3D10_SB_NAME_CULL_DISTANCE = 3,
+    D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+    D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX = 5,
+    D3D10_SB_NAME_VERTEX_ID = 6,
+    D3D10_SB_NAME_PRIMITIVE_ID = 7,
+    D3D10_SB_NAME_INSTANCE_ID = 8,
+    D3D10_SB_NAME_IS_FRONT_FACE = 9,
+    D3D10_SB_NAME_SAMPLE_INDEX = 10,
+    // The following are added for D3D11 (final tessellation-factor names for quad, tri and line domains)
+    D3D11_SB_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11, 
+    D3D11_SB_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12, 
+    D3D11_SB_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13, 
+    D3D11_SB_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14, 
+    D3D11_SB_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15, 
+    D3D11_SB_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16, 
+    D3D11_SB_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17, 
+    D3D11_SB_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18, 
+    D3D11_SB_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19, 
+    D3D11_SB_NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20, 
+    D3D11_SB_NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21,
+    D3D11_SB_NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22,
+    // The following are added for D3D12
+    D3D12_SB_NAME_BARYCENTRICS = 23,
+    D3D12_SB_NAME_SHADINGRATE = 24,
+    D3D12_SB_NAME_CULLPRIMITIVE = 25,
+} D3D10_SB_NAME;
+
+typedef enum D3D10_SB_RESOURCE_DIMENSION // Resource dimension codes; values are contiguous from 0 (UNKNOWN) to 12.
+{
+    D3D10_SB_RESOURCE_DIMENSION_UNKNOWN = 0,
+    D3D10_SB_RESOURCE_DIMENSION_BUFFER = 1,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D = 2,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D = 3,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS = 4,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D = 5,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE = 6,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY = 7,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY = 8,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9,
+    D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
+    D3D11_SB_RESOURCE_DIMENSION_RAW_BUFFER = 11, // D3D11_SB_-prefixed entries continue the D3D10 numbering
+    D3D11_SB_RESOURCE_DIMENSION_STRUCTURED_BUFFER = 12,
+} D3D10_SB_RESOURCE_DIMENSION;
+
+typedef enum D3D10_SB_RESOURCE_RETURN_TYPE // Resource return type codes; note that no enumerator is defined for value 0.
+{
+    D3D10_SB_RETURN_TYPE_UNORM = 1,
+    D3D10_SB_RETURN_TYPE_SNORM = 2,
+    D3D10_SB_RETURN_TYPE_SINT = 3,
+    D3D10_SB_RETURN_TYPE_UINT = 4,
+    D3D10_SB_RETURN_TYPE_FLOAT = 5,
+    D3D10_SB_RETURN_TYPE_MIXED = 6,
+    D3D11_SB_RETURN_TYPE_DOUBLE = 7, // D3D11_SB_-prefixed entries continue the D3D10 numbering
+    D3D11_SB_RETURN_TYPE_CONTINUED = 8,
+    D3D11_SB_RETURN_TYPE_UNUSED = 9,
+} D3D10_SB_RESOURCE_RETURN_TYPE;
+
+typedef enum D3D10_SB_REGISTER_COMPONENT_TYPE // Register component type codes: unknown, or one of three 32-bit types.
+{
+    D3D10_SB_REGISTER_COMPONENT_UNKNOWN = 0,
+    D3D10_SB_REGISTER_COMPONENT_UINT32 = 1,
+    D3D10_SB_REGISTER_COMPONENT_SINT32 = 2,
+    D3D10_SB_REGISTER_COMPONENT_FLOAT32 = 3
+} D3D10_SB_REGISTER_COMPONENT_TYPE;
+
+typedef enum D3D10_SB_INSTRUCTION_RETURN_TYPE // Value stored in the return-type field of OpcodeToken0 (see the mask/shift below).
+{
+    D3D10_SB_INSTRUCTION_RETURN_FLOAT      = 0,
+    D3D10_SB_INSTRUCTION_RETURN_UINT       = 1
+} D3D10_SB_INSTRUCTION_RETURN_TYPE;
+
+#define D3D10_SB_INSTRUCTION_RETURN_TYPE_MASK  0x00001800 // bits [12:11]
+#define D3D10_SB_INSTRUCTION_RETURN_TYPE_SHIFT 11
+
+// DECODER MACRO: For an OpcodeToken0 that carries a return type,
+// extract bits [12:11] and return them as a D3D10_SB_INSTRUCTION_RETURN_TYPE.
+#define DECODE_D3D10_SB_INSTRUCTION_RETURN_TYPE(OpcodeToken0) ((D3D10_SB_INSTRUCTION_RETURN_TYPE)(((OpcodeToken0)&D3D10_SB_INSTRUCTION_RETURN_TYPE_MASK)>>D3D10_SB_INSTRUCTION_RETURN_TYPE_SHIFT))
+// ENCODER MACRO: Encode the return type for instructions
+// in the opcode specific control range of OpcodeToken0 (shifted into place, then masked so out-of-range bits are dropped).
+#define ENCODE_D3D10_SB_INSTRUCTION_RETURN_TYPE(ReturnType) (((ReturnType)<<D3D10_SB_INSTRUCTION_RETURN_TYPE_SHIFT)&D3D10_SB_INSTRUCTION_RETURN_TYPE_MASK)
+
+// ----------------------------------------------------------------------------
+// Interface function body Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_FUNCTION_BODY
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  If it is extended, then
+//         it contains the actual instruction length in DWORDs, since
+//         it may not fit into 7 bits if enough operands are defined.
+//
+// OpcodeToken0 is followed by a DWORD that represents the function body
+// identifier.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Interface function table Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_FUNCTION_TABLE
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  If it is extended, then
+//         it contains the actual instruction length in DWORDs, since
+//         it may not fit into 7 bits if enough functions are defined.
+//
+// OpcodeToken0 is followed by a DWORD that represents the function table
+// identifier and another DWORD (TableLength) that gives the number of
+// functions in the table.
+//
+// This is followed by TableLength DWORDs which are function body indices.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Interface Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_DCL_INTERFACE
+// [11]    1 if the interface is indexed dynamically, 0 otherwise.
+// [23:12] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  If it is extended, then
+//         it contains the actual instruction length in DWORDs, since
+//         it may not fit into 7 bits if enough types are used.
+//
+// OpcodeToken0 is followed by a DWORD that represents the interface
+// identifier. Next is a DWORD that gives the expected function table
+// length. Then another DWORD (OpcodeToken3) with the following layout:
+//
+// [15:00] TableLength, the number of types that implement this interface
+// [31:16] ArrayLength, the number of interfaces that are defined in this array.
+//
+// This is followed by TableLength DWORDs which are function table
+// identifiers, representing possible tables for a given interface.
+//
+// ----------------------------------------------------------------------------
+
+#define D3D11_SB_INTERFACE_INDEXED_BIT_MASK  0x00000800 // bit 11 of OpcodeToken0
+#define D3D11_SB_INTERFACE_INDEXED_BIT_SHIFT 11
+
+#define D3D11_SB_INTERFACE_TABLE_LENGTH_MASK  0x0000ffff // bits [15:00]
+#define D3D11_SB_INTERFACE_TABLE_LENGTH_SHIFT 0
+
+#define D3D11_SB_INTERFACE_ARRAY_LENGTH_MASK  0xffff0000 // bits [31:16]
+#define D3D11_SB_INTERFACE_ARRAY_LENGTH_SHIFT 16
+
+// get/set the indexed bit for an interface definition (decode yields true/false; encode yields the bit positioned at bit 11)
+#define DECODE_D3D11_SB_INTERFACE_INDEXED_BIT(OpcodeToken0) ((((OpcodeToken0)&D3D11_SB_INTERFACE_INDEXED_BIT_MASK)>>D3D11_SB_INTERFACE_INDEXED_BIT_SHIFT) ? true : false)
+#define ENCODE_D3D11_SB_INTERFACE_INDEXED_BIT(IndexedBit) (((IndexedBit)<<D3D11_SB_INTERFACE_INDEXED_BIT_SHIFT)&D3D11_SB_INTERFACE_INDEXED_BIT_MASK)
+
+// get/set the table length for an interface definition
+// NOTE(review): the parameter is named OpcodeToken0, but the Interface Declaration layout comment places TableLength in the DWORD it calls OpcodeToken3 - pass that token here.
+#define DECODE_D3D11_SB_INTERFACE_TABLE_LENGTH(OpcodeToken0) ((UINT)(((OpcodeToken0)&D3D11_SB_INTERFACE_TABLE_LENGTH_MASK)>>D3D11_SB_INTERFACE_TABLE_LENGTH_SHIFT))
+#define ENCODE_D3D11_SB_INTERFACE_TABLE_LENGTH(TableLength) (((TableLength)<<D3D11_SB_INTERFACE_TABLE_LENGTH_SHIFT)&D3D11_SB_INTERFACE_TABLE_LENGTH_MASK)
+
+// get/set the array length for an interface definition (same token as the table length; ArrayLength occupies the upper 16 bits)
+#define DECODE_D3D11_SB_INTERFACE_ARRAY_LENGTH(OpcodeToken0) ((UINT)(((OpcodeToken0)&D3D11_SB_INTERFACE_ARRAY_LENGTH_MASK)>>D3D11_SB_INTERFACE_ARRAY_LENGTH_SHIFT))
+#define ENCODE_D3D11_SB_INTERFACE_ARRAY_LENGTH(ArrayLength) (((ArrayLength)<<D3D11_SB_INTERFACE_ARRAY_LENGTH_SHIFT)&D3D11_SB_INTERFACE_ARRAY_LENGTH_MASK)
+
+// ----------------------------------------------------------------------------
+// Interface call
+//
+// OpcodeToken0:
+//
+// [10:00] D3D10_SB_OPCODE_INTERFACE_CALL
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  If it is extended, then
+//         it contains the actual instruction length in DWORDs, since
+//         it may not fit into 7 bits if enough types are used.
+//
+// OpcodeToken0 is followed by a DWORD that gives the function index to
+// call in the function table specified for the given interface. 
+// Next is the interface operand.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Thread Group Declaration (Compute Shader)
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_THREAD_GROUP
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  If it is extended, then
+//         it contains the actual instruction length in DWORDs, since
+//         it may not fit into 7 bits if enough types are used.
+//
+// OpcodeToken0 is followed by 3 DWORDs, the Thread Group dimensions as UINT32:
+// x, y, z
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Typed Unordered Access View Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED
+// [15:11] D3D10_SB_RESOURCE_DIMENSION
+// [16:16] D3D11_SB_GLOBALLY_COHERENT_ACCESS or 0 (LOCALLY_COHERENT)
+// [17:17] D3D11_SB_RASTERIZER_ORDERED_ACCESS or 0
+// [23:18] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Models 4.0 through 5.0:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared.
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+//
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (u<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of UAV's in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the u# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (u<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of uav within space (may be dynamically indexed)
+// (2) a Resource Return Type token (ResourceReturnTypeToken)
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+// UAV access scope flags
+#define D3D11_SB_GLOBALLY_COHERENT_ACCESS 0x00010000 // bit 16
+#define D3D11_SB_ACCESS_COHERENCY_MASK    0x00010000 // same single bit as D3D11_SB_GLOBALLY_COHERENT_ACCESS
+
+// DECODER MACRO: Retrieve the UAV access coherency flag (bit 16) from OpcodeToken0. The result is left in place (not shifted down).
+#define DECODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(OperandToken0) ((OperandToken0)&D3D11_SB_ACCESS_COHERENCY_MASK)
+
+// ENCODER MACRO: Given a set of UAV access coherency flags, encode them in OpcodeToken0 (bits outside the mask are dropped).
+#define ENCODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(Flags) ((Flags)&D3D11_SB_ACCESS_COHERENCY_MASK)
+
+// Additional UAV access flags
+#define D3D11_SB_RASTERIZER_ORDERED_ACCESS 0x00020000 // bit 17
+
+// Resource flags mask.  Use to retrieve all resource flags, including the order preserving counter. D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER is #defined further down in this file; that works because the mask is only expanded where it is used.
+#define D3D11_SB_RESOURCE_FLAGS_MASK    (D3D11_SB_GLOBALLY_COHERENT_ACCESS|D3D11_SB_RASTERIZER_ORDERED_ACCESS|D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER)
+
+// DECODER MACRO: Retrieve UAV access flags from OpcodeToken0. NOTE(review): the parameter is spelled OperandToken0, but the UAV declaration layouts in this file place these bits in OpcodeToken0.
+#define DECODE_D3D11_SB_RESOURCE_FLAGS(OperandToken0) ((OperandToken0)&D3D11_SB_RESOURCE_FLAGS_MASK)
+
+// ENCODER MACRO: Given UAV access flags, encode them in OpcodeToken0.
+#define ENCODE_D3D11_SB_RESOURCE_FLAGS(Flags) ((Flags)&D3D11_SB_RESOURCE_FLAGS_MASK)
+
+// ----------------------------------------------------------------------------
+// Raw Unordered Access View Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW
+// [15:11] Ignored, 0
+// [16:16] D3D11_SB_GLOBALLY_COHERENT_ACCESS or 0 (LOCALLY_COHERENT)
+// [17:17] D3D11_SB_RASTERIZER_ORDERED_ACCESS or 0
+// [23:18] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand on Shader Models 4.0 through 5.0:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (u<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of UAV's in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the u# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (u<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of uav within space (may be dynamically indexed)
+// (2) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Structured Unordered Access View Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED
+// [15:11] Ignored, 0
+// [16:16] D3D11_SB_GLOBALLY_COHERENT_ACCESS or 0 (LOCALLY_COHERENT)
+// [17:17] D3D11_SB_RASTERIZER_ORDERED_ACCESS or 0
+// [22:18] Ignored, 0
+// [23:23] D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER or 0
+//
+//            The presence of this flag means that if a UAV is bound to the
+//            corresponding slot, it must have been created with 
+//            D3D11_BUFFER_UAV_FLAG_COUNTER at the API.  Also, the shader
+//            can contain either imm_atomic_alloc or _consume instructions
+//            operating on the given UAV.
+// 
+//            If this flag is not present, the shader can still contain
+//            either imm_atomic_alloc or imm_atomic_consume instructions for
+//            this UAV.  But if such instructions are present in this case,
+//            and a UAV is bound to the corresponding slot, it must have been created 
+//            with the D3D11_BUFFER_UAV_FLAG_APPEND flag at the API.
+//            Append buffers have a counter as well, but values returned 
+//            to the shader are only valid for the lifetime of the shader 
+//            invocation.
+//
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is 
+//     being declared.
+// (2) a DWORD indicating UINT32 byte stride
+//
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     u# register (D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (u<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of UAV's in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the u# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (u<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of uav within space (may be dynamically indexed)
+// (2) a DWORD indicating UINT32 byte stride
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+// UAV flags
+#define D3D11_SB_UAV_HAS_ORDER_PRESERVING_COUNTER 0x00800000 // bit 23
+#define D3D11_SB_UAV_FLAGS_MASK                   0x00800000 // currently covers only the order-preserving-counter bit
+
+// DECODER MACRO: Retrieve flags about UAV from OpcodeToken0 (the macro parameter is spelled OperandToken0). The result is left in place (not shifted down).
+#define DECODE_D3D11_SB_UAV_FLAGS(OperandToken0) ((OperandToken0)&D3D11_SB_UAV_FLAGS_MASK)
+
+// ENCODER MACRO: Given a set of UAV flags, encode them in OpcodeToken0 (bits outside the mask are dropped).
+#define ENCODE_D3D11_SB_UAV_FLAGS(Flags) ((Flags)&D3D11_SB_UAV_FLAGS_MASK)
+
+// ----------------------------------------------------------------------------
+// Raw Thread Group Shared Memory Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     g# register (D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) is being declared.
+// (2) a DWORD indicating the byte count, which must be a multiple of 4.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Structured Thread Group Shared Memory Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 3 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     g# register (D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) is 
+//     being declared.
+// (2) a DWORD indicating UINT32 struct byte stride
+// (3) a DWORD indicating UINT32 struct count
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Raw Shader Resource View Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_RESOURCE_RAW
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 1 operand:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+//
+// OpcodeToken0 is followed by 2 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (t<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of resources in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the t# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (t<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of resource within space (may be dynamically indexed)
+// (2) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+// Structured Shader Resource View Declaration
+//
+// OpcodeToken0:
+//
+// [10:00] D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED
+// [23:11] Ignored, 0
+// [30:24] Instruction length in DWORDs including the opcode token.
+// [31]    0 normally. 1 if extended operand definition, meaning next DWORD
+//         contains extended operand description.  This dcl is currently not
+//         extended.
+//
+// OpcodeToken0 is followed by 2 operands:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is 
+//     being declared.
+// (2) a DWORD indicating UINT32 struct byte stride
+//
+// OpcodeToken0 is followed by 3 operands on Shader Model 5.1 and later:
+// (1) an operand, starting with OperandToken0, defining which
+//     t# register (D3D10_SB_OPERAND_TYPE_RESOURCE) is being declared.
+//     The indexing dimension for the register must be D3D10_SB_OPERAND_INDEX_DIMENSION_3D, 
+//     and the meaning of the index dimensions are as follows: (t<id>[<lbound>:<ubound>])
+//       1 <id>:     variable ID being declared
+//       2 <lbound>: the lower bound of the range of resources in the space
+//       3 <ubound>: the upper bound (inclusive) of this range
+//     As opposed to when the t# is used in shader instructions, where the register
+//     must be D3D10_SB_OPERAND_INDEX_DIMENSION_2D, and the meaning of the index 
+//     dimensions are as follows: (t<id>[<idx>]):
+//       1 <id>:  variable ID being used (matches dcl)
+//       2 <idx>: absolute index of resource within space (may be dynamically indexed)
+// (2) a DWORD indicating UINT32 struct byte stride
+// (3) a DWORD indicating the space index.
+//
+// ----------------------------------------------------------------------------
+
+#endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_GAMES) */
+#pragma endregion

+ 5 - 0
include/dxc/Test/DxcTestUtils.h

@@ -60,6 +60,9 @@ public:
   int Run();
   int Run();
 };
 };
 
 
+// Map from a wide-string key to a blob. wstring because most uses need UTF-16: IDxcResult output names, include handler
+typedef std::map<std::wstring, CComPtr<IDxcBlob>> FileMap;
+
 // The result of running a single command in a run pipeline
 // The result of running a single command in a run pipeline
 struct FileRunCommandResult {
 struct FileRunCommandResult {
   CComPtr<IDxcOperationResult> OpResult; // The operation result, if any.
   CComPtr<IDxcOperationResult> OpResult; // The operation result, if any.
@@ -109,6 +112,7 @@ public:
   std::string Command;      // Command to run, eg %dxc
   std::string Command;      // Command to run, eg %dxc
   std::string Arguments;    // Arguments to command
   std::string Arguments;    // Arguments to command
   LPCWSTR CommandFileName;  // File name replacement for %s
   LPCWSTR CommandFileName;  // File name replacement for %s
+  FileMap *pVFS = nullptr;  // Files in virtual file system
 
 
 private:
 private:
   FileRunCommandResult RunFileChecker(const FileRunCommandResult *Prior, LPCWSTR dumpName = nullptr);
   FileRunCommandResult RunFileChecker(const FileRunCommandResult *Prior, LPCWSTR dumpName = nullptr);
@@ -117,6 +121,7 @@ private:
   FileRunCommandResult RunOpt(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
   FileRunCommandResult RunOpt(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
   FileRunCommandResult RunD3DReflect(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
   FileRunCommandResult RunD3DReflect(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
   FileRunCommandResult RunDxr(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
   FileRunCommandResult RunDxr(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
+  FileRunCommandResult RunLink(dxc::DxcDllSupport &DllSupport, const FileRunCommandResult *Prior);
   FileRunCommandResult RunTee(const FileRunCommandResult *Prior);
   FileRunCommandResult RunTee(const FileRunCommandResult *Prior);
   FileRunCommandResult RunXFail(const FileRunCommandResult *Prior);
   FileRunCommandResult RunXFail(const FileRunCommandResult *Prior);
   FileRunCommandResult RunDxilVer(dxc::DxcDllSupport& DllSupport, const FileRunCommandResult* Prior);
   FileRunCommandResult RunDxilVer(dxc::DxcDllSupport& DllSupport, const FileRunCommandResult* Prior);

+ 19 - 1
include/dxc/dxcapi.h

@@ -511,6 +511,17 @@ struct IDxcValidator : public IUnknown {
     ) = 0;
     ) = 0;
 };
 };
 
 
+CROSS_PLATFORM_UUIDOF(IDxcValidator2, "458e1fd1-b1b2-4750-a6e1-9c10f03bed92")
+struct IDxcValidator2 : public IDxcValidator {
+  // Validate a shader, optionally supplying debug module bitcode so that the validation output can carry line numbers.
+  virtual HRESULT STDMETHODCALLTYPE ValidateWithDebug(
+    _In_ IDxcBlob *pShader,                       // Shader to validate.
+    _In_ UINT32 Flags,                            // Validation flags.
+    _In_opt_ DxcBuffer *pOptDebugBitcode,         // Optional debug module bitcode to provide line numbers
+    _COM_Outptr_ IDxcOperationResult **ppResult   // Validation output status, buffer, and errors
+    ) = 0;
+};
+
 CROSS_PLATFORM_UUIDOF(IDxcContainerBuilder, "334b1f50-2292-4b35-99a1-25588d8c17fe")
 CROSS_PLATFORM_UUIDOF(IDxcContainerBuilder, "334b1f50-2292-4b35-99a1-25588d8c17fe")
 struct IDxcContainerBuilder : public IUnknown {
 struct IDxcContainerBuilder : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pDxilContainerHeader) = 0;                // Loads DxilContainer to the builder
   virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pDxilContainerHeader) = 0;                // Loads DxilContainer to the builder
@@ -576,12 +587,17 @@ struct IDxcVersionInfo2 : public IDxcVersionInfo {
 };
 };
 
 
 CROSS_PLATFORM_UUIDOF(IDxcVersionInfo3, "5e13e843-9d25-473c-9ad2-03b2d0b44b1e")
 CROSS_PLATFORM_UUIDOF(IDxcVersionInfo3, "5e13e843-9d25-473c-9ad2-03b2d0b44b1e")
-struct IDxcVersionInfo3 : public IDxcVersionInfo2 {
+struct IDxcVersionInfo3 : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE GetCustomVersionString(
   virtual HRESULT STDMETHODCALLTYPE GetCustomVersionString(
     _Outptr_result_z_ char **pVersionString // Custom version string for compiler. (Must be CoTaskMemFree()'d!)
     _Outptr_result_z_ char **pVersionString // Custom version string for compiler. (Must be CoTaskMemFree()'d!)
   ) = 0;
   ) = 0;
 };
 };
 
 
+struct DxcArgPair { // Name/value pair of wide strings; IDxcPdbUtils::OverrideArgs takes an array of these.
+  const WCHAR *pName;  // presumably the argument name - confirm against the OverrideArgs implementation
+  const WCHAR *pValue; // presumably the argument value; whether it may be null is not visible here
+};
+
 CROSS_PLATFORM_UUIDOF(IDxcPdbUtils, "E6C9647E-9D6A-4C3B-B94C-524B5A6C343D")
 CROSS_PLATFORM_UUIDOF(IDxcPdbUtils, "E6C9647E-9D6A-4C3B-B94C-524B5A6C343D")
 struct IDxcPdbUtils : public IUnknown {
 struct IDxcPdbUtils : public IUnknown {
   virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pPdbOrDxil) = 0;
   virtual HRESULT STDMETHODCALLTYPE Load(_In_ IDxcBlob *pPdbOrDxil) = 0;
@@ -616,6 +632,8 @@ struct IDxcPdbUtils : public IUnknown {
 
 
   virtual HRESULT STDMETHODCALLTYPE SetCompiler(_In_ IDxcCompiler3 *pCompiler) = 0;
   virtual HRESULT STDMETHODCALLTYPE SetCompiler(_In_ IDxcCompiler3 *pCompiler) = 0;
   virtual HRESULT STDMETHODCALLTYPE CompileForFullPDB(_COM_Outptr_ IDxcResult **ppResult) = 0;
   virtual HRESULT STDMETHODCALLTYPE CompileForFullPDB(_COM_Outptr_ IDxcResult **ppResult) = 0;
+  virtual HRESULT STDMETHODCALLTYPE OverrideArgs(_In_ DxcArgPair *pArgPairs, UINT32 uNumArgPairs) = 0;
+  virtual HRESULT STDMETHODCALLTYPE OverrideRootSignature(_In_ const WCHAR *pRootSignature) = 0;
 };
 };
 
 
 // Note: __declspec(selectany) requires 'extern'
 // Note: __declspec(selectany) requires 'extern'

+ 29 - 0
include/dxc/dxcerrors.h

@@ -0,0 +1,29 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// dxcerrors.h                                                               //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// Provides definition of error codes.                                        //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef __DXC_ERRORS__
+#define __DXC_ERRORS__
+
+#ifndef FACILITY_GRAPHICS
+#define FACILITY_GRAPHICS 36
+#endif
+
+// Defines EXCEPTION_<name> as a DWORD constant composed of 0xc0000000,
+// FACILITY_GRAPHICS shifted into bits 16 and up, and 0xff00 | (low byte of
+// status). `status` is parenthesized so that an expression argument is masked
+// as a whole instead of being split by operator precedence.
+#define DXC_EXCEPTION_CODE(name, status)                                 \
+    static constexpr DWORD EXCEPTION_##name =                 \
+    (0xc0000000u | (FACILITY_GRAPHICS << 16) | (0xff00u | ((status) & 0xffu)));
+
+DXC_EXCEPTION_CODE(LOAD_LIBRARY_FAILED, 0x00u)
+DXC_EXCEPTION_CODE(NO_HMODULE,          0x01u)
+DXC_EXCEPTION_CODE(GET_PROC_FAILED,     0x02u)
+
+#undef DXC_EXCEPTION_CODE
+
+#endif

+ 1 - 0
include/llvm/ADT/StringRef.h

@@ -64,6 +64,7 @@ namespace llvm {
 
 
     /// Construct an empty string ref.
     /// Construct an empty string ref.
     /*implicit*/ StringRef() : Data(nullptr), Length(0) {}
     /*implicit*/ StringRef() : Data(nullptr), Length(0) {}
+    StringRef(std::nullptr_t) = delete; // HLSL Change - So we don't accidentally pass `false` again
 
 
     /// Construct a string ref from a cstring.
     /// Construct a string ref from a cstring.
     /*implicit*/ StringRef(const char *Str)
     /*implicit*/ StringRef(const char *Str)

+ 2 - 0
include/llvm/IR/BasicBlock.h

@@ -245,6 +245,8 @@ public:
   inline const Instruction       &back() const { return InstList.back();  }
   inline const Instruction       &back() const { return InstList.back();  }
   inline       Instruction       &back()       { return InstList.back();  }
   inline       Instruction       &back()       { return InstList.back();  }
 
 
+  size_t compute_size_no_dbg() const; // HLSL Change - Get the size of the block without the debug insts
+
   /// \brief Return the underlying instruction list container.
   /// \brief Return the underlying instruction list container.
   ///
   ///
   /// Currently you need to access the underlying instruction list container
   /// Currently you need to access the underlying instruction list container

+ 1 - 0
include/llvm/IR/DebugInfo.h

@@ -60,6 +60,7 @@ bool stripDebugInfo(Function &F);
 
 
 /// \brief Return Debug Info Metadata Version by checking module flags.
 /// \brief Return Debug Info Metadata Version by checking module flags.
 unsigned getDebugMetadataVersionFromModule(const Module &M);
 unsigned getDebugMetadataVersionFromModule(const Module &M);
+bool hasDebugInfo(const Module &M); // HLSL Change - Helper function to check if there's real debug info (variables, types)
 
 
 /// \brief Utility to find all debug info in a module.
 /// \brief Utility to find all debug info in a module.
 ///
 ///

+ 2 - 0
include/llvm/Option/OptTable.h

@@ -133,6 +133,8 @@ public:
                    unsigned FlagsToInclude = 0,
                    unsigned FlagsToInclude = 0,
                    unsigned FlagsToExclude = 0) const;
                    unsigned FlagsToExclude = 0) const;
 
 
+  Option findOption(const char *normalizedName, unsigned FlagsToInclude = 0, unsigned FlagsToExclude = 0) const; // HLSL Change
+
   /// \brief Parse a list of arguments into an InputArgList.
   /// \brief Parse a list of arguments into an InputArgList.
   ///
   ///
   /// The resulting InputArgList will reference the strings in [\p ArgBegin,
   /// The resulting InputArgList will reference the strings in [\p ArgBegin,

+ 7 - 6
include/llvm/Support/Casting.h

@@ -16,6 +16,7 @@
 #define LLVM_SUPPORT_CASTING_H
 #define LLVM_SUPPORT_CASTING_H
 
 
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/type_traits.h"
 #include "llvm/Support/type_traits.h"
 #include <cassert>
 #include <cassert>
 
 
@@ -221,21 +222,21 @@ template <class X, class Y>
 inline typename std::enable_if<!is_simple_type<Y>::value,
 inline typename std::enable_if<!is_simple_type<Y>::value,
                                typename cast_retty<X, const Y>::ret_type>::type
                                typename cast_retty<X, const Y>::ret_type>::type
 cast(const Y &Val) {
 cast(const Y &Val) {
-  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast_convert_val<
   return cast_convert_val<
       X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val);
       X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val);
 }
 }
 
 
 template <class X, class Y>
 template <class X, class Y>
 inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
 inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
-  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast_convert_val<X, Y,
   return cast_convert_val<X, Y,
                           typename simplify_type<Y>::SimpleType>::doit(Val);
                           typename simplify_type<Y>::SimpleType>::doit(Val);
 }
 }
 
 
 template <class X, class Y>
 template <class X, class Y>
 inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
 inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
-  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast_convert_val<X, Y*,
   return cast_convert_val<X, Y*,
                           typename simplify_type<Y*>::SimpleType>::doit(Val);
                           typename simplify_type<Y*>::SimpleType>::doit(Val);
 }
 }
@@ -249,7 +250,7 @@ LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
 cast_or_null(const Y &Val) {
 cast_or_null(const Y &Val) {
   if (!Val)
   if (!Val)
     return nullptr;
     return nullptr;
-  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast<X>(Val);
   return cast<X>(Val);
 }
 }
 
 
@@ -259,7 +260,7 @@ LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
 cast_or_null(Y &Val) {
 cast_or_null(Y &Val) {
   if (!Val)
   if (!Val)
     return nullptr;
     return nullptr;
-  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast<X>(Val);
   return cast<X>(Val);
 }
 }
 
 
@@ -267,7 +268,7 @@ template <class X, class Y>
 LLVM_ATTRIBUTE_UNUSED_RESULT inline typename cast_retty<X, Y *>::ret_type
 LLVM_ATTRIBUTE_UNUSED_RESULT inline typename cast_retty<X, Y *>::ret_type
 cast_or_null(Y *Val) {
 cast_or_null(Y *Val) {
   if (!Val) return nullptr;
   if (!Val) return nullptr;
-  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
+  llvm_cast_assert(X, Val); // HLSL change
   return cast<X>(Val);
   return cast<X>(Val);
 }
 }
 
 

+ 7 - 1
include/llvm/Support/ErrorHandling.h

@@ -84,6 +84,9 @@ namespace llvm {
   LLVM_ATTRIBUTE_NORETURN void
   LLVM_ATTRIBUTE_NORETURN void
   llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr,
   llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr,
                             unsigned line=0);
                             unsigned line=0);
+
+  // HLSL Change - throw special exception for cast mismatch
+  void llvm_cast_assert_internal(const char *func);
 }
 }
 
 
 /// Marks that the current location is not supposed to be reachable.
 /// Marks that the current location is not supposed to be reachable.
@@ -94,7 +97,7 @@ namespace llvm {
 ///
 ///
 /// Use this instead of assert(0).  It conveys intent more clearly and
 /// Use this instead of assert(0).  It conveys intent more clearly and
 /// allows compilers to omit some unnecessary code.
 /// allows compilers to omit some unnecessary code.
-#ifndef NDEBUG
+#if 1 // HLSL Change - always throw exception with message for unreachable
 #define llvm_unreachable(msg) \
 #define llvm_unreachable(msg) \
   ::llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__)
   ::llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__)
 #elif defined(LLVM_BUILTIN_UNREACHABLE)
 #elif defined(LLVM_BUILTIN_UNREACHABLE)
@@ -103,4 +106,7 @@ namespace llvm {
 #define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
 #define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
 #endif
 #endif
 
 
+// HLSL Change - throw special exception for cast type mismatch
+#define llvm_cast_assert(X, Val) ((void)( (!!(isa<X>(Val))) || (::llvm::llvm_cast_assert_internal(__FUNCTION__), 0) ))
+
 #endif
 #endif

+ 184 - 21
lib/DXIL/DxilMetadataHelper.cpp

@@ -41,6 +41,37 @@ using std::string;
 using std::vector;
 using std::vector;
 using std::unique_ptr;
 using std::unique_ptr;
 
 
+namespace {
+// Decodes a serialized root signature stored in metadata node `pNode` and
+// writes the bytes into `SerializedRootSignature`, replacing its contents.
+// `pNode` must have exactly one operand: a ConstantAsMetadata wrapping a
+// ConstantDataArray whose element type is i8. Any other shape is rejected
+// through IFTBOOL with DXC_E_INCORRECT_DXIL_METADATA.
+void LoadSerializedRootSignature(MDNode *pNode,
+                                 std::vector<uint8_t> &SerializedRootSignature,
+                                 LLVMContext &Ctx) {
+  IFTBOOL(pNode->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA);
+
+  const auto *pConstMD =
+      dyn_cast<ConstantAsMetadata>(pNode->getOperand(0).get());
+  IFTBOOL(pConstMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+
+  const auto *pBytes = dyn_cast<ConstantDataArray>(pConstMD->getValue());
+  IFTBOOL(pBytes != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  IFTBOOL(pBytes->getElementType() == Type::getInt8Ty(Ctx),
+          DXC_E_INCORRECT_DXIL_METADATA);
+
+  // Copy the raw constant data out of the module into the caller's buffer.
+  const auto RawBytes = pBytes->getRawDataValues();
+  SerializedRootSignature.assign(RawBytes.begin(), RawBytes.end());
+}
+
+// Wraps `SerializedRootSignature` in a single-operand MDNode whose operand is
+// the byte buffer as a constant data array. Returns nullptr when the buffer
+// is empty.
+MDNode *
+EmitSerializedRootSignature(const std::vector<uint8_t> &SerializedRootSignature,
+                            LLVMContext &Ctx) {
+  if (SerializedRootSignature.empty())
+    return nullptr;
+  const llvm::ArrayRef<uint8_t> Bytes(SerializedRootSignature.data(),
+                                      SerializedRootSignature.size());
+  Constant *pBlob = llvm::ConstantDataArray::get(Ctx, Bytes);
+  return MDNode::get(Ctx, {ConstantAsMetadata::get(pBlob)});
+}
+
+} // namespace
 
 
 namespace hlsl {
 namespace hlsl {
 
 
@@ -57,6 +88,7 @@ const char DxilMDHelper::kDxilTempAllocaMDName[]                      = "dx.temp
 const char DxilMDHelper::kDxilNonUniformAttributeMDName[]             = "dx.nonuniform";
 const char DxilMDHelper::kDxilNonUniformAttributeMDName[]             = "dx.nonuniform";
 const char DxilMDHelper::kHLDxilResourceAttributeMDName[]             = "dx.hl.resource.attribute";
 const char DxilMDHelper::kHLDxilResourceAttributeMDName[]             = "dx.hl.resource.attribute";
 const char DxilMDHelper::kDxilValidatorVersionMDName[]                = "dx.valver";
 const char DxilMDHelper::kDxilValidatorVersionMDName[]                = "dx.valver";
+const char DxilMDHelper::kDxilDxrPayloadAnnotationsMDName[]           = "dx.dxrPayloadAnnotations";
 
 
 // This named metadata is not valid in final module (should be moved to DxilContainer)
 // This named metadata is not valid in final module (should be moved to DxilContainer)
 const char DxilMDHelper::kDxilRootSignatureMDName[]                   = "dx.rootSignature";
 const char DxilMDHelper::kDxilRootSignatureMDName[]                   = "dx.rootSignature";
@@ -77,7 +109,7 @@ const char DxilMDHelper::kDxilSourceArgsOldMDName[]                   = "llvm.db
 // This is reflection-only metadata
 // This is reflection-only metadata
 const char DxilMDHelper::kDxilCountersMDName[]                        = "dx.counters";
 const char DxilMDHelper::kDxilCountersMDName[]                        = "dx.counters";
 
 
-static std::array<const char *, 7> DxilMDNames = { {
+static std::array<const char *, 8> DxilMDNames = { {
   DxilMDHelper::kDxilVersionMDName,
   DxilMDHelper::kDxilVersionMDName,
   DxilMDHelper::kDxilShaderModelMDName,
   DxilMDHelper::kDxilShaderModelMDName,
   DxilMDHelper::kDxilEntryPointsMDName,
   DxilMDHelper::kDxilEntryPointsMDName,
@@ -85,6 +117,7 @@ static std::array<const char *, 7> DxilMDNames = { {
   DxilMDHelper::kDxilTypeSystemMDName,
   DxilMDHelper::kDxilTypeSystemMDName,
   DxilMDHelper::kDxilValidatorVersionMDName,
   DxilMDHelper::kDxilValidatorVersionMDName,
   DxilMDHelper::kDxilViewIdStateMDName,
   DxilMDHelper::kDxilViewIdStateMDName,
+  DxilMDHelper::kDxilDxrPayloadAnnotationsMDName,
 }};
 }};
 
 
 DxilMDHelper::DxilMDHelper(Module *pModule, std::unique_ptr<ExtraPropertyHelper> EPH)
 DxilMDHelper::DxilMDHelper(Module *pModule, std::unique_ptr<ExtraPropertyHelper> EPH)
@@ -384,14 +417,12 @@ void DxilMDHelper::EmitRootSignature(
     return;
     return;
   }
   }
 
 
-  Constant *V = llvm::ConstantDataArray::get(
-      m_Ctx, llvm::ArrayRef<uint8_t>(SerializedRootSignature.data(),
-                                     SerializedRootSignature.size()));
+  MDNode *Node = EmitSerializedRootSignature(SerializedRootSignature, m_Ctx);
 
 
   NamedMDNode *pRootSignatureNamedMD = m_pModule->getNamedMetadata(kDxilRootSignatureMDName);
   NamedMDNode *pRootSignatureNamedMD = m_pModule->getNamedMetadata(kDxilRootSignatureMDName);
   IFTBOOL(pRootSignatureNamedMD == nullptr, DXC_E_INCORRECT_DXIL_METADATA);
   IFTBOOL(pRootSignatureNamedMD == nullptr, DXC_E_INCORRECT_DXIL_METADATA);
   pRootSignatureNamedMD = m_pModule->getOrInsertNamedMetadata(kDxilRootSignatureMDName);
   pRootSignatureNamedMD = m_pModule->getOrInsertNamedMetadata(kDxilRootSignatureMDName);
-  pRootSignatureNamedMD->addOperand(MDNode::get(m_Ctx, {ConstantAsMetadata::get(V)}));
+  pRootSignatureNamedMD->addOperand(Node);
   return ;
   return ;
 }
 }
 
 
@@ -445,22 +476,7 @@ void DxilMDHelper::LoadRootSignature(std::vector<uint8_t> &SerializedRootSignatu
   IFTBOOL(pRootSignatureNamedMD->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA);
   IFTBOOL(pRootSignatureNamedMD->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA);
 
 
   MDNode *pNode = pRootSignatureNamedMD->getOperand(0);
   MDNode *pNode = pRootSignatureNamedMD->getOperand(0);
-  IFTBOOL(pNode->getNumOperands() == 1, DXC_E_INCORRECT_DXIL_METADATA);
-  const MDOperand &MDO = pNode->getOperand(0);
-
-  const ConstantAsMetadata *pMetaData = dyn_cast<ConstantAsMetadata>(MDO.get());
-  IFTBOOL(pMetaData != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
-  const ConstantDataArray *pData =
-      dyn_cast<ConstantDataArray>(pMetaData->getValue());
-  IFTBOOL(pData != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
-  IFTBOOL(pData->getElementType() == Type::getInt8Ty(m_Ctx),
-          DXC_E_INCORRECT_DXIL_METADATA);
-
-  SerializedRootSignature.clear();
-  unsigned size = pData->getRawDataValues().size();
-  SerializedRootSignature.resize(size);
-  memcpy(SerializedRootSignature.data(),
-         (const uint8_t *)pData->getRawDataValues().begin(), size);
+  LoadSerializedRootSignature(pNode, SerializedRootSignature, m_Ctx);
 }
 }
 
 
 static const MDTuple *CastToTupleOrNull(const MDOperand &MDO) {
 static const MDTuple *CastToTupleOrNull(const MDOperand &MDO) {
@@ -850,6 +866,130 @@ void DxilMDHelper::LoadDxilTypeSystem(DxilTypeSystem &TypeSystem) {
   }
   }
 }
 }
 
 
+// Rebuilds the dx.dxrPayloadAnnotations named metadata from the payload
+// annotation map of `TypeSystem`.
+// Any existing named metadata of that name is erased first. If the map holds
+// at least one annotation, a single tuple is emitted with the layout
+//   [ tag, (undef value of struct type, field annotation list)* ].
+// When the map is empty no named metadata is created.
+void DxilMDHelper::EmitDxrPayloadAnnotations(DxilTypeSystem &TypeSystem) {
+  auto &TypeMap = TypeSystem.GetPayloadAnnotationMap();
+  vector<Metadata *> MDVals;
+  MDVals.emplace_back(Uint32ToConstMD(kDxilPayloadAnnotationStructTag)); // Tag
+  for (auto &Entry : TypeMap) {
+    StructType *pStructType = const_cast<StructType *>(Entry.first);
+    DxilPayloadAnnotation *pA = Entry.second.get();
+    // Emit struct type field annotations.
+    Metadata *pMD = EmitDxrPayloadStructAnnotation(*pA);
+
+    // The struct type is carried as the type of an undef value.
+    MDVals.push_back(ValueAsMetadata::get(UndefValue::get(pStructType)));
+    MDVals.push_back(pMD);
+  }
+
+  NamedMDNode *pDxrPayloadAnnotationsMD = m_pModule->getNamedMetadata(kDxilDxrPayloadAnnotationsMDName);
+  if (pDxrPayloadAnnotationsMD != nullptr) {
+    m_pModule->eraseNamedMetadata(pDxrPayloadAnnotationsMD);
+  }
+
+  // More than just the tag means at least one annotation was collected.
+  if (MDVals.size() > 1) {
+    pDxrPayloadAnnotationsMD = m_pModule->getOrInsertNamedMetadata(kDxilDxrPayloadAnnotationsMDName);
+    pDxrPayloadAnnotationsMD->addOperand(MDNode::get(m_Ctx, MDVals));
+  }
+}
+
+// Builds an MDNode with one entry per field of payload struct annotation
+// `SA`; entry i is the metadata produced by EmitDxrPayloadFieldAnnotation for
+// field i and the struct's i-th element type.
+Metadata *
+DxilMDHelper::EmitDxrPayloadStructAnnotation(const DxilPayloadAnnotation &SA) {
+  const unsigned NumFields = SA.GetNumFields();
+  const StructType *pStructTy = SA.GetStructType();
+
+  vector<Metadata *> FieldMDs(NumFields);
+  for (unsigned Idx = 0; Idx != NumFields; ++Idx) {
+    FieldMDs[Idx] = EmitDxrPayloadFieldAnnotation(SA.GetFieldAnnotation(Idx),
+                                                  pStructTy->getElementType(Idx));
+  }
+
+  return MDNode::get(m_Ctx, FieldMDs);
+}
+
+// Reads the payload access qualifier bitmask from `MDO` and stores it on
+// `FA`. Bits outside DXIL::PayloadAccessQualifierValidMask trigger a debug
+// assert, set m_bExtraMetadata, and are dropped before the mask is stored.
+void DxilMDHelper::LoadDxrPayloadAccessQualifiers(const MDOperand &MDO,
+                                               DxilPayloadFieldAnnotation &FA) {
+  const unsigned rawMask = ConstMDToInt32(MDO);
+  const auto unknownBits = rawMask & ~DXIL::PayloadAccessQualifierValidMask;
+  if (unknownBits) {
+    DXASSERT(false, "Unknown payload access qualifier bits set");
+    m_bExtraMetadata = true;
+  }
+  const unsigned validMask = rawMask & DXIL::PayloadAccessQualifierValidMask;
+  FA.SetPayloadFieldQualifierMask(validMask);
+}
+
+// Loads one payload field annotation from `MDO` into `FA`.
+// `MDO` must be a non-null MDTuple holding an even number of operands laid
+// out as (tag, value) pairs, and every value operand must be non-null;
+// violations are rejected with DXC_E_INCORRECT_DXIL_METADATA. Tags other than
+// the access tag trigger a debug assert and set m_bExtraMetadata.
+void DxilMDHelper::LoadDxrPayloadFieldAnnoation(
+    const MDOperand &MDO, DxilPayloadFieldAnnotation &FA) {
+  IFTBOOL(MDO.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  const MDTuple *pTagValueList = dyn_cast<MDTuple>(MDO.get());
+  IFTBOOL(pTagValueList != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  const unsigned NumOps = pTagValueList->getNumOperands();
+  IFTBOOL((NumOps & 0x1) == 0, DXC_E_INCORRECT_DXIL_METADATA);
+
+  for (unsigned Idx = 0; Idx < NumOps; Idx += 2) {
+    const unsigned Tag = ConstMDToUint32(pTagValueList->getOperand(Idx));
+    const MDOperand &ValueMDO = pTagValueList->getOperand(Idx + 1);
+    IFTBOOL(ValueMDO.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+
+    if (Tag == kDxilPayloadFieldAnnotationAccessTag) {
+      LoadDxrPayloadAccessQualifiers(ValueMDO, FA);
+    } else {
+      DXASSERT(false, "Unknown payload field annotation tag");
+      m_bExtraMetadata = true;
+    }
+  }
+}
+
+// Loads the field annotations of payload struct annotation `SA` from `MDO`.
+// `MDO` must be a non-null MDTuple with exactly SA.GetNumFields() operands;
+// operand i is loaded into field annotation i. Any mismatch is rejected with
+// DXC_E_INCORRECT_DXIL_METADATA.
+void DxilMDHelper::LoadDxrPayloadFieldAnnoations(const MDOperand &MDO,
+                                                DxilPayloadAnnotation &SA) {
+  IFTBOOL(MDO.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+  const MDTuple *pFieldList = dyn_cast<MDTuple>(MDO.get());
+  IFTBOOL(pFieldList != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+
+  const unsigned NumFields = SA.GetNumFields();
+  IFTBOOL(pFieldList->getNumOperands() == NumFields,
+          DXC_E_INCORRECT_DXIL_METADATA);
+
+  for (unsigned Idx = 0; Idx != NumFields; ++Idx)
+    LoadDxrPayloadFieldAnnoation(pFieldList->getOperand(Idx),
+                                 SA.GetFieldAnnotation(Idx));
+}
+
+// Loads every payload struct annotation stored in tuple `MDT` into
+// `TypeSystem`.
+// Expected layout, matching what EmitDxrPayloadAnnotations writes:
+//   [ tag, (value whose type is the payload struct, field annotation list)* ]
+// so the operand count must be odd. The count is validated before any operand
+// is read, and all pairs are loaded rather than only the first one.
+// Malformed metadata is rejected with DXC_E_INCORRECT_DXIL_METADATA.
+void DxilMDHelper::LoadDxrPayloadAnnotationNode(const llvm::MDTuple &MDT,
+                                                DxilTypeSystem &TypeSystem) {
+  // An odd count guarantees operand 0 (the tag) exists and that the remaining
+  // operands form complete pairs.
+  const unsigned NumOperands = MDT.getNumOperands();
+  IFTBOOL((NumOperands & 0x1) == 1, DXC_E_INCORRECT_DXIL_METADATA);
+
+  unsigned Tag = ConstMDToUint32(MDT.getOperand(0));
+  IFTBOOL(Tag == kDxilPayloadAnnotationStructTag, DXC_E_INCORRECT_DXIL_METADATA);
+
+  for (unsigned i = 1; i < NumOperands; i += 2) {
+    Constant *pGV = dyn_cast<Constant>(ValueMDToValue(MDT.getOperand(i)));
+    IFTBOOL(pGV != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+    StructType *pGVType = dyn_cast<StructType>(pGV->getType());
+    IFTBOOL(pGVType != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+
+    // Register a payload annotation for this struct type.
+    // NOTE(review): how AddPayloadAnnotation behaves when the type is already
+    // registered is not visible here - confirm before relying on it.
+    DxilPayloadAnnotation *pPA = TypeSystem.AddPayloadAnnotation(pGVType);
+
+    LoadDxrPayloadFieldAnnoations(MDT.getOperand(i + 1), *pPA);
+  }
+}
+
+// Loads all payload annotations from the dx.dxrPayloadAnnotations named
+// metadata into `TypeSystem`. Does nothing when the named metadata is absent.
+// If the minimum validator version is below 1.6, the presence of this
+// metadata triggers a debug assert and sets m_bExtraMetadata, but loading
+// still proceeds. Every operand must be an MDTuple.
+void DxilMDHelper::LoadDxrPayloadAnnotations(DxilTypeSystem &TypeSystem) {
+  NamedMDNode *pNamedMD =
+      m_pModule->getNamedMetadata(kDxilDxrPayloadAnnotationsMDName);
+  if (!pNamedMD)
+    return;
+
+  if (DXIL::CompareVersions(m_MinValMajor, m_MinValMinor, 1, 6) < 0) {
+    DXASSERT(false, "payload access qualifier emitted for dxil version < 1.6");
+    m_bExtraMetadata = true;
+  }
+  DXASSERT(pNamedMD->getNumOperands() != 0, "empty metadata node?");
+
+  const unsigned NumNodes = pNamedMD->getNumOperands();
+  for (unsigned Idx = 0; Idx != NumNodes; ++Idx) {
+    const MDTuple *pNode = dyn_cast<MDTuple>(pNamedMD->getOperand(Idx));
+    IFTBOOL(pNode != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
+    LoadDxrPayloadAnnotationNode(*pNode, TypeSystem);
+  }
+}
+
 Metadata *DxilMDHelper::EmitDxilTemplateArgAnnotation(const DxilTemplateArgAnnotation &annotation) {
 Metadata *DxilMDHelper::EmitDxilTemplateArgAnnotation(const DxilTemplateArgAnnotation &annotation) {
   SmallVector<Metadata *, 2> MDVals;
   SmallVector<Metadata *, 2> MDVals;
   if (annotation.IsType()) {
   if (annotation.IsType()) {
@@ -1065,6 +1205,7 @@ Metadata *DxilMDHelper::EmitDxilFieldAnnotation(const DxilFieldAnnotation &FA) {
   return MDNode::get(m_Ctx, MDVals);
   return MDNode::get(m_Ctx, MDVals);
 }
 }
 
 
+
 void DxilMDHelper::LoadDxilFieldAnnotation(const MDOperand &MDO, DxilFieldAnnotation &FA) {
 void DxilMDHelper::LoadDxilFieldAnnotation(const MDOperand &MDO, DxilFieldAnnotation &FA) {
   IFTBOOL(MDO.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
   IFTBOOL(MDO.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
   const MDTuple *pTupleMD = dyn_cast<MDTuple>(MDO.get());
   const MDTuple *pTupleMD = dyn_cast<MDTuple>(MDO.get());
@@ -1116,6 +1257,17 @@ void DxilMDHelper::LoadDxilFieldAnnotation(const MDOperand &MDO, DxilFieldAnnota
   }
   }
 }
 }
 
 
+// Emits the metadata for one payload field annotation as a two-element
+// tag-value list: the access tag followed by the field's qualifier mask.
+// `fieldType` is not used by this function.
+Metadata *
+DxilMDHelper::EmitDxrPayloadFieldAnnotation(const DxilPayloadFieldAnnotation &FA, Type* fieldType) {
+  // Tag-Value list.
+  Metadata *TagValue[] = {
+      Uint32ToConstMD(kDxilPayloadFieldAnnotationAccessTag),
+      Uint32ToConstMD(FA.GetPayloadFieldQualifierMask())};
+
+  return MDNode::get(m_Ctx, TagValue);
+}
+
 const Function *DxilMDHelper::LoadDxilFunctionProps(const MDTuple *pProps,
 const Function *DxilMDHelper::LoadDxilFunctionProps(const MDTuple *pProps,
                                               hlsl::DxilFunctionProps *props) {
                                               hlsl::DxilFunctionProps *props) {
   unsigned idx = 0;
   unsigned idx = 0;
@@ -1345,6 +1497,13 @@ MDTuple *DxilMDHelper::EmitDxilEntryProperties(uint64_t rawShaderFlag,
         MDNode::get(m_Ctx, {Uint32ToConstMD(autoBindingSpace)}));
         MDNode::get(m_Ctx, {Uint32ToConstMD(autoBindingSpace)}));
   }
   }
 
 
+  if (!props.serializedRootSignature.empty() &&
+      DXIL::CompareVersions(m_MinValMajor, m_MinValMinor, 1, 6) > 0) {
+    MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilEntryRootSigTag));
+    MDVals.emplace_back(
+        EmitSerializedRootSignature(props.serializedRootSignature, m_Ctx));
+  }
+
   if (!MDVals.empty())
   if (!MDVals.empty())
     return MDNode::get(m_Ctx, MDVals);
     return MDNode::get(m_Ctx, MDVals);
   else
   else
@@ -1468,6 +1627,10 @@ void DxilMDHelper::LoadDxilEntryProperties(const MDOperand &MDO,
       MDNode *pNode = cast<MDNode>(MDO.get());
       MDNode *pNode = cast<MDNode>(MDO.get());
       props.waveSize = ConstMDToUint32(pNode->getOperand(0));
       props.waveSize = ConstMDToUint32(pNode->getOperand(0));
     } break;
     } break;
+    case DxilMDHelper::kDxilEntryRootSigTag: {
+      MDNode *pNode = cast<MDNode>(MDO.get());
+      LoadSerializedRootSignature(pNode, props.serializedRootSignature, m_Ctx);
+    } break;
     default:
     default:
       DXASSERT(false, "Unknown extended shader properties tag");
       DXASSERT(false, "Unknown extended shader properties tag");
       m_bExtraMetadata = true;
       m_bExtraMetadata = true;

+ 45 - 13
lib/DXIL/DxilModule.cpp

@@ -1309,8 +1309,6 @@ void DxilModule::UpdateValidatorVersionMetadata() {
 }
 }
 
 
 void DxilModule::ResetSerializedRootSignature(std::vector<uint8_t> &Value) {
 void DxilModule::ResetSerializedRootSignature(std::vector<uint8_t> &Value) {
-  m_SerializedRootSignature.clear();
-  m_SerializedRootSignature.reserve(Value.size());
   m_SerializedRootSignature.assign(Value.begin(), Value.end());
   m_SerializedRootSignature.assign(Value.begin(), Value.end());
 }
 }
 
 
@@ -1318,6 +1316,10 @@ DxilTypeSystem &DxilModule::GetTypeSystem() {
   return *m_pTypeSystem;
   return *m_pTypeSystem;
 }
 }
 
 
+const DxilTypeSystem &DxilModule::GetTypeSystem() const { // Const overload: read-only access to the type system held in m_pTypeSystem.
+  return *m_pTypeSystem;
+}
+
 std::vector<unsigned> &DxilModule::GetSerializedViewIdState() {
 std::vector<unsigned> &DxilModule::GetSerializedViewIdState() {
   return m_SerializedState;
   return m_SerializedState;
 }
 }
@@ -1453,6 +1455,14 @@ void DxilModule::EmitDxilMetadata() {
        (m_ValMajor > 1 || (m_ValMajor == 1 && m_ValMinor >= 1)))) {
        (m_ValMajor > 1 || (m_ValMajor == 1 && m_ValMinor >= 1)))) {
     m_pMDHelper->EmitDxilViewIdState(m_SerializedState);
     m_pMDHelper->EmitDxilViewIdState(m_SerializedState);
   }
   }
+
+  // Emit the DXR Payload Annotations only for library Dxil 1.6 and above.
+  if (m_pSM->IsLib()) {
+    if (DXIL::CompareVersions(m_DxilMajor, m_DxilMinor, 1, 6) >= 0) {
+      m_pMDHelper->EmitDxrPayloadAnnotations(GetTypeSystem());
+    }
+  }
+
   EmitLLVMUsed();
   EmitLLVMUsed();
   MDTuple *pEntry = m_pMDHelper->EmitDxilEntryPointTuple(GetEntryFunction(), m_EntryName, pMDSignatures, pMDResources, pMDProperties);
   MDTuple *pEntry = m_pMDHelper->EmitDxilEntryPointTuple(GetEntryFunction(), m_EntryName, pMDSignatures, pMDResources, pMDProperties);
   vector<MDNode *> Entries;
   vector<MDNode *> Entries;
@@ -1505,7 +1515,6 @@ bool DxilModule::HasMetadataErrors() {
 
 
 void DxilModule::LoadDxilMetadata() {
 void DxilModule::LoadDxilMetadata() {
   m_bMetadataErrors = false;
   m_bMetadataErrors = false;
-  m_pMDHelper->LoadDxilVersion(m_DxilMajor, m_DxilMinor);
   m_pMDHelper->LoadValidatorVersion(m_ValMajor, m_ValMinor);
   m_pMDHelper->LoadValidatorVersion(m_ValMajor, m_ValMinor);
   const ShaderModel *loadedSM;
   const ShaderModel *loadedSM;
   m_pMDHelper->LoadDxilShaderModel(loadedSM);
   m_pMDHelper->LoadDxilShaderModel(loadedSM);
@@ -1547,6 +1556,9 @@ void DxilModule::LoadDxilMetadata() {
 
 
   // Now that we have the UseMinPrecision flag, set shader model:
   // Now that we have the UseMinPrecision flag, set shader model:
   SetShaderModel(loadedSM, m_bUseMinPrecision);
   SetShaderModel(loadedSM, m_bUseMinPrecision);
+  // SetShaderModel will initialize m_DxilMajor/m_DxilMinor to min for SM,
+  // so, load here after shader model so it matches the metadata.
+  m_pMDHelper->LoadDxilVersion(m_DxilMajor, m_DxilMinor);
 
 
   if (loadedSM->IsLib()) {
   if (loadedSM->IsLib()) {
     for (unsigned i = 1; i < pEntries->getNumOperands(); i++) {
     for (unsigned i = 1; i < pEntries->getNumOperands(); i++) {
@@ -1607,6 +1619,17 @@ void DxilModule::LoadDxilMetadata() {
     m_pTypeSystem->GetFunctionAnnotationMap().clear();
     m_pTypeSystem->GetFunctionAnnotationMap().clear();
   }
   }
 
 
+  // Payload annotations not required for consumption of dxil.
+  try {
+    m_pMDHelper->LoadDxrPayloadAnnotations(*m_pTypeSystem.get());
+  } catch (hlsl::Exception &) {
+    m_bMetadataErrors = true;
+#ifdef DBG
+    throw;
+#endif
+    m_pTypeSystem->GetPayloadAnnotationMap().clear();
+  }
+
   m_pMDHelper->LoadRootSignature(m_SerializedRootSignature);
   m_pMDHelper->LoadRootSignature(m_SerializedRootSignature);
 
 
   m_pMDHelper->LoadDxilViewIdState(m_SerializedState);
   m_pMDHelper->LoadDxilViewIdState(m_SerializedState);
@@ -1740,20 +1763,29 @@ bool DxilModule::StripReflection() {
     // since they have not yet been converted for legacy layout.
     // since they have not yet been converted for legacy layout.
     // Keep all structs contained in any we must keep.
     // Keep all structs contained in any we must keep.
     SmallStructSetVector structsToKeep;
     SmallStructSetVector structsToKeep;
-    SmallStructSetVector structsToRemove;
-    for (auto &item : m_pTypeSystem->GetStructAnnotationMap()) {
       SmallStructSetVector containedStructs;
       SmallStructSetVector containedStructs;
-      if (!ResourceTypeRequiresTranslation(item.first, containedStructs))
-        structsToRemove.insert(item.first);
-      else
-        structsToKeep.insert(containedStructs.begin(), containedStructs.end());
+    for (auto &CBuf : GetCBuffers())
+      if (StructType *ST = dyn_cast<StructType>(CBuf->GetHLSLType()))
+        if (ResourceTypeRequiresTranslation(ST, containedStructs))
+          structsToKeep.insert(containedStructs.begin(), containedStructs.end());
+
+    for (auto &UAV : GetUAVs()) {
+      if (DXIL::IsStructuredBuffer(UAV->GetKind()))
+        if (StructType *ST = dyn_cast<StructType>(UAV->GetHLSLType()))
+          if (ResourceTypeRequiresTranslation(ST, containedStructs))
+            structsToKeep.insert(containedStructs.begin(), containedStructs.end());
     }
     }
 
 
-    for (auto Ty : structsToKeep)
-      structsToRemove.remove(Ty);
-    for (auto Ty : structsToRemove) {
-      m_pTypeSystem->GetStructAnnotationMap().erase(Ty);
+    for (auto &SRV : GetSRVs()) {
+      if (SRV->IsStructuredBuffer() || SRV->IsTBuffer())
+        if (StructType *ST = dyn_cast<StructType>(SRV->GetHLSLType()))
+          if (ResourceTypeRequiresTranslation(ST, containedStructs))
+            structsToKeep.insert(containedStructs.begin(), containedStructs.end());
     }
     }
+
+    m_pTypeSystem->GetStructAnnotationMap().remove_if([structsToKeep](
+      const std::pair<const StructType *, std::unique_ptr<DxilStructAnnotation>>
+          &I) { return !structsToKeep.count(I.first); });
   } else {
   } else {
     // Remove struct annotations.
     // Remove struct annotations.
     if (!m_pTypeSystem->GetStructAnnotationMap().empty()) {
     if (!m_pTypeSystem->GetStructAnnotationMap().empty()) {

+ 8 - 0
lib/DXIL/DxilResource.cpp

@@ -77,6 +77,14 @@ void DxilResource::SetElementStride(unsigned ElemStride) {
   m_ElementStride = ElemStride;
   m_ElementStride = ElemStride;
 }
 }
 
 
+unsigned DxilResource::GetBaseAlignLog2() const { // Returns the stored m_baseAlignLog2 value.
+  return m_baseAlignLog2;
+}
+
+void DxilResource::SetBaseAlignLog2(unsigned baseAlignLog2) { // Stores baseAlignLog2 in m_baseAlignLog2 without validation.
+  m_baseAlignLog2 = baseAlignLog2;
+}
+
 DXIL::SamplerFeedbackType DxilResource::GetSamplerFeedbackType() const {
 DXIL::SamplerFeedbackType DxilResource::GetSamplerFeedbackType() const {
   return m_SamplerFeedbackType;
   return m_SamplerFeedbackType;
 }
 }

+ 3 - 1
lib/DXIL/DxilResourceProperties.cpp

@@ -155,11 +155,13 @@ DxilResourceProperties loadPropsFromResourceBase(const DxilResourceBase *Res) {
 
 
       break;
       break;
     case DXIL::ResourceKind::StructuredBuffer:
     case DXIL::ResourceKind::StructuredBuffer:
+    {
       RP.StructStrideInBytes = Res.GetElementStride();
       RP.StructStrideInBytes = Res.GetElementStride();
+      RP.Basic.BaseAlignLog2 = Res.GetBaseAlignLog2();
       break;
       break;
+    }
     case DXIL::ResourceKind::Texture2DMS:
     case DXIL::ResourceKind::Texture2DMS:
     case DXIL::ResourceKind::Texture2DMSArray:
     case DXIL::ResourceKind::Texture2DMSArray:
-      break;
     case DXIL::ResourceKind::TypedBuffer:
     case DXIL::ResourceKind::TypedBuffer:
     case DXIL::ResourceKind::Texture1D:
     case DXIL::ResourceKind::Texture1D:
     case DXIL::ResourceKind::Texture2D:
     case DXIL::ResourceKind::Texture2D:

+ 2 - 1
lib/DXIL/DxilShaderFlags.cpp

@@ -373,6 +373,7 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
   M->GetValidatorVersion(valMajor, valMinor);
   M->GetValidatorVersion(valMajor, valMinor);
   bool hasMulticomponentUAVLoadsBackCompat = valMajor == 1 && valMinor == 0;
   bool hasMulticomponentUAVLoadsBackCompat = valMajor == 1 && valMinor == 0;
   bool hasViewportOrRTArrayIndexBackCombat = valMajor == 1 && valMinor < 4;
   bool hasViewportOrRTArrayIndexBackCombat = valMajor == 1 && valMinor < 4;
+  bool hasBarycentricsBackCompat = valMajor == 1 && valMinor < 6;
 
 
   Type *int16Ty = Type::getInt16Ty(F->getContext());
   Type *int16Ty = Type::getInt16Ty(F->getContext());
   Type *int64Ty = Type::getInt64Ty(F->getContext());
   Type *int64Ty = Type::getInt64Ty(F->getContext());
@@ -630,7 +631,7 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F,
   flag.SetViewID(hasViewID);
   flag.SetViewID(hasViewID);
   flag.SetViewportAndRTArrayIndex(hasViewportOrRTArrayIndex);
   flag.SetViewportAndRTArrayIndex(hasViewportOrRTArrayIndex);
   flag.SetShadingRate(hasShadingRate);
   flag.SetShadingRate(hasShadingRate);
-  flag.SetBarycentrics(hasBarycentrics);
+  flag.SetBarycentrics(hasBarycentricsBackCompat ? false : hasBarycentrics);
   flag.SetSamplerFeedback(hasSamplerFeedback);
   flag.SetSamplerFeedback(hasSamplerFeedback);
   flag.SetRaytracingTier1_1(hasRaytracingTier1_1);
   flag.SetRaytracingTier1_1(hasRaytracingTier1_1);
   flag.SetAtomicInt64OnTypedResource(hasAtomicInt64OnTypedResource);
   flag.SetAtomicInt64OnTypedResource(hasAtomicInt64OnTypedResource);

+ 94 - 63
lib/DXIL/DxilShaderModel.cpp

@@ -63,6 +63,7 @@ bool ShaderModel::IsValidForDxil() const {
       case 4:
       case 4:
       case 5:
       case 5:
       case 6:
       case 6:
+      case 7:
       // VALRULE-TEXT:END
       // VALRULE-TEXT:END
         return true;
         return true;
       case kOfflineMinor:
       case kOfflineMinor:
@@ -94,69 +95,78 @@ const ShaderModel *ShaderModel::Get(Kind Kind, unsigned Major, unsigned Minor) {
   {1540,8}, //ps_6_4
   {1540,8}, //ps_6_4
   {1541,9}, //ps_6_5
   {1541,9}, //ps_6_5
   {1542,10}, //ps_6_6
   {1542,10}, //ps_6_6
-  {66560,11}, //vs_4_0
-  {66561,12}, //vs_4_1
-  {66816,13}, //vs_5_0
-  {66817,14}, //vs_5_1
-  {67072,15}, //vs_6_0
-  {67073,16}, //vs_6_1
-  {67074,17}, //vs_6_2
-  {67075,18}, //vs_6_3
-  {67076,19}, //vs_6_4
-  {67077,20}, //vs_6_5
-  {67078,21}, //vs_6_6
-  {132096,22}, //gs_4_0
-  {132097,23}, //gs_4_1
-  {132352,24}, //gs_5_0
-  {132353,25}, //gs_5_1
-  {132608,26}, //gs_6_0
-  {132609,27}, //gs_6_1
-  {132610,28}, //gs_6_2
-  {132611,29}, //gs_6_3
-  {132612,30}, //gs_6_4
-  {132613,31}, //gs_6_5
-  {132614,32}, //gs_6_6
-  {197888,33}, //hs_5_0
-  {197889,34}, //hs_5_1
-  {198144,35}, //hs_6_0
-  {198145,36}, //hs_6_1
-  {198146,37}, //hs_6_2
-  {198147,38}, //hs_6_3
-  {198148,39}, //hs_6_4
-  {198149,40}, //hs_6_5
-  {198150,41}, //hs_6_6
-  {263424,42}, //ds_5_0
-  {263425,43}, //ds_5_1
-  {263680,44}, //ds_6_0
-  {263681,45}, //ds_6_1
-  {263682,46}, //ds_6_2
-  {263683,47}, //ds_6_3
-  {263684,48}, //ds_6_4
-  {263685,49}, //ds_6_5
-  {263686,50}, //ds_6_6
-  {328704,51}, //cs_4_0
-  {328705,52}, //cs_4_1
-  {328960,53}, //cs_5_0
-  {328961,54}, //cs_5_1
-  {329216,55}, //cs_6_0
-  {329217,56}, //cs_6_1
-  {329218,57}, //cs_6_2
-  {329219,58}, //cs_6_3
-  {329220,59}, //cs_6_4
-  {329221,60}, //cs_6_5
-  {329222,61}, //cs_6_6
-  {394753,62}, //lib_6_1
-  {394754,63}, //lib_6_2
-  {394755,64}, //lib_6_3
-  {394756,65}, //lib_6_4
-  {394757,66}, //lib_6_5
-  {394758,67}, //lib_6_6
+  {1543,11}, //ps_6_7
+  {66560,12}, //vs_4_0
+  {66561,13}, //vs_4_1
+  {66816,14}, //vs_5_0
+  {66817,15}, //vs_5_1
+  {67072,16}, //vs_6_0
+  {67073,17}, //vs_6_1
+  {67074,18}, //vs_6_2
+  {67075,19}, //vs_6_3
+  {67076,20}, //vs_6_4
+  {67077,21}, //vs_6_5
+  {67078,22}, //vs_6_6
+  {67079,23}, //vs_6_7
+  {132096,24}, //gs_4_0
+  {132097,25}, //gs_4_1
+  {132352,26}, //gs_5_0
+  {132353,27}, //gs_5_1
+  {132608,28}, //gs_6_0
+  {132609,29}, //gs_6_1
+  {132610,30}, //gs_6_2
+  {132611,31}, //gs_6_3
+  {132612,32}, //gs_6_4
+  {132613,33}, //gs_6_5
+  {132614,34}, //gs_6_6
+  {132615,35}, //gs_6_7
+  {197888,36}, //hs_5_0
+  {197889,37}, //hs_5_1
+  {198144,38}, //hs_6_0
+  {198145,39}, //hs_6_1
+  {198146,40}, //hs_6_2
+  {198147,41}, //hs_6_3
+  {198148,42}, //hs_6_4
+  {198149,43}, //hs_6_5
+  {198150,44}, //hs_6_6
+  {198151,45}, //hs_6_7
+  {263424,46}, //ds_5_0
+  {263425,47}, //ds_5_1
+  {263680,48}, //ds_6_0
+  {263681,49}, //ds_6_1
+  {263682,50}, //ds_6_2
+  {263683,51}, //ds_6_3
+  {263684,52}, //ds_6_4
+  {263685,53}, //ds_6_5
+  {263686,54}, //ds_6_6
+  {263687,55}, //ds_6_7
+  {328704,56}, //cs_4_0
+  {328705,57}, //cs_4_1
+  {328960,58}, //cs_5_0
+  {328961,59}, //cs_5_1
+  {329216,60}, //cs_6_0
+  {329217,61}, //cs_6_1
+  {329218,62}, //cs_6_2
+  {329219,63}, //cs_6_3
+  {329220,64}, //cs_6_4
+  {329221,65}, //cs_6_5
+  {329222,66}, //cs_6_6
+  {329223,67}, //cs_6_7
+  {394753,68}, //lib_6_1
+  {394754,69}, //lib_6_2
+  {394755,70}, //lib_6_3
+  {394756,71}, //lib_6_4
+  {394757,72}, //lib_6_5
+  {394758,73}, //lib_6_6
+  {394759,74}, //lib_6_7
   // lib_6_x is for offline linking only, and relaxes restrictions
   // lib_6_x is for offline linking only, and relaxes restrictions
-  {394767,68},//lib_6_x
-  {853509,69}, //ms_6_5
-  {853510,70}, //ms_6_6
-  {919045,71}, //as_6_5
-  {919046,72}, //as_6_6
+  {394767,75},//lib_6_x
+  {853509,76}, //ms_6_5
+  {853510,77}, //ms_6_6
+  {853511,78}, //ms_6_7
+  {919045,79}, //as_6_5
+  {919046,80}, //as_6_6
+  {919047,81}, //as_6_7
   };
   };
   unsigned hash = (unsigned)Kind << 16 | Major << 8 | Minor;
   unsigned hash = (unsigned)Kind << 16 | Major << 8 | Minor;
   auto it = hashToIdxMap.find(hash);
   auto it = hashToIdxMap.find(hash);
@@ -237,6 +247,12 @@ const ShaderModel *ShaderModel::GetByName(const char *pszName) {
       break;
       break;
     }
     }
   else return GetInvalid();
   else return GetInvalid();
+  case '7':
+    if (Major == 6) {
+      Minor = 7;
+      break;
+    }
+  else return GetInvalid();
   // VALRULE-TEXT:END
   // VALRULE-TEXT:END
     case 'x':
     case 'x':
       if (kind == Kind::Library && Major == 6) {
       if (kind == Kind::Library && Major == 6) {
@@ -279,8 +295,11 @@ void ShaderModel::GetDxilVersion(unsigned &DxilMajor, unsigned &DxilMinor) const
   case 6:
   case 6:
     DxilMinor = 6;
     DxilMinor = 6;
     break;
     break;
+  case 7:
+    DxilMinor = 7;
+    break;
   case kOfflineMinor: // Always update this to highest dxil version
   case kOfflineMinor: // Always update this to highest dxil version
-    DxilMinor = 6;
+    DxilMinor = 7;
     break;
     break;
   // VALRULE-TEXT:END
   // VALRULE-TEXT:END
   default:
   default:
@@ -316,6 +335,9 @@ void ShaderModel::GetMinValidatorVersion(unsigned &ValMajor, unsigned &ValMinor)
   case 6:
   case 6:
     ValMinor = 6;
     ValMinor = 6;
     break;
     break;
+  case 7:
+    ValMinor = 7;
+    break;
   // VALRULE-TEXT:END
   // VALRULE-TEXT:END
   case kOfflineMinor:
   case kOfflineMinor:
     ValMajor = 0;
     ValMajor = 0;
@@ -365,6 +387,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Pixel, 6, 4, "ps_6_4", 32, 8, true, true, UINT_MAX),
   SM(Kind::Pixel, 6, 4, "ps_6_4", 32, 8, true, true, UINT_MAX),
   SM(Kind::Pixel, 6, 5, "ps_6_5", 32, 8, true, true, UINT_MAX),
   SM(Kind::Pixel, 6, 5, "ps_6_5", 32, 8, true, true, UINT_MAX),
   SM(Kind::Pixel, 6, 6, "ps_6_6", 32, 8, true, true, UINT_MAX),
   SM(Kind::Pixel, 6, 6, "ps_6_6", 32, 8, true, true, UINT_MAX),
+  SM(Kind::Pixel, 6, 7, "ps_6_7", 32, 8, true, true, UINT_MAX),
   SM(Kind::Vertex, 4, 0, "vs_4_0", 16, 16, false, false, 0),
   SM(Kind::Vertex, 4, 0, "vs_4_0", 16, 16, false, false, 0),
   SM(Kind::Vertex, 4, 1, "vs_4_1", 32, 32, false, false, 0),
   SM(Kind::Vertex, 4, 1, "vs_4_1", 32, 32, false, false, 0),
   SM(Kind::Vertex, 5, 0, "vs_5_0", 32, 32, true, true, 64),
   SM(Kind::Vertex, 5, 0, "vs_5_0", 32, 32, true, true, 64),
@@ -376,6 +399,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Vertex, 6, 4, "vs_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Vertex, 6, 4, "vs_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Vertex, 6, 5, "vs_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Vertex, 6, 5, "vs_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Vertex, 6, 6, "vs_6_6", 32, 32, true, true, UINT_MAX),
   SM(Kind::Vertex, 6, 6, "vs_6_6", 32, 32, true, true, UINT_MAX),
+  SM(Kind::Vertex, 6, 7, "vs_6_7", 32, 32, true, true, UINT_MAX),
   SM(Kind::Geometry, 4, 0, "gs_4_0", 16, 32, false, false, 0),
   SM(Kind::Geometry, 4, 0, "gs_4_0", 16, 32, false, false, 0),
   SM(Kind::Geometry, 4, 1, "gs_4_1", 32, 32, false, false, 0),
   SM(Kind::Geometry, 4, 1, "gs_4_1", 32, 32, false, false, 0),
   SM(Kind::Geometry, 5, 0, "gs_5_0", 32, 32, true, true, 64),
   SM(Kind::Geometry, 5, 0, "gs_5_0", 32, 32, true, true, 64),
@@ -387,6 +411,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Geometry, 6, 4, "gs_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Geometry, 6, 4, "gs_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Geometry, 6, 5, "gs_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Geometry, 6, 5, "gs_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Geometry, 6, 6, "gs_6_6", 32, 32, true, true, UINT_MAX),
   SM(Kind::Geometry, 6, 6, "gs_6_6", 32, 32, true, true, UINT_MAX),
+  SM(Kind::Geometry, 6, 7, "gs_6_7", 32, 32, true, true, UINT_MAX),
   SM(Kind::Hull, 5, 0, "hs_5_0", 32, 32, true, true, 64),
   SM(Kind::Hull, 5, 0, "hs_5_0", 32, 32, true, true, 64),
   SM(Kind::Hull, 5, 1, "hs_5_1", 32, 32, true, true, 64),
   SM(Kind::Hull, 5, 1, "hs_5_1", 32, 32, true, true, 64),
   SM(Kind::Hull, 6, 0, "hs_6_0", 32, 32, true, true, UINT_MAX),
   SM(Kind::Hull, 6, 0, "hs_6_0", 32, 32, true, true, UINT_MAX),
@@ -396,6 +421,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Hull, 6, 4, "hs_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Hull, 6, 4, "hs_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Hull, 6, 5, "hs_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Hull, 6, 5, "hs_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Hull, 6, 6, "hs_6_6", 32, 32, true, true, UINT_MAX),
   SM(Kind::Hull, 6, 6, "hs_6_6", 32, 32, true, true, UINT_MAX),
+  SM(Kind::Hull, 6, 7, "hs_6_7", 32, 32, true, true, UINT_MAX),
   SM(Kind::Domain, 5, 0, "ds_5_0", 32, 32, true, true, 64),
   SM(Kind::Domain, 5, 0, "ds_5_0", 32, 32, true, true, 64),
   SM(Kind::Domain, 5, 1, "ds_5_1", 32, 32, true, true, 64),
   SM(Kind::Domain, 5, 1, "ds_5_1", 32, 32, true, true, 64),
   SM(Kind::Domain, 6, 0, "ds_6_0", 32, 32, true, true, UINT_MAX),
   SM(Kind::Domain, 6, 0, "ds_6_0", 32, 32, true, true, UINT_MAX),
@@ -405,6 +431,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Domain, 6, 4, "ds_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Domain, 6, 4, "ds_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Domain, 6, 5, "ds_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Domain, 6, 5, "ds_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Domain, 6, 6, "ds_6_6", 32, 32, true, true, UINT_MAX),
   SM(Kind::Domain, 6, 6, "ds_6_6", 32, 32, true, true, UINT_MAX),
+  SM(Kind::Domain, 6, 7, "ds_6_7", 32, 32, true, true, UINT_MAX),
   SM(Kind::Compute, 4, 0, "cs_4_0", 0, 0, false, false, 0),
   SM(Kind::Compute, 4, 0, "cs_4_0", 0, 0, false, false, 0),
   SM(Kind::Compute, 4, 1, "cs_4_1", 0, 0, false, false, 0),
   SM(Kind::Compute, 4, 1, "cs_4_1", 0, 0, false, false, 0),
   SM(Kind::Compute, 5, 0, "cs_5_0", 0, 0, true, true, 64),
   SM(Kind::Compute, 5, 0, "cs_5_0", 0, 0, true, true, 64),
@@ -416,18 +443,22 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = {
   SM(Kind::Compute, 6, 4, "cs_6_4", 0, 0, true, true, UINT_MAX),
   SM(Kind::Compute, 6, 4, "cs_6_4", 0, 0, true, true, UINT_MAX),
   SM(Kind::Compute, 6, 5, "cs_6_5", 0, 0, true, true, UINT_MAX),
   SM(Kind::Compute, 6, 5, "cs_6_5", 0, 0, true, true, UINT_MAX),
   SM(Kind::Compute, 6, 6, "cs_6_6", 0, 0, true, true, UINT_MAX),
   SM(Kind::Compute, 6, 6, "cs_6_6", 0, 0, true, true, UINT_MAX),
+  SM(Kind::Compute, 6, 7, "cs_6_7", 0, 0, true, true, UINT_MAX),
   SM(Kind::Library, 6, 1, "lib_6_1", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 1, "lib_6_1", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 2, "lib_6_2", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 2, "lib_6_2", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 3, "lib_6_3", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 3, "lib_6_3", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 4, "lib_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 4, "lib_6_4", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 5, "lib_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 5, "lib_6_5", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 6, "lib_6_6", 32, 32, true, true, UINT_MAX),
   SM(Kind::Library, 6, 6, "lib_6_6", 32, 32, true, true, UINT_MAX),
+  SM(Kind::Library, 6, 7, "lib_6_7", 32, 32, true, true, UINT_MAX),
   // lib_6_x is for offline linking only, and relaxes restrictions
   // lib_6_x is for offline linking only, and relaxes restrictions
   SM(Kind::Library,  6, kOfflineMinor, "lib_6_x",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Library,  6, kOfflineMinor, "lib_6_x",  32, 32,  true,  true,  UINT_MAX),
   SM(Kind::Mesh, 6, 5, "ms_6_5", 0, 0, true, true, UINT_MAX),
   SM(Kind::Mesh, 6, 5, "ms_6_5", 0, 0, true, true, UINT_MAX),
   SM(Kind::Mesh, 6, 6, "ms_6_6", 0, 0, true, true, UINT_MAX),
   SM(Kind::Mesh, 6, 6, "ms_6_6", 0, 0, true, true, UINT_MAX),
+  SM(Kind::Mesh, 6, 7, "ms_6_7", 0, 0, true, true, UINT_MAX),
   SM(Kind::Amplification, 6, 5, "as_6_5", 0, 0, true, true, UINT_MAX),
   SM(Kind::Amplification, 6, 5, "as_6_5", 0, 0, true, true, UINT_MAX),
   SM(Kind::Amplification, 6, 6, "as_6_6", 0, 0, true, true, UINT_MAX),
   SM(Kind::Amplification, 6, 6, "as_6_6", 0, 0, true, true, UINT_MAX),
+  SM(Kind::Amplification, 6, 7, "as_6_7", 0, 0, true, true, UINT_MAX),
   // Values before Invalid must remain sorted by Kind, then Major, then Minor.
   // Values before Invalid must remain sorted by Kind, then Major, then Minor.
   SM(Kind::Invalid,  0, 0, "invalid", 0,  0,   false, false, 0),
   SM(Kind::Invalid,  0, 0, "invalid", 0,  0,   false, false, 0),
   // VALRULE-TEXT:END
   // VALRULE-TEXT:END

+ 133 - 5
lib/DXIL/DxilTypeSystem.cpp

@@ -80,7 +80,69 @@ void DxilFieldAnnotation::SetFieldName(const std::string &FieldName) { m_FieldNa
 bool DxilFieldAnnotation::IsCBVarUsed() const { return m_bCBufferVarUsed; }
 bool DxilFieldAnnotation::IsCBVarUsed() const { return m_bCBufferVarUsed; }
 void DxilFieldAnnotation::SetCBVarUsed(bool used) { m_bCBufferVarUsed = used; }
 void DxilFieldAnnotation::SetCBVarUsed(bool used) { m_bCBufferVarUsed = used; }
 
 
+//------------------------------------------------------------------------------
+//
+// DxilPayloadFieldAnnotation class methods.
+//
+bool DxilPayloadFieldAnnotation::HasCompType() const { return m_CompType.GetKind() != CompType::Kind::Invalid; }
+const CompType &DxilPayloadFieldAnnotation::GetCompType() const { return m_CompType; }
+void DxilPayloadFieldAnnotation::SetCompType(CompType::Kind kind) { m_CompType = CompType(kind); }
+uint32_t DxilPayloadFieldAnnotation::GetPayloadFieldQualifierMask() const {
+  return m_bitmask;
+}
+
+unsigned DxilPayloadFieldAnnotation::GetBitOffsetForShaderStage(DXIL::PayloadAccessShaderStage shaderStage ) {
+  unsigned bitOffset = static_cast<unsigned>(shaderStage) *
+                       DXIL::PayloadAccessQualifierBitsPerStage;
+  return bitOffset;
+}
+
+void DxilPayloadFieldAnnotation::SetPayloadFieldQualifierMask(uint32_t fieldBitmask) {
+  DXASSERT((fieldBitmask & ~DXIL::PayloadAccessQualifierValidMask) == 0,
+           "Unknown payload access qualifier bits set");
+  m_bitmask = fieldBitmask & DXIL::PayloadAccessQualifierValidMask;
+}
+
+void DxilPayloadFieldAnnotation::AddPayloadFieldQualifier(
+    DXIL::PayloadAccessShaderStage shaderStage, DXIL::PayloadAccessQualifier qualifier) {
+  unsigned accessBits = static_cast<unsigned>(qualifier);
+  DXASSERT((accessBits & ~DXIL::PayloadAccessQualifierValidMaskPerStage) == 0,
+           "Unknown payload access qualifier bits set");
+  accessBits &= DXIL::PayloadAccessQualifierValidMaskPerStage;
 
 
+  accessBits <<= GetBitOffsetForShaderStage(shaderStage);
+  m_bitmask |= accessBits;
+}
+
+// Decodes the access qualifier stored in m_bitmask for the given shader stage.
+// Exactly one of the Read/Write bits set yields Read or Write respectively;
+// both bits set, or neither bit set, is reported as ReadWrite.
+DXIL::PayloadAccessQualifier DxilPayloadFieldAnnotation::GetPayloadFieldQualifier(
+    DXIL::PayloadAccessShaderStage shaderStage) const {
+  // Shift this stage's bits down to the low end of the word.
+  const unsigned stageBits =
+      m_bitmask >> GetBitOffsetForShaderStage(shaderStage);
+
+  const bool canRead =
+      (stageBits & static_cast<unsigned>(DXIL::PayloadAccessQualifier::Read)) != 0;
+  const bool canWrite =
+      (stageBits & static_cast<unsigned>(DXIL::PayloadAccessQualifier::Write)) != 0;
+
+  // ReadWrite is both the explicit "both bits" value and the default used
+  // when no qualifier bit is present for the stage.
+  if (canRead == canWrite)
+    return DXIL::PayloadAccessQualifier::ReadWrite;
+
+  return canRead ? DXIL::PayloadAccessQualifier::Read
+                 : DXIL::PayloadAccessQualifier::Write;
+}
+
+bool DxilPayloadFieldAnnotation::HasAnnotations() const {
+  return m_bitmask != 0;
+}
 
 
 //------------------------------------------------------------------------------
 //------------------------------------------------------------------------------
 //
 //
@@ -98,10 +160,6 @@ bool DxilTemplateArgAnnotation::IsIntegral() const { return m_Type == nullptr; }
 int64_t DxilTemplateArgAnnotation::GetIntegral() const { return m_Integral; }
 int64_t DxilTemplateArgAnnotation::GetIntegral() const { return m_Integral; }
 void DxilTemplateArgAnnotation::SetIntegral(int64_t i64) { m_Type = nullptr; m_Integral = i64; }
 void DxilTemplateArgAnnotation::SetIntegral(int64_t i64) { m_Type = nullptr; m_Integral = i64; }
 
 
-//------------------------------------------------------------------------------
-//
-// DxilStructAnnotation class methods.
-//
 unsigned DxilStructAnnotation::GetNumFields() const {
 unsigned DxilStructAnnotation::GetNumFields() const {
   return (unsigned)m_FieldAnnotations.size();
   return (unsigned)m_FieldAnnotations.size();
 }
 }
@@ -200,7 +258,30 @@ const Function *DxilFunctionAnnotation::GetFunction() const {
 
 
 //------------------------------------------------------------------------------
 //------------------------------------------------------------------------------
 //
 //
-// DxilStructAnnotationSystem class methods.
+// DxilPayloadAnnotation class methods.
+//
+unsigned DxilPayloadAnnotation::GetNumFields() const {
+  return (unsigned)m_FieldAnnotations.size();
+}
+
+DxilPayloadFieldAnnotation &DxilPayloadAnnotation::GetFieldAnnotation(unsigned FieldIdx) {
+  return m_FieldAnnotations[FieldIdx];
+}
+
+const DxilPayloadFieldAnnotation &DxilPayloadAnnotation::GetFieldAnnotation(unsigned FieldIdx) const {
+  return m_FieldAnnotations[FieldIdx];
+}
+
+const StructType *DxilPayloadAnnotation::GetStructType() const {
+  return m_pStructType;
+}
+void DxilPayloadAnnotation::SetStructType(const llvm::StructType *Ty) {
+  m_pStructType = Ty;
+}
+
+//------------------------------------------------------------------------------
+//
+// DxilTypeSystem class methods.
 //
 //
 DxilTypeSystem::DxilTypeSystem(Module *pModule)
 DxilTypeSystem::DxilTypeSystem(Module *pModule)
     : m_pModule(pModule),
     : m_pModule(pModule),
@@ -246,6 +327,53 @@ DxilTypeSystem::StructAnnotationMap &DxilTypeSystem::GetStructAnnotationMap() {
   return m_StructAnnotations;
   return m_StructAnnotations;
 }
 }
 
 
+const DxilTypeSystem::StructAnnotationMap &DxilTypeSystem::GetStructAnnotationMap() const{
+  return m_StructAnnotations;
+}
+
+DxilPayloadAnnotation *DxilTypeSystem::AddPayloadAnnotation(const StructType *pStructType) {
+  DXASSERT_NOMSG(m_PayloadAnnotations.find(pStructType) == m_PayloadAnnotations.end());
+  DxilPayloadAnnotation *pA = new DxilPayloadAnnotation();
+  m_PayloadAnnotations[pStructType] = unique_ptr<DxilPayloadAnnotation>(pA);
+  pA->m_pStructType = pStructType;
+  pA->m_FieldAnnotations.resize(pStructType->getNumElements());
+  return pA;
+}
+
+// Looks up the payload annotation registered for pStructType.
+// Returns nullptr when no annotation has been added for that type.
+DxilPayloadAnnotation *DxilTypeSystem::GetPayloadAnnotation(const StructType *pStructType) {
+  auto found = m_PayloadAnnotations.find(pStructType);
+  return found == m_PayloadAnnotations.end() ? nullptr : found->second.get();
+}
+
+// Const overload: looks up the payload annotation registered for pStructType.
+// Returns nullptr when no annotation has been added for that type.
+const DxilPayloadAnnotation *
+DxilTypeSystem::GetPayloadAnnotation(const StructType *pStructType) const {
+  auto found = m_PayloadAnnotations.find(pStructType);
+  return found == m_PayloadAnnotations.end() ? nullptr : found->second.get();
+}
+
+// Removes the payload annotation registered for pStructType.
+// The precondition assert checks the payload annotation map, i.e. the
+// container this function actually erases from, so that erasing a type that
+// was never given a payload annotation is caught.
+void DxilTypeSystem::ErasePayloadAnnotation(const StructType *pStructType) {
+  DXASSERT_NOMSG(m_PayloadAnnotations.find(pStructType) != m_PayloadAnnotations.end());
+  m_PayloadAnnotations.remove_if([pStructType](
+      const std::pair<const StructType *, std::unique_ptr<DxilPayloadAnnotation>>
+          &I) { return pStructType == I.first; });
+}
+
+DxilTypeSystem::PayloadAnnotationMap &DxilTypeSystem::GetPayloadAnnotationMap() {
+  return m_PayloadAnnotations;
+}
+
+const DxilTypeSystem::PayloadAnnotationMap &DxilTypeSystem::GetPayloadAnnotationMap() const{
+  return m_PayloadAnnotations;
+}
+
 DxilFunctionAnnotation *DxilTypeSystem::AddFunctionAnnotation(const Function *pFunction) {
 DxilFunctionAnnotation *DxilTypeSystem::AddFunctionAnnotation(const Function *pFunction) {
   DXASSERT_NOMSG(m_FunctionAnnotations.find(pFunction) == m_FunctionAnnotations.end());
   DXASSERT_NOMSG(m_FunctionAnnotations.find(pFunction) == m_FunctionAnnotations.end());
   DxilFunctionAnnotation *pA = new DxilFunctionAnnotation();
   DxilFunctionAnnotation *pA = new DxilFunctionAnnotation();

+ 22 - 1
lib/DXIL/DxilUtil.cpp

@@ -14,6 +14,7 @@
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/DXIL/DxilUtil.h"
 #include "dxc/DXIL/DxilModule.h"
 #include "dxc/DXIL/DxilModule.h"
 #include "dxc/DXIL/DxilOperations.h"
 #include "dxc/DXIL/DxilOperations.h"
+#include "dxc/HLSL/DxilConvergentName.h"
 #include "dxc/Support/Global.h"
 #include "dxc/Support/Global.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/Twine.h"
@@ -314,7 +315,7 @@ static void EmitWarningOrErrorOnGlobalVariable(llvm::LLVMContext &Ctx, GlobalVar
 
 
   if (GV) {
   if (GV) {
     Module &M = *GV->getParent();
     Module &M = *GV->getParent();
-    if (getDebugMetadataVersionFromModule(M) != 0) {
+    if (hasDebugInfo(M)) {
       DebugInfoFinder FinderObj;
       DebugInfoFinder FinderObj;
       DebugInfoFinder &Finder = FinderObj;
       DebugInfoFinder &Finder = FinderObj;
       // Debug modules have no dxil modules. Use it if you got it.
       // Debug modules have no dxil modules. Use it if you got it.
@@ -1171,6 +1172,26 @@ void ReplaceRawBufferStore64Bit(llvm::Function *F, llvm::Type *ETy, hlsl::OP *hl
   }
   }
 }
 }
 
 
+bool IsConvergentMarker(const char *Name) {
+  StringRef RName = Name;
+  return RName.startswith(kConvergentFunctionPrefix);
+}
+
+bool IsConvergentMarker(const Function *F) {
+  return F && F->getName().startswith(kConvergentFunctionPrefix);
+}
+
+// Returns true when V is a call instruction whose callee is a convergent
+// marker function; any other kind of value yields false.
+bool IsConvergentMarker(Value *V) {
+  if (CallInst *Call = dyn_cast<CallInst>(V))
+    return IsConvergentMarker(Call->getCalledFunction());
+  return false;
+}
+
+Value *GetConvergentSource(Value *V) {
+  return cast<CallInst>(V)->getOperand(0);
+}
+
 }
 }
 }
 }
 
 

+ 8 - 0
lib/DxcSupport/FileIOHelper.cpp

@@ -778,6 +778,14 @@ HRESULT DxcCreateBlobWithEncodingFromPinned(LPCVOID pText, UINT32 size,
   return DxcCreateBlob(pText, size, true, false, true, codePage, nullptr, pBlobEncoding);
   return DxcCreateBlob(pText, size, true, false, true, codePage, nullptr, pBlobEncoding);
 }
 }
 
 
+// Wraps the caller-owned buffer [pText, pText + size) in a blob object and
+// returns it through pBlob as an IDxcBlob.
+// Returns the failing HRESULT (and a null *pBlob) if the blob cannot be
+// created, otherwise the result of querying the new object for IDxcBlob.
+HRESULT DxcCreateBlobFromPinned(
+    _In_bytecount_(size) LPCVOID pText, UINT32 size,
+    _COM_Outptr_ IDxcBlob **pBlob) throw() {
+  CComPtr<IDxcBlobEncoding> pBlobEncoding;
+  HRESULT hr = DxcCreateBlob(pText, size, true, false, false, CP_ACP, nullptr, &pBlobEncoding);
+  if (FAILED(hr)) {
+    // On failure pBlobEncoding is still null; calling QueryInterface on it
+    // would dereference a null pointer, so report the error instead.
+    *pBlob = nullptr;
+    return hr;
+  }
+  return pBlobEncoding.QueryInterface(pBlob);
+}
+
 _Use_decl_annotations_
 _Use_decl_annotations_
 HRESULT
 HRESULT
 DxcCreateBlobWithEncodingFromStream(IStream *pStream, bool newInstanceAlways,
 DxcCreateBlobWithEncodingFromStream(IStream *pStream, bool newInstanceAlways,

+ 26 - 10
lib/DxcSupport/HLSLOptions.cpp

@@ -138,16 +138,20 @@ bool DxcOpts::IsLibraryProfile() {
   return TargetProfile.startswith("lib_");
   return TargetProfile.startswith("lib_");
 }
 }
 
 
-bool DxcOpts::IsDebugInfoEnabled() {
+bool DxcOpts::GenerateFullDebugInfo() {
   return DebugInfo;
   return DebugInfo;
 }
 }
 
 
+bool DxcOpts::GeneratePDB() {
+  return DebugInfo || SourceOnlyDebug;
+}
+
 bool DxcOpts::EmbedDebugInfo() {
 bool DxcOpts::EmbedDebugInfo() {
   return EmbedDebug;
   return EmbedDebug;
 }
 }
 
 
 bool DxcOpts::EmbedPDBName() {
 bool DxcOpts::EmbedPDBName() {
-  return IsDebugInfoEnabled() || !DebugFile.empty();
+  return GeneratePDB() || !DebugFile.empty();
 }
 }
 
 
 bool DxcOpts::DebugFileIsDirectory() {
 bool DxcOpts::DebugFileIsDirectory() {
@@ -613,6 +617,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.DefaultRowMajor = Args.hasFlag(OPT_Zpr, OPT_INVALID, false);
   opts.DefaultRowMajor = Args.hasFlag(OPT_Zpr, OPT_INVALID, false);
   opts.DefaultColMajor = Args.hasFlag(OPT_Zpc, OPT_INVALID, false);
   opts.DefaultColMajor = Args.hasFlag(OPT_Zpc, OPT_INVALID, false);
   opts.DumpBin = Args.hasFlag(OPT_dumpbin, OPT_INVALID, false);
   opts.DumpBin = Args.hasFlag(OPT_dumpbin, OPT_INVALID, false);
+  opts.Link = Args.hasFlag(OPT_link, OPT_INVALID, false);
   opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_no_legacy_cbuf_layout, OPT_INVALID, false);
   opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_no_legacy_cbuf_layout, OPT_INVALID, false);
   opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_not_use_legacy_cbuf_load_, OPT_INVALID, opts.NotUseLegacyCBufLoad);
   opts.NotUseLegacyCBufLoad = Args.hasFlag(OPT_not_use_legacy_cbuf_load_, OPT_INVALID, opts.NotUseLegacyCBufLoad);
   opts.PackPrefixStable = Args.hasFlag(OPT_pack_prefix_stable, OPT_INVALID, false);
   opts.PackPrefixStable = Args.hasFlag(OPT_pack_prefix_stable, OPT_INVALID, false);
@@ -627,8 +632,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   opts.StripDebug = Args.hasFlag(OPT_Qstrip_debug, OPT_INVALID, false);
   opts.StripDebug = Args.hasFlag(OPT_Qstrip_debug, OPT_INVALID, false);
   opts.EmbedDebug = Args.hasFlag(OPT_Qembed_debug, OPT_INVALID, false);
   opts.EmbedDebug = Args.hasFlag(OPT_Qembed_debug, OPT_INVALID, false);
   opts.SourceInDebugModule = Args.hasFlag(OPT_Qsource_in_debug_module, OPT_INVALID, false);
   opts.SourceInDebugModule = Args.hasFlag(OPT_Qsource_in_debug_module, OPT_INVALID, false);
-  opts.SourceOnlyDebug = Args.hasFlag(OPT_Qsource_only_debug, OPT_INVALID, false);
-  opts.FullDebug = Args.hasFlag(OPT_Qfull_debug, OPT_INVALID, false);
+  opts.SourceOnlyDebug = Args.hasFlag(OPT_Zs, OPT_INVALID, false);
+  opts.PdbInPrivate = Args.hasFlag(OPT_Qpdb_in_private, OPT_INVALID, false);
   opts.StripRootSignature = Args.hasFlag(OPT_Qstrip_rootsignature, OPT_INVALID, false);
   opts.StripRootSignature = Args.hasFlag(OPT_Qstrip_rootsignature, OPT_INVALID, false);
   opts.StripPrivate = Args.hasFlag(OPT_Qstrip_priv, OPT_INVALID, false);
   opts.StripPrivate = Args.hasFlag(OPT_Qstrip_priv, OPT_INVALID, false);
   opts.StripReflection = Args.hasFlag(OPT_Qstrip_reflect, OPT_INVALID, false);
   opts.StripReflection = Args.hasFlag(OPT_Qstrip_reflect, OPT_INVALID, false);
@@ -649,7 +654,18 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
   // Lifetime markers on by default in 6.6 unless disabled explicitly
   // Lifetime markers on by default in 6.6 unless disabled explicitly
   opts.EnableLifetimeMarkers = Args.hasFlag(OPT_enable_lifetime_markers, OPT_INVALID,
   opts.EnableLifetimeMarkers = Args.hasFlag(OPT_enable_lifetime_markers, OPT_INVALID,
                                             DXIL::CompareVersions(Major, Minor, 6, 6) >= 0) &&
                                             DXIL::CompareVersions(Major, Minor, 6, 6) >= 0) &&
-                               !Args.hasFlag(OPT_disable_lifetime_markers, OPT_INVALID, false);
+                              !Args.hasFlag(OPT_disable_lifetime_markers, OPT_INVALID, false);
+  opts.EnablePayloadQualifiers = Args.hasFlag(OPT_enable_payload_qualifiers, OPT_INVALID,
+                                            DXIL::CompareVersions(Major, Minor, 6, 7) >= 0); 
+  if (DXIL::CompareVersions(Major, Minor, 6, 8) < 0) {
+     opts.EnablePayloadQualifiers &= !Args.hasFlag(OPT_disable_payload_qualifiers, OPT_INVALID, false);
+  }
+  if (opts.EnablePayloadQualifiers && DXIL::CompareVersions(Major, Minor, 6, 6) < 0) {
+    errors << "Invalid target for payload access qualifiers. Only lib_6_6 and beyond are supported.";
+    return 1;
+  }
+
+  opts.HandleExceptions = !Args.hasFlag(OPT_disable_exception_handling, OPT_INVALID, false);
 
 
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
   if (opts.DefaultColMajor && opts.DefaultRowMajor) {
     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";
     errors << "Cannot specify /Zpr and /Zpc together, use /? to get usage information";
@@ -923,13 +939,13 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
     return 1;
     return 1;
   }
   }
 
 
-  if (opts.FullDebug && opts.SourceOnlyDebug) {
-    errors << "Cannot specify both /Qfull_debug and /Qsource_only_debug";
+  if (opts.DebugInfo && opts.SourceOnlyDebug) {
+    errors << "Cannot specify both /Zi and /Zs";
     return 1;
     return 1;
   }
   }
 
 
   if (opts.SourceInDebugModule && opts.SourceOnlyDebug) {
   if (opts.SourceInDebugModule && opts.SourceOnlyDebug) {
-    errors << "Cannot specify both /Qsource_in_debug_module and /Qsource_only_debug";
+    errors << "Cannot specify both /Qsource_in_debug_module and /Zs";
     return 1;
     return 1;
   }
   }
 
 
@@ -940,8 +956,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
     return 1;
     return 1;
   }
   }
 
 
-  if (opts.DebugNameForSource && !opts.DebugInfo) {
-    errors << "/Zss requires debug info (/Zi)";
+  if (opts.DebugNameForSource && (!opts.DebugInfo && !opts.SourceOnlyDebug)) {
+    errors << "/Zss requires debug info (/Zi or /Zs)";
     return 1;
     return 1;
   }
   }
 
 

+ 5 - 11
lib/DxilContainer/DxilContainerAssembler.cpp

@@ -1531,15 +1531,10 @@ DxilContainerWriter *hlsl::NewDxilContainerWriter() {
   return new DxilContainerWriter_impl();
   return new DxilContainerWriter_impl();
 }
 }
 
 
-static bool HasDebugInfo(const Module &M) {
-  for (Module::const_named_metadata_iterator NMI = M.named_metadata_begin(),
-                                             NME = M.named_metadata_end();
-       NMI != NME; ++NMI) {
-    if (NMI->getName().startswith("llvm.dbg.")) {
-      return true;
-    }
-  }
-  return false;
+static bool HasDebugInfoOrLineNumbers(const Module &M) {
+  return
+    llvm::getDebugMetadataVersionFromModule(M) != 0 ||
+    llvm::hasDebugInfo(M);
 }
 }
 
 
 static void GetPaddedProgramPartSize(AbstractMemoryStream *pStream,
 static void GetPaddedProgramPartSize(AbstractMemoryStream *pStream,
@@ -1776,8 +1771,7 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
   // If we have debug information present, serialize it to a debug part, then use the stripped version as the canonical program version.
   // If we have debug information present, serialize it to a debug part, then use the stripped version as the canonical program version.
   CComPtr<AbstractMemoryStream> pProgramStream = pInputProgramStream;
   CComPtr<AbstractMemoryStream> pProgramStream = pInputProgramStream;
   bool bModuleStripped = false;
   bool bModuleStripped = false;
-  bool bHasDebugInfo = HasDebugInfo(*pModule->GetModule());
-  if (bHasDebugInfo) {
+  if (HasDebugInfoOrLineNumbers(*pModule->GetModule())) {
     uint32_t debugInUInt32, debugPaddingBytes;
     uint32_t debugInUInt32, debugPaddingBytes;
     GetPaddedProgramPartSize(pInputProgramStream, debugInUInt32, debugPaddingBytes);
     GetPaddedProgramPartSize(pInputProgramStream, debugInUInt32, debugPaddingBytes);
     if (Flags & SerializeDxilFlags::IncludeDebugInfoPart) {
     if (Flags & SerializeDxilFlags::IncludeDebugInfoPart) {

+ 3 - 53
lib/DxilPIXPasses/DxilAddPixelHitInstrumentation.cpp

@@ -21,6 +21,8 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/Local.h"
 
 
+#include "PixPassHelpers.h"
+
 using namespace llvm;
 using namespace llvm;
 using namespace hlsl;
 using namespace hlsl;
 
 
@@ -102,59 +104,7 @@ bool DxilAddPixelHitInstrumentation::runOnModule(Module &M) {
     IRBuilder<> Builder(
     IRBuilder<> Builder(
         dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
         dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction()));
 
 
-    unsigned int UAVResourceHandle =
-        static_cast<unsigned int>(DM.GetUAVs().size());
-
-    // Set up a UAV with structure of a single int
-    SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(Ctx)};
-    llvm::StructType *UAVStructTy =
-        llvm::StructType::create(Elements, "class.RWStructuredBuffer");
-    std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
-    pUAV->SetGlobalName("PIX_CountUAVName");
-    pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
-    pUAV->SetID(UAVResourceHandle);
-    pUAV->SetSpaceID(
-        (unsigned int)-2); // This is the reserved-for-tools register space
-    pUAV->SetSampleCount(1);
-    pUAV->SetGloballyCoherent(false);
-    pUAV->SetHasCounter(false);
-    pUAV->SetCompType(CompType::getI32());
-    pUAV->SetLowerBound(0);
-    pUAV->SetRangeSize(1);
-    pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
-    pUAV->SetRW(true);
-
-    auto pAnnotation = DM.GetTypeSystem().GetStructAnnotation(UAVStructTy);
-    if (pAnnotation == nullptr) {
-      pAnnotation = DM.GetTypeSystem().AddStructAnnotation(UAVStructTy);
-      pAnnotation->GetFieldAnnotation(0).SetCBufferOffset(0);
-      pAnnotation->GetFieldAnnotation(0).SetCompType(
-          hlsl::DXIL::ComponentType::I32);
-      pAnnotation->GetFieldAnnotation(0).SetFieldName("count");
-    }
-
-    ID = DM.AddUAV(std::move(pUAV));
-
-    assert((unsigned)ID == UAVResourceHandle);
-
-    // Create handle for the newly-added UAV
-    Function *CreateHandleOpFunc =
-        HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
-    Constant *CreateHandleOpcodeArg =
-        HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-    Constant *UAVArg = HlslOP->GetI8Const(
-        static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-            DXIL::ResourceClass::UAV));
-    Constant *MetaDataArg =
-        HlslOP->GetU32Const(ID); // position of the metadata record in the
-                                 // corresponding metadata list
-    Constant *IndexArg = HlslOP->GetU32Const(0); //
-    Constant *FalseArg =
-        HlslOP->GetI1Const(0); // non-uniform resource index: false
-    HandleForUAV = Builder.CreateCall(
-        CreateHandleOpFunc,
-        {CreateHandleOpcodeArg, UAVArg, MetaDataArg, IndexArg, FalseArg},
-        "PIX_CountUAV_Handle");
+    HandleForUAV = PIXPassHelpers::CreateUAV(DM, Builder, 0, "PIX_CountUAV_Handle");
 
 
     DM.ReEmitDxilResources();
     DM.ReEmitDxilResources();
   }
   }

+ 2 - 48
lib/DxilPIXPasses/DxilDebugInstrumentation.cpp

@@ -252,7 +252,6 @@ public:
 
 
 private:
 private:
   SystemValueIndices addRequiredSystemValues(BuilderContext &BC);
   SystemValueIndices addRequiredSystemValues(BuilderContext &BC);
-  void addUAV(BuilderContext &BC);
   void addInvocationSelectionProlog(BuilderContext &BC,
   void addInvocationSelectionProlog(BuilderContext &BC,
                                     SystemValueIndices SVIndices);
                                     SystemValueIndices SVIndices);
   Value *addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
   Value *addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
@@ -548,52 +547,6 @@ DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC,
   return ComparePos;
   return ComparePos;
 }
 }
 
 
-void DxilDebugInstrumentation::addUAV(BuilderContext &BC) {
-  // Set up a UAV with structure of a single int
-  unsigned int UAVResourceHandle =
-      static_cast<unsigned int>(BC.DM.GetUAVs().size());
-  SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(BC.Ctx)};
-  llvm::StructType *UAVStructTy =
-      llvm::StructType::create(Elements, "PIX_DebugUAV_Type");
-  std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
-  pUAV->SetGlobalName("PIX_DebugUAVName");
-  pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
-  pUAV->SetID(UAVResourceHandle);
-  pUAV->SetSpaceID(
-      (unsigned int)-2); // This is the reserved-for-tools register space
-  pUAV->SetSampleCount(1);
-  pUAV->SetGloballyCoherent(false);
-  pUAV->SetHasCounter(false);
-  pUAV->SetCompType(CompType::getI32());
-  pUAV->SetLowerBound(0);
-  pUAV->SetRangeSize(1);
-  pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
-  pUAV->SetRW(true);
-
-  auto ID = BC.DM.AddUAV(std::move(pUAV));
-  assert(ID == UAVResourceHandle);
-
-  BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);
-
-  // Create handle for the newly-added UAV
-  Function *CreateHandleOpFunc =
-      BC.HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));
-  Constant *CreateHandleOpcodeArg =
-      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-  Constant *UAVVArg = BC.HlslOP->GetI8Const(
-      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-          DXIL::ResourceClass::UAV));
-  Constant *MetaDataArg = BC.HlslOP->GetU32Const(
-      ID); // position of the metadata record in the corresponding metadata list
-  Constant *IndexArg = BC.HlslOP->GetU32Const(0); //
-  Constant *FalseArg =
-      BC.HlslOP->GetI1Const(0); // non-uniform resource index: false
-  m_HandleForUAV = BC.Builder.CreateCall(
-      CreateHandleOpFunc,
-      {CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg},
-      "PIX_DebugUAV_Handle");
-}
-
 void DxilDebugInstrumentation::addInvocationSelectionProlog(
 void DxilDebugInstrumentation::addInvocationSelectionProlog(
     BuilderContext &BC, SystemValueIndices SVIndices) {
     BuilderContext &BC, SystemValueIndices SVIndices) {
   auto ShaderModel = BC.DM.GetShaderModel();
   auto ShaderModel = BC.DM.GetShaderModel();
@@ -959,7 +912,8 @@ bool DxilDebugInstrumentation::runOnModule(Module &M) {
 
 
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
   BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
 
 
-  addUAV(BC);
+  m_HandleForUAV = PIXPassHelpers::CreateUAV(BC.DM, BC.Builder, 0, "PIX_DebugUAV_Handle");
+
   auto SystemValues = addRequiredSystemValues(BC);
   auto SystemValues = addRequiredSystemValues(BC);
   addInvocationSelectionProlog(BC, SystemValues);
   addInvocationSelectionProlog(BC, SystemValues);
   addInvocationStartMarker(BC);
   addInvocationStartMarker(BC);

+ 6 - 19
lib/DxilPIXPasses/DxilOutputColorBecomesConstant.cpp

@@ -20,6 +20,8 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <array>
 #include <array>
 
 
+#include "PixPassHelpers.h"
+
 using namespace llvm;
 using namespace llvm;
 using namespace hlsl;
 using namespace hlsl;
 
 
@@ -166,8 +168,6 @@ bool DxilOutputColorBecomesConstant::runOnModule(Module &M) {
     pCBuf->SetRangeSize(1);
     pCBuf->SetRangeSize(1);
     pCBuf->SetSize(4);
     pCBuf->SetSize(4);
 
 
-    ID = DM.AddCBuffer(std::move(pCBuf));
-
     Instruction *entryPointInstruction =
     Instruction *entryPointInstruction =
         &*(DM.GetEntryFunction()->begin()->begin());
         &*(DM.GetEntryFunction()->begin()->begin());
     IRBuilder<> Builder(entryPointInstruction);
     IRBuilder<> Builder(entryPointInstruction);
@@ -175,23 +175,10 @@ bool DxilOutputColorBecomesConstant::runOnModule(Module &M) {
     // Create handle for the newly-added constant buffer (which is achieved via
     // Create handle for the newly-added constant buffer (which is achieved via
     // a function call)
     // a function call)
     auto ConstantBufferName = "PIX_Constant_Color_CB_Handle";
     auto ConstantBufferName = "PIX_Constant_Color_CB_Handle";
-    Function *createHandle =
-        HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
-    Constant *CreateHandleOpcodeArg =
-        HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-    Constant *CBVArg = HlslOP->GetI8Const(
-        static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-            DXIL::ResourceClass::CBuffer));
-    Constant *MetaDataArg =
-        HlslOP->GetU32Const(ID); // position of the metadata record in the
-                                 // corresponding metadata list
-    Constant *IndexArg = HlslOP->GetU32Const(0); //
-    Constant *FalseArg =
-        HlslOP->GetI1Const(0); // non-uniform resource index: false
-    CallInst *callCreateHandle = Builder.CreateCall(
-        createHandle,
-        {CreateHandleOpcodeArg, CBVArg, MetaDataArg, IndexArg, FalseArg},
-        ConstantBufferName);
+
+    CallInst* callCreateHandle = PIXPassHelpers::CreateHandleForResource(DM, Builder, pCBuf.get(), ConstantBufferName);
+
+    DM.AddCBuffer(std::move(pCBuf));
 
 
     DM.ReEmitDxilResources();
     DM.ReEmitDxilResources();
 
 

+ 3 - 49
lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp

@@ -28,6 +28,8 @@
 #include <winerror.h>
 #include <winerror.h>
 #endif
 #endif
 
 
+#include "PixPassHelpers.h"
+
 // Keep these in sync with the same-named value in the debugger application's
 // Keep these in sync with the same-named value in the debugger application's
 // WinPixShaderUtils.h
 // WinPixShaderUtils.h
 
 
@@ -72,7 +74,6 @@ private:
     IRBuilder<> &Builder;
     IRBuilder<> &Builder;
   };
   };
 
 
-  CallInst *addUAV(BuilderContext &BC);
   Value *insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC);
   Value *insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC);
   Value *insertInstructionsToCalculateGroupIdZ(BuilderContext &BC);
   Value *insertInstructionsToCalculateGroupIdZ(BuilderContext &BC);
   Value *reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes);
   Value *reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes);
@@ -92,53 +93,6 @@ uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset()
   return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
   return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
 }
 }
 
 
-CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) 
-{
-  // Set up a UAV with structure of a single int
-  unsigned int UAVResourceHandle =
-      static_cast<unsigned int>(BC.DM.GetUAVs().size());
-  SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(BC.Ctx)};
-  llvm::StructType *UAVStructTy =
-      llvm::StructType::create(Elements, "PIX_DebugUAV_Type");
-  std::unique_ptr<DxilResource> pUAV = llvm::make_unique<DxilResource>();
-  pUAV->SetGlobalName("PIX_DebugUAVName");
-  pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
-  pUAV->SetID(UAVResourceHandle);
-  pUAV->SetSpaceID(
-      (unsigned int)-2); // This is the reserved-for-tools register space
-  pUAV->SetSampleCount(1);
-  pUAV->SetGloballyCoherent(false);
-  pUAV->SetHasCounter(false);
-  pUAV->SetCompType(CompType::getI32());
-  pUAV->SetLowerBound(0);
-  pUAV->SetRangeSize(1);
-  pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
-  pUAV->SetRW(true);
-
-  auto ID = BC.DM.AddUAV(std::move(pUAV));
-  assert(ID == UAVResourceHandle);
-
-  BC.DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);
-
-  // Create handle for the newly-added UAV
-  Function *CreateHandleOpFunc =
-      BC.HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));
-  Constant *CreateHandleOpcodeArg =
-      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-  Constant *UAVVArg = BC.HlslOP->GetI8Const(
-      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-          DXIL::ResourceClass::UAV));
-  Constant *MetaDataArg = BC.HlslOP->GetU32Const(
-      ID); // position of the metadata record in the corresponding metadata list
-  Constant *IndexArg = BC.HlslOP->GetU32Const(0); //
-  Constant *FalseArg =
-      BC.HlslOP->GetI1Const(0); // non-uniform resource index: false
-  return BC.Builder.CreateCall(
-      CreateHandleOpFunc,
-      {CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg},
-      "PIX_DebugUAV_Handle");
-}
-
 Value *DxilPIXMeshShaderOutputInstrumentation::
 Value *DxilPIXMeshShaderOutputInstrumentation::
     insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC)
     insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC)
 {
 {
@@ -275,7 +229,7 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M)
 
 
   m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
   m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);
 
 
-  m_OutputUAV = addUAV(BC);
+  m_OutputUAV = PIXPassHelpers::CreateUAV(DM, Builder, 0, "PIX_DebugUAV_Handle");
 
 
   auto GroupIdXandY = insertInstructionsToCalculateFlattenedGroupIdXandY(BC);
   auto GroupIdXandY = insertInstructionsToCalculateFlattenedGroupIdXandY(BC);
   auto GroupIdZ = insertInstructionsToCalculateGroupIdZ(BC);
   auto GroupIdZ = insertInstructionsToCalculateGroupIdZ(BC);

+ 333 - 138
lib/DxilPIXPasses/DxilShaderAccessTracking.cpp

@@ -14,6 +14,8 @@
 
 
 #include "dxc/DXIL/DxilInstructions.h"
 #include "dxc/DXIL/DxilInstructions.h"
 #include "dxc/DXIL/DxilModule.h"
 #include "dxc/DXIL/DxilModule.h"
+#include "dxc/DXIL/DxilResourceBinding.h"
+#include "dxc/DXIL/DxilResourceProperties.h"
 #include "dxc/DxilPIXPasses/DxilPIXPasses.h"
 #include "dxc/DxilPIXPasses/DxilPIXPasses.h"
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "dxc/HLSL/DxilSpanAllocator.h"
 #include "dxc/HLSL/DxilSpanAllocator.h"
@@ -23,6 +25,8 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <deque>
 #include <deque>
 
 
+#include "PixPassHelpers.h"
+
 #ifdef _WIN32
 #ifdef _WIN32
 #include <winerror.h>
 #include <winerror.h>
 #endif
 #endif
@@ -48,6 +52,8 @@ enum class ShaderAccessFlags : uint32_t {
   // attached to the UAV was accessed, but not necessarily the UAV resource.
   // attached to the UAV was accessed, but not necessarily the UAV resource.
   Counter = 1 << 2,
   Counter = 1 << 2,
 
 
+  Sampler = 1 << 3,
+
   // Descriptor-only read (if any), but not the resource contents (if any).
   // Descriptor-only read (if any), but not the resource contents (if any).
   // Used for GetDimensions, samplers, and secondary texture for sampler
   // Used for GetDimensions, samplers, and secondary texture for sampler
   // feedback.
   // feedback.
@@ -141,10 +147,24 @@ struct SlotRange {
   unsigned numInvariableSlots;
   unsigned numInvariableSlots;
 };
 };
 
 
+enum class AccessStyle { None, FromRootSig, ResourceFromDescriptorHeap, SamplerFromDescriptorHeap };
 struct DxilResourceAndClass {
 struct DxilResourceAndClass {
-  DxilResourceBase *resource;
+  AccessStyle accessStyle;
+  RegisterType registerType;
+  int RegisterSpace;
+  unsigned RegisterID;
   Value *index;
   Value *index;
-  DXIL::ResourceClass resClass;
+  Value *dynamicallyBoundIndex;
+};
+
+enum class ResourceAccessStyle {
+  None,
+  Sampler,
+  UAVRead,
+  UAVWrite,
+  CBVRead,
+  SRVRead,
+  EndOfEnum
 };
 };
 
 
 //---------------------------------------------------------------------------------------------------------------------------------
 //---------------------------------------------------------------------------------------------------------------------------------
@@ -165,11 +185,23 @@ private:
   bool EmitResourceAccess(DxilResourceAndClass &res, Instruction *instruction,
   bool EmitResourceAccess(DxilResourceAndClass &res, Instruction *instruction,
                           OP *HlslOP, LLVMContext &Ctx,
                           OP *HlslOP, LLVMContext &Ctx,
                           ShaderAccessFlags readWrite);
                           ShaderAccessFlags readWrite);
+  DxilResourceAndClass GetResourceFromHandle(Value* resHandle, DxilModule& DM);
 
 
 private:
 private:
+  struct DynamicResourceBinding {
+    int HeapIndex;
+    bool HeapIsSampler; // else resource
+    std::string Name;
+  };
+
+  std::vector<DynamicResourceBinding> m_dynamicResourceBindings;
   bool m_CheckForDynamicIndexing = false;
   bool m_CheckForDynamicIndexing = false;
+  int m_DynamicResourceDataOffset = -1;
+  int m_DynamicSamplerDataOffset = -1;
+  int m_OutputBufferSize = -1;
   std::map<RegisterTypeAndSpace, SlotRange> m_slotAssignments;
   std::map<RegisterTypeAndSpace, SlotRange> m_slotAssignments;
   std::map<llvm::Function *, CallInst *> m_FunctionToUAVHandle;
   std::map<llvm::Function *, CallInst *> m_FunctionToUAVHandle;
+  std::map<llvm::Function *, std::map<ResourceAccessStyle, Constant *>> m_FunctionToEncodedAccess;
   std::set<RSRegisterIdentifier> m_DynamicallyIndexedBindPoints;
   std::set<RSRegisterIdentifier> m_DynamicallyIndexedBindPoints;
 };
 };
 
 
@@ -264,6 +296,11 @@ void DxilShaderAccessTracking::applyOptions(PassOptions O) {
 
 
       rt = ParseRegisterType(config);
       rt = ParseRegisterType(config);
     }
     }
+    m_DynamicResourceDataOffset = DeserializeInt(config);
+    ValidateDelimiter(config, ';');
+    m_DynamicSamplerDataOffset = DeserializeInt(config);
+    ValidateDelimiter(config, ';');
+    m_OutputBufferSize = DeserializeInt(config);
   }
   }
 }
 }
 
 
@@ -302,110 +339,301 @@ void DxilShaderAccessTracking::EmitAccess(LLVMContext &Ctx, OP *HlslOP,
       });
       });
 }
 }
 
 
+static ResourceAccessStyle AccessStyleFromAccessAndType(
+    AccessStyle accessStyle, 
+    RegisterType registerType,
+    ShaderAccessFlags readWrite)
+{
+    switch (accessStyle)
+    {
+    case AccessStyle::ResourceFromDescriptorHeap:
+        switch (registerType)
+        {
+        case RegisterType::CBV:
+          return ResourceAccessStyle::CBVRead;
+        case RegisterType::SRV:
+          return ResourceAccessStyle::SRVRead;
+        case RegisterType::UAV:
+            return readWrite == ShaderAccessFlags::Read ?
+                ResourceAccessStyle::UAVRead :
+                ResourceAccessStyle::UAVWrite;
+        default:
+          return ResourceAccessStyle::None;
+        }
+    case AccessStyle::SamplerFromDescriptorHeap:
+        return ResourceAccessStyle::Sampler;
+    default:
+        return ResourceAccessStyle::None;
+    }
+}
+
 bool DxilShaderAccessTracking::EmitResourceAccess(DxilResourceAndClass &res,
 bool DxilShaderAccessTracking::EmitResourceAccess(DxilResourceAndClass &res,
                                                   Instruction *instruction,
                                                   Instruction *instruction,
                                                   OP *HlslOP, LLVMContext &Ctx,
                                                   OP *HlslOP, LLVMContext &Ctx,
                                                   ShaderAccessFlags readWrite) {
                                                   ShaderAccessFlags readWrite) {
-
-  RegisterTypeAndSpace typeAndSpace{RegisterTypeFromResourceClass(res.resClass),
-                                    res.resource->GetSpaceID()};
-
-  auto slot = m_slotAssignments.find(typeAndSpace);
-  // If the assignment isn't found, we assume it's not accessed
-  if (slot != m_slotAssignments.end()) {
-
-    IRBuilder<> Builder(instruction);
-    Value *slotIndex;
-
-    if (isa<ConstantInt>(res.index)) {
-      unsigned index = cast<ConstantInt>(res.index)->getLimitedValue();
-      if (index > slot->second.numSlots) {
-        // out-of-range accesses are written to slot zero:
-        slotIndex = HlslOP->GetU32Const(0);
+  IRBuilder<> Builder(instruction);
+  
+  if (res.accessStyle == AccessStyle::FromRootSig) {
+    RegisterTypeAndSpace typeAndSpace{
+        res.registerType, 
+        static_cast<unsigned>(res.RegisterSpace) // reserved spaces are -ve, but user spaces can only be +ve
+    };
+
+    auto slot = m_slotAssignments.find(typeAndSpace);
+    // If the assignment isn't found, we assume it's not accessed
+    if (slot != m_slotAssignments.end()) {
+
+        Value *slotIndex;
+    
+      if (isa<ConstantInt>(res.index)) {
+        unsigned index = cast<ConstantInt>(res.index)->getLimitedValue();
+        if (index > slot->second.numSlots) {
+          // out-of-range accesses are written to slot zero:
+          slotIndex = HlslOP->GetU32Const(0);
+        } else {
+          slotIndex = HlslOP->GetU32Const((slot->second.startSlot + index) *
+                                          DWORDsPerResource * BytesPerDWORD);
+        }
       } else {
       } else {
-        slotIndex = HlslOP->GetU32Const((slot->second.startSlot + index) *
-                                        DWORDsPerResource * BytesPerDWORD);
+        RSRegisterIdentifier id{typeAndSpace.Type, typeAndSpace.Space,
+                                res.RegisterID};
+        m_DynamicallyIndexedBindPoints.emplace(std::move(id));
+    
+        // CompareWithSlotLimit will contain 1 if the access is out-of-bounds
+        // (both over- and and under-flow via the unsigned >= with slot count)
+        auto CompareWithSlotLimit = Builder.CreateICmpUGE(
+            res.index, HlslOP->GetU32Const(slot->second.numSlots),
+            "CompareWithSlotLimit");
+        auto CompareWithSlotLimitAsUint = Builder.CreateCast(
+            Instruction::CastOps::ZExt, CompareWithSlotLimit,
+            Type::getInt32Ty(Ctx), "CompareWithSlotLimitAsUint");
+    
+        // IsInBounds will therefore contain 0 if the access is out-of-bounds, and
+        // 1 otherwise.
+        auto IsInBounds = Builder.CreateSub(
+            HlslOP->GetU32Const(1), CompareWithSlotLimitAsUint, "IsInBounds");
+    
+        auto SlotDwordOffset = Builder.CreateAdd(
+            res.index, HlslOP->GetU32Const(slot->second.startSlot),
+            "SlotDwordOffset");
+        auto SlotByteOffset = Builder.CreateMul(
+            SlotDwordOffset,
+            HlslOP->GetU32Const(DWORDsPerResource * BytesPerDWORD),
+            "SlotByteOffset");
+    
+        // This will drive an out-of-bounds access slot down to 0
+        slotIndex = Builder.CreateMul(SlotByteOffset, IsInBounds, "slotIndex");
       }
       }
-    } else {
-      RSRegisterIdentifier id{typeAndSpace.Type, typeAndSpace.Space,
-                              res.resource->GetID()};
-      m_DynamicallyIndexedBindPoints.emplace(std::move(id));
-
-      // CompareWithSlotLimit will contain 1 if the access is out-of-bounds
-      // (both over- and and under-flow via the unsigned >= with slot count)
-      auto CompareWithSlotLimit = Builder.CreateICmpUGE(
-          res.index, HlslOP->GetU32Const(slot->second.numSlots),
-          "CompareWithSlotLimit");
-      auto CompareWithSlotLimitAsUint = Builder.CreateCast(
-          Instruction::CastOps::ZExt, CompareWithSlotLimit,
-          Type::getInt32Ty(Ctx), "CompareWithSlotLimitAsUint");
-
-      // IsInBounds will therefore contain 0 if the access is out-of-bounds, and
-      // 1 otherwise.
-      auto IsInBounds = Builder.CreateSub(
-          HlslOP->GetU32Const(1), CompareWithSlotLimitAsUint, "IsInBounds");
-
-      auto SlotDwordOffset = Builder.CreateAdd(
-          res.index, HlslOP->GetU32Const(slot->second.startSlot),
-          "SlotDwordOffset");
-      auto SlotByteOffset = Builder.CreateMul(
-          SlotDwordOffset,
-          HlslOP->GetU32Const(DWORDsPerResource * BytesPerDWORD),
-          "SlotByteOffset");
-
-      // This will drive an out-of-bounds access slot down to 0
-      slotIndex = Builder.CreateMul(SlotByteOffset, IsInBounds, "slotIndex");
+    
+      EmitAccess(Ctx, HlslOP, Builder, slotIndex, readWrite);
+    
+      return true; // did modify
     }
     }
+  }
+  else if (m_DynamicResourceDataOffset != -1) {
+      if (res.accessStyle == AccessStyle::ResourceFromDescriptorHeap ||
+          res.accessStyle == AccessStyle::SamplerFromDescriptorHeap)
+      {
+          Constant* BaseOfRecordsForType;
+          int LimitForType;
+          if (res.accessStyle == AccessStyle::ResourceFromDescriptorHeap) {
+              LimitForType = m_DynamicSamplerDataOffset - m_DynamicResourceDataOffset;
+              BaseOfRecordsForType =
+                  HlslOP->GetU32Const(m_DynamicResourceDataOffset);
+          } else {
+              LimitForType = m_OutputBufferSize - m_DynamicSamplerDataOffset;
+              BaseOfRecordsForType =
+                HlslOP->GetU32Const(m_DynamicSamplerDataOffset);
+          }
 
 
-    EmitAccess(Ctx, HlslOP, Builder, slotIndex, readWrite);
+          // Branchless limit: compare offset to size of data reserved for that type,
+          // resulting in a value of 0 or 1.
+          // Extend that 0/1 to an integer, and multiply the offset by that value.
+          // Result: expected offset, or 0 if too large.
+
+          // Add 1 to the index in order to skip over the zeroth entry: that's 
+          // reserved for "out of bounds" writes.
+          auto *IndexToWrite =
+              Builder.CreateAdd(res.dynamicallyBoundIndex, HlslOP->GetU32Const(1));
+
+          // Each record is two dwords:
+          // the first dword is for write access, the second for read.
+          Constant *SizeofRecord =
+              HlslOP->GetU32Const(2 * static_cast<unsigned int>(sizeof(uint32_t)));
+          auto *BaseOfRecord =
+              Builder.CreateMul(IndexToWrite, SizeofRecord);
+          Value* OffsetToWrite;
+          if (readWrite == ShaderAccessFlags::Write) {
+            OffsetToWrite = BaseOfRecord;
+          }
+          else {
+            OffsetToWrite = Builder.CreateAdd(BaseOfRecord, 
+                HlslOP->GetU32Const(static_cast<unsigned int>(sizeof(uint32_t))));
+          }
 
 
-    return true; // did modify
+          // Generate the 0 (out of bounds) or 1 (in-bounds) multiplier:
+          Constant *BufferLimit = HlslOP->GetU32Const(LimitForType);
+          auto *LimitBoolean =
+              Builder.CreateICmpULT(OffsetToWrite, BufferLimit);
+          
+          auto * LimitIntegerValue = Builder.CreateCast(
+              Instruction::CastOps::ZExt, LimitBoolean,
+              Type::getInt32Ty(Ctx));
+          
+          // Limit the offset to the out-of-bounds record if the above generated 0,
+          // or leave it as-is if the above generated 1:
+          auto *LimitedOffset = Builder.CreateMul(OffsetToWrite, LimitIntegerValue);
+          
+          // Offset into the range of records for this type of access (resource or sampler)
+          auto* Offset = Builder.CreateAdd(BaseOfRecordsForType, LimitedOffset);
+
+          ResourceAccessStyle accessStyle = AccessStyleFromAccessAndType(
+              res.accessStyle, 
+              res.registerType,
+              readWrite);
+
+          Constant* EncodedFlags = m_FunctionToEncodedAccess
+                                .at(Builder.GetInsertBlock()->getParent())
+                                .at(accessStyle);
+
+          Constant *ElementMask = HlslOP->GetI8Const(1);
+          Function *StoreFunc =
+              HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(Ctx));
+          Constant *StoreOpcode =
+              HlslOP->GetU32Const((unsigned)OP::OpCode::BufferStore);
+          UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(Ctx));
+          (void)Builder.CreateCall(
+              StoreFunc,
+              {
+                  StoreOpcode,                  // i32, ; opcode
+                  m_FunctionToUAVHandle.at(
+                      Builder.GetInsertBlock()
+                          ->getParent()),       // %dx.types.Handle, ; resource handle
+                  Offset,                // i32, ; coordinate c0: byte offset
+                  UndefArg,                     // i32, ; coordinate c1 (unused)
+                  EncodedFlags,                 // i32, ; value v0
+                  UndefArg,                     // i32, ; value v1
+                  UndefArg,                     // i32, ; value v2
+                  UndefArg,                     // i32, ; value v3
+                  ElementMask                   // i8 ; just the first value is used
+              });
+          return true; // did modify
+      }
   }
   }
+
   return false; // did not modify
   return false; // did not modify
 }
 }
 
 
-DxilResourceAndClass GetResourceFromHandle(Value *resHandle, DxilModule &DM) {
+DxilResourceAndClass 
+DxilShaderAccessTracking::GetResourceFromHandle(Value *resHandle,
+                                                DxilModule &DM) {
 
 
-  DxilResourceAndClass ret{nullptr, nullptr, DXIL::ResourceClass::Invalid};
+  DxilResourceAndClass ret{
+      AccessStyle::None, 
+      RegisterType::Terminator,
+      0,
+      0,
+      nullptr,
+      nullptr};
 
 
   CallInst *handle = cast<CallInst>(resHandle);
   CallInst *handle = cast<CallInst>(resHandle);
-  DxilInst_CreateHandle createHandle(handle);
 
 
-  // Dynamic rangeId is not supported - skip and let validation report the
-  // error.
-  if (!isa<ConstantInt>(createHandle.get_rangeId()))
-    return ret;
+  unsigned rangeId = -1;
 
 
-  unsigned rangeId =
-      cast<ConstantInt>(createHandle.get_rangeId())->getLimitedValue();
+  if (hlsl::OP::IsDxilOpFuncCallInst(handle, hlsl::OP::OpCode::CreateHandle))
+  {
+    DxilInst_CreateHandle createHandle(handle);
 
 
-  auto resClass =
-      static_cast<DXIL::ResourceClass>(createHandle.get_resourceClass_val());
+    // Dynamic rangeId is not supported - skip and let validation report the
+    // error.
+    if (isa<ConstantInt>(createHandle.get_rangeId())) {
+        rangeId = cast<ConstantInt>(createHandle.get_rangeId())->getLimitedValue();
 
 
-  switch (resClass) {
-  case DXIL::ResourceClass::SRV:
-    ret.resource = &DM.GetSRV(rangeId);
-    break;
-  case DXIL::ResourceClass::UAV:
-    ret.resource = &DM.GetUAV(rangeId);
-    break;
-  case DXIL::ResourceClass::CBuffer:
-    ret.resource = &DM.GetCBuffer(rangeId);
-    break;
-  case DXIL::ResourceClass::Sampler:
-    ret.resource = &DM.GetSampler(rangeId);
-    break;
-  default:
-    DXASSERT(0, "invalid res class");
-    return ret;
-  }
+        auto resClass = static_cast<DXIL::ResourceClass>(createHandle.get_resourceClass_val());
 
 
-  ret.index = createHandle.get_index();
-  ret.resClass = resClass;
+        DxilResourceBase* resource = nullptr;
+        RegisterType registerType = RegisterType::Invalid;
+        switch (resClass) {
+        case DXIL::ResourceClass::SRV:
+            resource = &DM.GetSRV(rangeId);
+            registerType = RegisterType::SRV;
+            break;
+        case DXIL::ResourceClass::UAV:
+            resource = &DM.GetUAV(rangeId);
+          registerType = RegisterType::UAV;
+          break;
+        case DXIL::ResourceClass::CBuffer:
+            resource = &DM.GetCBuffer(rangeId);
+            registerType = RegisterType::CBV;
+            break;
+        case DXIL::ResourceClass::Sampler:
+            resource = &DM.GetSampler(rangeId);
+            registerType = RegisterType::Sampler;
+            break;
+        }
+        if (resource != nullptr) {
+            ret.index = createHandle.get_index();
+            ret.registerType = registerType;
+            ret.accessStyle = AccessStyle::FromRootSig;
+            ret.RegisterID = resource->GetID();
+            ret.RegisterSpace = resource->GetSpaceID();
+        }
+    }
+  } else if (hlsl::OP::IsDxilOpFuncCallInst(handle, hlsl::OP::OpCode::AnnotateHandle)) {
+      DxilInst_AnnotateHandle annotateHandle(handle);
+      auto properties = hlsl::resource_helper::loadPropsFromAnnotateHandle(
+          annotateHandle, *DM.GetShaderModel());
+
+      auto* handleCreation = cast<CallInst>(annotateHandle.get_res());
+
+      if (hlsl::OP::IsDxilOpFuncCallInst(handleCreation, hlsl::OP::OpCode::CreateHandleFromBinding)) {
+          DxilInst_CreateHandleFromBinding createHandleFromBinding(handleCreation);
+          Constant* B = cast<Constant>(createHandleFromBinding.get_bind());
+          auto binding = hlsl::resource_helper::loadBindingFromConstant(*B);
+          ret.accessStyle = AccessStyle::FromRootSig;
+          ret.index = createHandleFromBinding.get_index();
+          ret.registerType = RegisterTypeFromResourceClass(
+              static_cast<hlsl::DXIL::ResourceClass>(binding.resourceClass));
+          ret.RegisterSpace = binding.spaceID;
+      } else if (hlsl::OP::IsDxilOpFuncCallInst(handleCreation, hlsl::OP::OpCode::CreateHandleFromHeap)) {
+          DxilInst_CreateHandleFromHeap createHandleFromHeap(handleCreation);
+          ret.accessStyle = createHandleFromHeap.get_samplerHeap_val()
+              ? AccessStyle::SamplerFromDescriptorHeap : AccessStyle::ResourceFromDescriptorHeap;
+          ret.dynamicallyBoundIndex = createHandleFromHeap.get_index();
+
+          ret.registerType = RegisterTypeFromResourceClass(properties.getResourceClass());
+
+          DynamicResourceBinding drb{};
+          drb.HeapIsSampler = createHandleFromHeap.get_samplerHeap_val();
+          drb.HeapIndex = -1;
+          drb.Name = "ShaderNameTodo";
+          if (auto * constInt = dyn_cast<ConstantInt>(createHandleFromHeap.get_index()))
+          {
+              drb.HeapIndex = constInt->getLimitedValue();
+          }
+          m_dynamicResourceBindings.emplace_back(std::move(drb));
+
+          return ret;
+      } else {
+          DXASSERT_NOMSG(false);
+      }
+  }
 
 
   return ret;
   return ret;
 }
 }
 
 
+// Packs the shader kind into the top four bits (28-31) of a 32-bit word.
+// runOnModule ORs the result with EncodeAccess(), so the kind has to fit in
+// those four bits without spilling over.
+static uint32_t EncodeShaderModel(DXIL::ShaderKind kind)
+{
+    // The enum bound is a compile-time constant: verify it at compile time
+    // rather than in a debug-only runtime assert.
+    static_assert(static_cast<int>(DXIL::ShaderKind::Invalid) <= 16,
+                  "ShaderKind must fit in the 4 bits reserved for it");
+    const uint32_t encoded = static_cast<uint32_t>(kind);
+    // Validate the actual argument as well, the same way EncodeAccess does.
+    DXASSERT_NOMSG(encoded < 16);
+    return encoded << 28;
+}
+
+// Shifts the access style left by 24 bits. The debug assert restricts the raw
+// enum value to 0-7, i.e. three bits.
+static uint32_t EncodeAccess(ResourceAccessStyle access) {
+    const auto rawStyle = static_cast<uint32_t>(access);
+    DXASSERT_NOMSG(rawStyle < 8);
+    const uint32_t shifted = rawStyle << 24;
+    return shifted;
+}
+
 bool DxilShaderAccessTracking::runOnModule(Module &M) {
 bool DxilShaderAccessTracking::runOnModule(Module &M) {
   // This pass adds instrumentation for shader access to resources
   // This pass adds instrumentation for shader access to resources
 
 
@@ -447,66 +675,24 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) {
           FOS << "ShouldAssumeDsvAccess";
           FOS << "ShouldAssumeDsvAccess";
         }
         }
       }
       }
-
+      int uavRegId = 0;
       for (llvm::Function &F : M.functions()) {
       for (llvm::Function &F : M.functions()) {
         if (!F.getBasicBlockList().empty()) {
         if (!F.getBasicBlockList().empty()) {
           IRBuilder<> Builder(F.getEntryBlock().getFirstInsertionPt());
           IRBuilder<> Builder(F.getEntryBlock().getFirstInsertionPt());
 
 
-          unsigned int UAVResourceHandle =
-              static_cast<unsigned int>(DM.GetUAVs().size());
-
-          // Set up a UAV with structure of a single int
-          SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(Ctx)};
-          llvm::StructType *UAVStructTy =
-              llvm::StructType::create(Elements, "class.RWStructuredBuffer");
-          std::unique_ptr<DxilResource> pUAV =
-              llvm::make_unique<DxilResource>();
-          pUAV->SetGlobalName("PIX_CountUAVName");
-          pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
-          pUAV->SetID(UAVResourceHandle);
-          pUAV->SetSpaceID((
-              unsigned int)-2); // This is the reserved-for-tools register space
-          pUAV->SetSampleCount(1);
-          pUAV->SetGloballyCoherent(false);
-          pUAV->SetHasCounter(false);
-          pUAV->SetCompType(CompType::getI32());
-          pUAV->SetLowerBound(0);
-          pUAV->SetRangeSize(1);
-          pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
-
-          auto pAnnotation =
-              DM.GetTypeSystem().GetStructAnnotation(UAVStructTy);
-          if (pAnnotation == nullptr) {
-
-            pAnnotation = DM.GetTypeSystem().AddStructAnnotation(UAVStructTy);
-            pAnnotation->GetFieldAnnotation(0).SetCBufferOffset(0);
-            pAnnotation->GetFieldAnnotation(0).SetCompType(
-                hlsl::DXIL::ComponentType::I32);
-            pAnnotation->GetFieldAnnotation(0).SetFieldName("count");
+          m_FunctionToUAVHandle[&F] = PIXPassHelpers::CreateUAV(DM, Builder, uavRegId++, "PIX_CountUAV_Handle");
+          auto const* shaderModel = DM.GetShaderModel();
+          auto shaderKind = shaderModel->GetKind();
+          OP *HlslOP = DM.GetOP();
+          for (int accessStyle = 1;
+              accessStyle < static_cast<int>(ResourceAccessStyle::EndOfEnum);
+              ++accessStyle)
+          {
+              ResourceAccessStyle style = static_cast<ResourceAccessStyle>(accessStyle);
+              m_FunctionToEncodedAccess[&F][style] =
+                  HlslOP->GetU32Const(EncodeShaderModel(shaderKind) |
+                      EncodeAccess(style));
           }
           }
-
-          ID = DM.AddUAV(std::move(pUAV));
-
-          assert((unsigned)ID == UAVResourceHandle);
-
-          // Create handle for the newly-added UAV
-          Function *CreateHandleOpFunc = HlslOP->GetOpFunc(
-              DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
-          Constant *CreateHandleOpcodeArg =
-              HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
-          Constant *UAVArg = HlslOP->GetI8Const(
-              static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
-                  DXIL::ResourceClass::UAV));
-          Constant *MetaDataArg =
-              HlslOP->GetU32Const(ID); // position of the metadata record in the
-                                       // corresponding metadata list
-          Constant *IndexArg = HlslOP->GetU32Const(0); //
-          Constant *FalseArg =
-              HlslOP->GetI1Const(0); // non-uniform resource index: false
-          m_FunctionToUAVHandle[&F] = Builder.CreateCall(
-              CreateHandleOpFunc,
-              {CreateHandleOpcodeArg, UAVArg, MetaDataArg, IndexArg, FalseArg},
-              "PIX_CountUAV_Handle");
         }
         }
       }
       }
       DM.ReEmitDxilResources();
       DM.ReEmitDxilResources();
@@ -576,9 +762,11 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) {
 
 
         for (unsigned iParam : handleParams) {
         for (unsigned iParam : handleParams) {
           auto res = GetResourceFromHandle(Call->getArgOperand(iParam), DM);
           auto res = GetResourceFromHandle(Call->getArgOperand(iParam), DM);
+          if (res.accessStyle == AccessStyle::None) {
+            continue;
+          }
           // Don't instrument the accesses to the UAV that we just added
           // Don't instrument the accesses to the UAV that we just added
-          if (res.resClass == DXIL::ResourceClass::UAV &&
-              res.resource->GetSpaceID() == (unsigned)-2) {
+          if (res.RegisterSpace  == -2) {
             break;
             break;
           }
           }
           if (EmitResourceAccess(res, Call, HlslOP, Ctx, readWrite)) {
           if (EmitResourceAccess(res, Call, HlslOP, Ctx, readWrite)) {
@@ -598,6 +786,13 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) {
             << ';';
             << ';';
       }
       }
       FOS << ".";
       FOS << ".";
+
+      // todo: this will reflect dynamic resource names when the metadata exists
+      FOS << "DynamicallyBoundResources=";
+      for (auto const &drb : m_dynamicResourceBindings) {
+        FOS << (drb.HeapIsSampler ? 'S' : 'R') << drb.HeapIndex << ';';
+      }
+      FOS << ".";
     }
     }
   }
   }
 
 

+ 146 - 8
lib/DxilPIXPasses/PixPassHelpers.cpp

@@ -8,15 +8,153 @@
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 
 
 #include "dxc/DXIL/DxilOperations.h"
 #include "dxc/DXIL/DxilOperations.h"
+#include "dxc/DXIL/DxilInstructions.h"
+#include "dxc/DXIL/DxilModule.h"
+#include "dxc/DXIL/DxilResourceBinding.h"
+#include "dxc/DXIL/DxilResourceProperties.h"
+#include "dxc/HLSL/DxilSpanAllocator.h"
+
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Pass.h"
 
 
-namespace PIXPassHelpers
-{
-    bool IsAllocateRayQueryInstruction(llvm::Value* Val) {
-        if (llvm::Instruction* Inst = llvm::dyn_cast<llvm::Instruction>(Val)) {
-            return hlsl::OP::IsDxilOpFuncCallInst(Inst, hlsl::OP::OpCode::AllocateRayQuery);
-        }
-        return false;
+using namespace llvm;
+using namespace hlsl;
+
+namespace PIXPassHelpers {
+bool IsAllocateRayQueryInstruction(llvm::Value *Val) {
+  if (Val != nullptr) {
+    if (llvm::Instruction *Inst = llvm::dyn_cast<llvm::Instruction>(Val)) {
+      return hlsl::OP::IsDxilOpFuncCallInst(Inst,
+                                            hlsl::OP::OpCode::AllocateRayQuery);
     }
     }
-}
+  }
+  return false;
+}
+
+// Returns how many resources of the given class the module currently holds.
+// Callers in this file use that count as the ID for a resource that is about
+// to be added. Only CBuffer and UAV are handled; any other class asserts and
+// yields 0.
+static unsigned int
+GetNextRegisterIdForClass(hlsl::DxilModule &DM,
+                          DXIL::ResourceClass resourceClass) {
+  if (resourceClass == DXIL::ResourceClass::CBuffer) {
+    const auto cbufferCount = DM.GetCBuffers().size();
+    return static_cast<unsigned int>(cbufferCount);
+  }
+  if (resourceClass == DXIL::ResourceClass::UAV) {
+    const auto uavCount = DM.GetUAVs().size();
+    return static_cast<unsigned int>(uavCount);
+  }
+  DXASSERT(false, "Unexpected resource class");
+  return 0;
+}
+
+// True when the module's shader model is at least 6.6. CreateHandleForResource
+// uses this to pick between the binding-based handle ops and CreateHandle.
+static bool IsDynamicResourceShaderModel(DxilModule &DM) {
+  const auto *shaderModel = DM.GetShaderModel();
+  return shaderModel->IsSMAtLeast(6, 6);
+}
+
+// Emits, at the builder's insertion point, the DXIL call(s) that produce a
+// handle for `resource`, and returns the last call emitted.
+//  - Shader model 6.6+: CreateHandleFromBinding (named `name`) followed by an
+//    unnamed AnnotateHandle; the AnnotateHandle call is returned.
+//  - Otherwise: a single CreateHandle call named `name`.
+// The metadata ID comes from the module's current resource count for the
+// resource's class (see GetNextRegisterIdForClass).
+// NOTE(review): on the 6.6+ path that metadata ID is also passed as the
+// `index` operand of CreateHandleFromBinding - confirm this is intended rather
+// than an index inside the resource's bound range.
+llvm::CallInst *CreateHandleForResource(hlsl::DxilModule &DM,
+                                        llvm::IRBuilder<> &Builder,
+                                        hlsl::DxilResourceBase *resource,
+                                        const char *name) {
+  OP *pOps = DM.GetOP();
+  Type *voidTy = Type::getVoidTy(DM.GetModule()->getContext());
+
+  const DXIL::ResourceClass resClass = resource->GetClass();
+  const unsigned int metadataId = GetNextRegisterIdForClass(DM, resClass);
+
+  if (!IsDynamicResourceShaderModel(DM)) {
+    // Pre-6.6 path: one CreateHandle call addressing the metadata record.
+    Function *createHandleFn =
+        pOps->GetOpFunc(DXIL::OpCode::CreateHandle, voidTy);
+    Constant *opcodeArg =
+        pOps->GetU32Const((unsigned)DXIL::OpCode::CreateHandle);
+    using ClassRepr = std::underlying_type<DxilResourceBase::Class>::type;
+    Constant *classArg = pOps->GetI8Const(static_cast<ClassRepr>(resClass));
+    // Position of the metadata record in the corresponding metadata list.
+    Constant *rangeIdArg = pOps->GetU32Const(metadataId);
+    Constant *indexArg = pOps->GetU32Const(0);
+    // Non-uniform resource index: false.
+    Constant *nonUniformArg = pOps->GetI1Const(0);
+    return Builder.CreateCall(
+        createHandleFn,
+        {opcodeArg, classArg, rangeIdArg, indexArg, nonUniformArg}, name);
+  }
+
+  // 6.6+ path, step 1: create the raw handle from the resource's binding.
+  Function *fromBindingFn =
+      pOps->GetOpFunc(DXIL::OpCode::CreateHandleFromBinding, voidTy);
+  DxilResourceBinding binding =
+      resource_helper::loadBindingFromResourceBase(resource);
+  Value *fromBindingArgs[] = {
+      pOps->GetU32Const((unsigned)DXIL::OpCode::CreateHandleFromBinding),
+      resource_helper::getAsConstant(binding, pOps->GetResourceBindingType(),
+                                     *DM.GetShaderModel()),
+      pOps->GetU32Const(metadataId),
+      pOps->GetI1Const(0)};
+  CallInst *rawHandle =
+      Builder.CreateCall(fromBindingFn, fromBindingArgs, name);
+
+  // Step 2: annotate the raw handle with the resource's properties.
+  Function *annotateFn =
+      pOps->GetOpFunc(DXIL::OpCode::AnnotateHandle, voidTy);
+  DxilResourceProperties props =
+      resource_helper::loadPropsFromResourceBase(resource);
+  Value *propsConst = resource_helper::getAsConstant(
+      props, pOps->GetResourcePropertiesType(), *DM.GetShaderModel());
+  Value *annotateOpcode =
+      pOps->GetI32Const((unsigned)DXIL::OpCode::AnnotateHandle);
+  return Builder.CreateCall(annotateFn,
+                            {annotateOpcode, rawHandle, propsConst});
+}
+
+// Adds a raw-buffer UAV (range size 1, lower bound 0) whose element type is a
+// struct holding a single i32 to the module, placing it in register space
+// (unsigned)-2, and returns the handle created for it at the builder's
+// insertion point. `name` becomes the resource's global name and is forwarded
+// to CreateHandleForResource for naming the handle call.
+// NOTE(review): `registerId` is never read in this function - confirm whether
+// callers expect it to select the register.
+llvm::CallInst *CreateUAV(DxilModule &DM, IRBuilder<> &Builder,
+                          unsigned int registerId, const char *name) {
+  LLVMContext &context = DM.GetModule()->getContext();
+
+  // Element type of the buffer: struct { i32 }.
+  SmallVector<llvm::Type *, 1> structFields{Type::getInt32Ty(context)};
+  llvm::StructType *bufferStructTy =
+      llvm::StructType::create(structFields, "class.RWStructuredBuffer");
+
+  auto uav = llvm::make_unique<DxilResource>();
+  uav->SetGlobalName(name);
+  uav->SetGlobalSymbol(UndefValue::get(bufferStructTy->getPointerTo()));
+  uav->SetID(GetNextRegisterIdForClass(DM, DXIL::ResourceClass::UAV));
+  uav->SetRW(true); // sets UAV class
+  // (unsigned)-2 is the reserved-for-tools register space.
+  uav->SetSpaceID(static_cast<unsigned int>(-2));
+  uav->SetSampleCount(1);
+  uav->SetGloballyCoherent(false);
+  uav->SetHasCounter(false);
+  uav->SetCompType(CompType::getI32());
+  uav->SetLowerBound(0);
+  uav->SetRangeSize(1);
+  uav->SetKind(DXIL::ResourceKind::RawBuffer);
+
+  // Describe the struct's single field if no annotation exists for it yet.
+  auto &typeSystem = DM.GetTypeSystem();
+  if (typeSystem.GetStructAnnotation(bufferStructTy) == nullptr) {
+    auto *structInfo = typeSystem.AddStructAnnotation(bufferStructTy);
+    auto &countField = structInfo->GetFieldAnnotation(0);
+    countField.SetCBufferOffset(0);
+    countField.SetCompType(hlsl::DXIL::ComponentType::I32);
+    countField.SetFieldName("count");
+  }
+
+  // Create the handle before AddUAV: CreateHandleForResource derives the
+  // metadata ID from the current UAV count, which must still equal the ID
+  // assigned above.
+  llvm::CallInst *uavHandle =
+      CreateHandleForResource(DM, Builder, uav.get(), name);
+  DM.AddUAV(std::move(uav));
+  return uavHandle;
+}
+} // namespace PIXPassHelpers

+ 5 - 0
lib/DxilPIXPasses/PixPassHelpers.h

@@ -12,4 +12,9 @@
 namespace PIXPassHelpers
 namespace PIXPassHelpers
 {
 {
 	bool IsAllocateRayQueryInstruction(llvm::Value* Val);
 	bool IsAllocateRayQueryInstruction(llvm::Value* Val);
+    llvm::CallInst* CreateUAV(hlsl::DxilModule& DM, llvm::IRBuilder<>& Builder,
+                                  unsigned int registerId, const char *name);
+    llvm::CallInst* CreateHandleForResource(hlsl::DxilModule& DM, llvm::IRBuilder<>& Builder,
+        hlsl::DxilResourceBase * resource,
+        const char* name);
 }
 }

+ 1 - 1
lib/HLSL/DxilCondenseResources.cpp

@@ -560,7 +560,7 @@ public:
 
 
     // Load up debug information, to cross-reference values and the instructions
     // Load up debug information, to cross-reference values and the instructions
     // used to load them.
     // used to load them.
-    m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+    m_HasDbgInfo = hasDebugInfo(M);
 
 
     GenerateDxilResourceHandles();
     GenerateDxilResourceHandles();
 
 

+ 3 - 1
lib/HLSL/DxilContainerReflection.cpp

@@ -1631,7 +1631,9 @@ void DxilModuleReflection::CreateReflectionObjectForResource(DxilResourceBase *R
     if (inputBind.NumSamples == 0) {
     if (inputBind.NumSamples == 0) {
       if (R->IsStructuredBuffer()) {
       if (R->IsStructuredBuffer()) {
         inputBind.NumSamples = CalcResTypeSize(*m_pDxilModule, *R);
         inputBind.NumSamples = CalcResTypeSize(*m_pDxilModule, *R);
-      } else if (!R->IsRawBuffer() && !R->IsTBuffer()) {
+      } else if (!R->IsRawBuffer() && !R->IsTBuffer() &&
+                 R->GetKind() != DXIL::ResourceKind::Texture2DMS &&
+                 R->GetKind() != DXIL::ResourceKind::Texture2DMSArray) {
         inputBind.NumSamples = 0xFFFFFFFF;
         inputBind.NumSamples = 0xFFFFFFFF;
       }
       }
     }
     }

+ 2 - 12
lib/HLSL/DxilConvergent.cpp

@@ -22,24 +22,13 @@
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "dxc/HLSL/DxilGenerationPass.h"
 #include "dxc/HLSL/HLOperations.h"
 #include "dxc/HLSL/HLOperations.h"
 #include "dxc/HLSL/HLModule.h"
 #include "dxc/HLSL/HLModule.h"
-#include "dxc/HLSL/DxilConvergent.h"
 #include "dxc/HlslIntrinsicOp.h"
 #include "dxc/HlslIntrinsicOp.h"
 #include "dxc/HLSL/DxilConvergentName.h"
 #include "dxc/HLSL/DxilConvergentName.h"
 
 
 using namespace llvm;
 using namespace llvm;
 using namespace hlsl;
 using namespace hlsl;
 
 
-bool hlsl::IsConvergentMarker(Value *V) {
-  CallInst *CI = dyn_cast<CallInst>(V);
-  if (!CI)
-    return false;
-  Function *F = CI->getCalledFunction();
-  return F->getName().startswith(kConvergentFunctionPrefix);
-}
 
 
-Value *hlsl::GetConvergentSource(Value *V) {
-  return cast<CallInst>(V)->getOperand(0);
-}
 
 
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 // DxilConvergent.
 // DxilConvergent.
@@ -58,7 +47,8 @@ public:
 
 
   bool runOnModule(Module &M) override {
   bool runOnModule(Module &M) override {
     if (M.HasHLModule()) {
     if (M.HasHLModule()) {
-      if (!M.GetHLModule().GetShaderModel()->IsPS())
+      const ShaderModel *SM = M.GetHLModule().GetShaderModel();
+      if (!SM->IsPS() && !SM->IsLib() && (!SM->IsSM66Plus() || (!SM->IsCS() && !SM->IsMS() && !SM->IsAS())))
         return false;
         return false;
     }
     }
     bool bUpdated = false;
     bool bUpdated = false;

+ 1 - 1
lib/HLSL/DxilGenerationPass.cpp

@@ -201,7 +201,7 @@ public:
 
 
     // Load up debug information, to cross-reference values and the instructions
     // Load up debug information, to cross-reference values and the instructions
     // used to load them.
     // used to load them.
-    m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+    m_HasDbgInfo = hasDebugInfo(M);
 
 
     // EntrySig for shader functions.
     // EntrySig for shader functions.
     DxilEntryPropsMap EntryPropsMap;
     DxilEntryPropsMap EntryPropsMap;

+ 35 - 1
lib/HLSL/DxilLinker.cpp

@@ -119,6 +119,7 @@ public:
     return m_functionNameMap;
     return m_functionNameMap;
   }
   }
   bool IsInitFunc(llvm::Function *F);
   bool IsInitFunc(llvm::Function *F);
+  bool IsEntry(llvm::Function *F);
   bool IsResourceGlobal(const llvm::Constant *GV);
   bool IsResourceGlobal(const llvm::Constant *GV);
   DxilResourceBase *GetResource(const llvm::Constant *GV);
   DxilResourceBase *GetResource(const llvm::Constant *GV);
 
 
@@ -135,6 +136,7 @@ private:
   DxilModule &m_DM;
   DxilModule &m_DM;
   // Map from name to Link info for extern functions.
   // Map from name to Link info for extern functions.
   llvm::StringMap<std::unique_ptr<DxilFunctionLinkInfo>> m_functionNameMap;
   llvm::StringMap<std::unique_ptr<DxilFunctionLinkInfo>> m_functionNameMap;
+  llvm::SmallPtrSet<llvm::Function*,4>  m_entrySet;
   // Map from resource link global to resource. MapVector for deterministic iteration.
   // Map from resource link global to resource. MapVector for deterministic iteration.
   llvm::MapVector<const llvm::Constant *, DxilResourceBase *> m_resourceMap;
   llvm::MapVector<const llvm::Constant *, DxilResourceBase *> m_resourceMap;
   // Set of initialize functions for global variable. SetVector for deterministic iteration.
   // Set of initialize functions for global variable. SetVector for deterministic iteration.
@@ -202,6 +204,8 @@ DxilLib::DxilLib(std::unique_ptr<llvm::Module> pModule)
     }
     }
     m_functionNameMap[F.getName()] =
     m_functionNameMap[F.getName()] =
         llvm::make_unique<DxilFunctionLinkInfo>(&F);
         llvm::make_unique<DxilFunctionLinkInfo>(&F);
+    if (m_DM.IsEntry(&F))
+      m_entrySet.insert(&F);
   }
   }
 
 
   // Update internal global name.
   // Update internal global name.
@@ -211,6 +215,7 @@ DxilLib::DxilLib(std::unique_ptr<llvm::Module> pModule)
       GV.setName(MID + GV.getName());
       GV.setName(MID + GV.getName());
     }
     }
   }
   }
+
 }
 }
 
 
 void DxilLib::FixIntrinsicOverloads() {
 void DxilLib::FixIntrinsicOverloads() {
@@ -327,6 +332,7 @@ bool DxilLib::HasFunction(std::string &name) {
   return m_functionNameMap.count(name);
   return m_functionNameMap.count(name);
 }
 }
 
 
+// Reports whether F was recorded in this library's entry-function set.
+bool DxilLib::IsEntry(llvm::Function *F) {
+  return m_entrySet.count(F) != 0;
+}
 bool DxilLib::IsInitFunc(llvm::Function *F) { return m_initFuncSet.count(F); }
 bool DxilLib::IsInitFunc(llvm::Function *F) { return m_initFuncSet.count(F); }
 bool DxilLib::IsResourceGlobal(const llvm::Constant *GV) {
 bool DxilLib::IsResourceGlobal(const llvm::Constant *GV) {
   return m_resourceMap.count(GV);
   return m_resourceMap.count(GV);
@@ -782,6 +788,12 @@ DxilLinkJob::Link(std::pair<DxilFunctionLinkInfo *, DxilLib *> &entryLinkPair,
     if (newPatchConstantFunc->hasFnAttribute(llvm::Attribute::AlwaysInline))
     if (newPatchConstantFunc->hasFnAttribute(llvm::Attribute::AlwaysInline))
       newPatchConstantFunc->removeFnAttr(llvm::Attribute::AlwaysInline);
       newPatchConstantFunc->removeFnAttr(llvm::Attribute::AlwaysInline);
   }
   }
+
+  // Set the root signature, if one exists.
+  if (!props.serializedRootSignature.empty()) {
+    DM.ResetSerializedRootSignature(props.serializedRootSignature);
+    props.serializedRootSignature.clear();
+  }
   // Set EntryProps
   // Set EntryProps
   DM.SetShaderProperties(&props);
   DM.SetShaderProperties(&props);
 
 
@@ -1466,7 +1478,7 @@ DxilLinkerImpl::Link(StringRef entry, StringRef profile, dxilutil::ExportMap &ex
       return nullptr;
       return nullptr;
 
 
   } else {
   } else {
-    if (exportMap.empty()) {
+    if (exportMap.empty() && !exportMap.isExportShadersOnly()) {
       // Add every function for lib profile.
       // Add every function for lib profile.
       for (auto &it : m_functionNameMap) {
       for (auto &it : m_functionNameMap) {
         StringRef name = it.getKey();
         StringRef name = it.getKey();
@@ -1496,6 +1508,28 @@ DxilLinkerImpl::Link(StringRef entry, StringRef profile, dxilutil::ExportMap &ex
           }
           }
         }
         }
       }
       }
+    } else if (exportMap.isExportShadersOnly()) {
+      SmallVector<StringRef, 4> workList;
+      for (auto *pLib : m_attachedLibs) {
+        auto &DM = pLib->GetDxilModule();
+        auto *pM = DM.GetModule();
+        for (Function &F : pM->functions()) {
+          if (!pLib->IsEntry(&F)) {
+            if (!F.isDeclaration()) {
+              // Give non-entry functions internal linkage so they can be removed.
+              F.setLinkage(GlobalValue::LinkageTypes::InternalLinkage);
+            }
+            continue;
+          }
+          workList.emplace_back(F.getName());
+        }
+        libSet.insert(pLib);
+      }
+
+      if (!AddFunctions(workList, libSet, addedFunctionSet, linkJob,
+                        /*bLazyLoadDone*/ false,
+                        /*bAllowFuncionDecls*/ false))
+        return nullptr;
     } else {
     } else {
       SmallVector<StringRef, 4> workList;
       SmallVector<StringRef, 4> workList;
 
 

+ 10 - 4
lib/HLSL/DxilPreparePasses.cpp

@@ -516,9 +516,11 @@ public:
     }
     }
   }
   }
 
 
+  // Looks up the module's IsHelper global by name (locals allowed) without
+  // creating it; callers treat a null result as "no such global".
+  GlobalVariable *GetIsHelperGV(Module &M) {
+    const bool allowLocal = true;
+    return M.getGlobalVariable(DXIL::kDxIsHelperGlobalName, allowLocal);
+  }
   GlobalVariable *GetOrCreateIsHelperGV(Module &M, hlsl::OP *hlslOP) {
   GlobalVariable *GetOrCreateIsHelperGV(Module &M, hlsl::OP *hlslOP) {
-    GlobalVariable *GV =
-        M.getGlobalVariable(DXIL::kDxIsHelperGlobalName, /*AllowLocal*/ true);
+    GlobalVariable *GV = GetIsHelperGV(M);
     if (GV)
     if (GV)
       return GV;
       return GV;
     DxilModule &DM = M.GetDxilModule();
     DxilModule &DM = M.GetDxilModule();
@@ -593,7 +595,11 @@ public:
       for (auto uit = F->user_begin(); uit != F->user_end();) {
       for (auto uit = F->user_begin(); uit != F->user_end();) {
         CallInst *CI = cast<CallInst>(*(uit++));
         CallInst *CI = cast<CallInst>(*(uit++));
         if (!GV)
         if (!GV)
-          GV = GetOrCreateIsHelperGV(*F->getParent(), hlslOP);
+          GV = GetIsHelperGV(*F->getParent());
+        // If we don't already have a global for this,
+        // we didn't have any IsHelper() calls, so no need to add one now.
+        if (!GV)
+          return;
         IRBuilder<> Builder(CI);
         IRBuilder<> Builder(CI);
         Value *Cond =
         Value *Cond =
             Builder.CreateZExt(DxilInst_Discard(CI).get_condition(), I32Ty);
             Builder.CreateZExt(DxilInst_Discard(CI).get_condition(), I32Ty);
@@ -618,7 +624,7 @@ public:
       // in an exported function linked to a PS in another library in this case.
       // in an exported function linked to a PS in another library in this case.
       // But it won't pass validation otherwise.
       // But it won't pass validation otherwise.
       if (pSM->IsLib() && DXIL::CompareVersions(ValMajor, ValMinor, 1, 6) < 1) {
       if (pSM->IsLib() && DXIL::CompareVersions(ValMajor, ValMinor, 1, 6) < 1) {
-        if (GlobalVariable *GV = M.getGlobalVariable(DXIL::kDxIsHelperGlobalName, /*AllowLocal*/ true)) {
+        if (GlobalVariable *GV = GetIsHelperGV(M)) {
           GV->setLinkage(GlobalValue::InternalLinkage);
           GV->setLinkage(GlobalValue::InternalLinkage);
         }
         }
       }
       }

+ 92 - 12
lib/HLSL/DxilValidation.cpp

@@ -74,11 +74,12 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::ContainerPartMissing: return "Missing part '%0' required by module.";
     case hlsl::ValidationRule::ContainerPartMissing: return "Missing part '%0' required by module.";
     case hlsl::ValidationRule::ContainerPartInvalid: return "Unknown part '%0' found in DXIL container.";
     case hlsl::ValidationRule::ContainerPartInvalid: return "Unknown part '%0' found in DXIL container.";
     case hlsl::ValidationRule::ContainerRootSignatureIncompatible: return "Root Signature in DXIL container is not compatible with shader.";
     case hlsl::ValidationRule::ContainerRootSignatureIncompatible: return "Root Signature in DXIL container is not compatible with shader.";
-    case hlsl::ValidationRule::MetaRequired: return "TODO - Required metadata missing.";
+    case hlsl::ValidationRule::MetaRequired: return "Required metadata missing.";
     case hlsl::ValidationRule::MetaKnown: return "Named metadata '%0' is unknown.";
     case hlsl::ValidationRule::MetaKnown: return "Named metadata '%0' is unknown.";
     case hlsl::ValidationRule::MetaUsed: return "All metadata must be used by dxil.";
     case hlsl::ValidationRule::MetaUsed: return "All metadata must be used by dxil.";
     case hlsl::ValidationRule::MetaTarget: return "Unknown target triple '%0'.";
     case hlsl::ValidationRule::MetaTarget: return "Unknown target triple '%0'.";
-    case hlsl::ValidationRule::MetaWellFormed: return "TODO - Metadata must be well-formed in operand count and types.";
+    case hlsl::ValidationRule::MetaWellFormed: return "Metadata must be well-formed in operand count and types.";
+    case hlsl::ValidationRule::MetaVersionSupported: return "%0 version in metadata (%1.%2) is not supported; maximum: (%3.%4).";
     case hlsl::ValidationRule::MetaSemanticLen: return "Semantic length must be at least 1 and at most 64.";
     case hlsl::ValidationRule::MetaSemanticLen: return "Semantic length must be at least 1 and at most 64.";
     case hlsl::ValidationRule::MetaInterpModeValid: return "Invalid interpolation mode for '%0'.";
     case hlsl::ValidationRule::MetaInterpModeValid: return "Invalid interpolation mode for '%0'.";
     case hlsl::ValidationRule::MetaSemaKindValid: return "Semantic kind for '%0' is invalid.";
     case hlsl::ValidationRule::MetaSemaKindValid: return "Semantic kind for '%0' is invalid.";
@@ -208,7 +209,7 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
     case hlsl::ValidationRule::TypesNoPtrToPtr: return "Pointers to pointers, or pointers in structures are not allowed.";
     case hlsl::ValidationRule::TypesNoPtrToPtr: return "Pointers to pointers, or pointers in structures are not allowed.";
     case hlsl::ValidationRule::TypesI8: return "I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics.";
     case hlsl::ValidationRule::TypesI8: return "I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics.";
     case hlsl::ValidationRule::SmName: return "Unknown shader model '%0'.";
     case hlsl::ValidationRule::SmName: return "Unknown shader model '%0'.";
-    case hlsl::ValidationRule::SmDxilVersion: return "Shader model requires Dxil Version %0,%1.";
+    case hlsl::ValidationRule::SmDxilVersion: return "Shader model requires Dxil Version %0.%1.";
     case hlsl::ValidationRule::SmOpcode: return "Opcode %0 not valid in shader model %1.";
     case hlsl::ValidationRule::SmOpcode: return "Opcode %0 not valid in shader model %1.";
     case hlsl::ValidationRule::SmOperand: return "Operand must be defined in target shader model.";
     case hlsl::ValidationRule::SmOperand: return "Operand must be defined in target shader model.";
     case hlsl::ValidationRule::SmSemantic: return "Semantic '%0' is invalid as %1 %2.";
     case hlsl::ValidationRule::SmSemantic: return "Semantic '%0' is invalid as %1 %2.";
@@ -687,8 +688,49 @@ struct ValidationContext {
     Failed = true;
     Failed = true;
   }
   }
 
 
+  // Returns a printable name for a resource.  Use this instead of
+  // DxilResourceBase::GetGlobalName so diagnostics still identify the
+  // resource when its global name is empty.
+  //
+  // Lookup order:
+  //   1. "nullptr" when Res is null.
+  //   2. The resource's own global name, when non-empty.
+  //   3. When a debug module is present, the global name of the resource with
+  //      the same class and ID in that module ("Invalid Resource" for any
+  //      other resource class).
+  //   4. Otherwise a description built from class and binding location:
+  //        Allocated:   (CB|T|U|S)<ID>: <ResourceKind> ((cb|t|u|s)<LB>[<RangeSize>] space<SpaceID>)
+  //        Unallocated: (CB|T|U|S)<ID>: <ResourceKind> (no bind location)
+  //        Example:     U0: TypedBuffer (u5[2] space1)
+  //      [<RangeSize>] is omitted when it is 1 and space<SpaceID> when it is 0.
+  std::string GetResourceName(const hlsl::DxilResourceBase *Res) {
+    if (Res == nullptr)
+      return "nullptr";
+
+    std::string OwnName = Res->GetGlobalName();
+    if (!OwnName.empty())
+      return OwnName;
+
+    if (pDebugModule) {
+      DxilModule &DebugDM = pDebugModule->GetOrCreateDxilModule();
+      switch (Res->GetClass()) {
+      case DXIL::ResourceClass::CBuffer:
+        return DebugDM.GetCBuffer(Res->GetID()).GetGlobalName();
+      case DXIL::ResourceClass::Sampler:
+        return DebugDM.GetSampler(Res->GetID()).GetGlobalName();
+      case DXIL::ResourceClass::SRV:
+        return DebugDM.GetSRV(Res->GetID()).GetGlobalName();
+      case DXIL::ResourceClass::UAV:
+        return DebugDM.GetUAV(Res->GetID()).GetGlobalName();
+      default:
+        return "Invalid Resource";
+      }
+    }
+
+    // No name available anywhere: describe the resource by class and binding.
+    std::string Description =
+        (Twine(Res->GetResIDPrefix()) + Twine(Res->GetID()) + ": " +
+         Twine(Res->GetResKindName()))
+            .str();
+
+    if (!Res->IsAllocated()) {
+      Description += " (no bind location)";
+      return Description;
+    }
+
+    Description += (" (" + Twine(Res->GetResBindPrefix()) +
+                    Twine(Res->GetLowerBound()))
+                       .str();
+
+    // Range suffix: unbounded wins, a range of exactly 1 prints nothing.
+    if (Res->IsUnbounded())
+      Description += "[unbounded]";
+    else if (Res->GetRangeSize() != 1)
+      Description += ("[" + Twine(Res->GetRangeSize()) + "]").str();
+
+    // Space suffix is only printed for a non-default space.
+    if (Res->GetSpaceID() != 0)
+      Description += (" space" + Twine(Res->GetSpaceID())).str();
+
+    Description += ")";
+    return Description;
+  }
+
   void EmitResourceError(const hlsl::DxilResourceBase *Res, ValidationRule rule) {
   void EmitResourceError(const hlsl::DxilResourceBase *Res, ValidationRule rule) {
-    std::string QuotedRes = " '" + Res->GetGlobalName() + "'";
+    std::string QuotedRes = " '" + GetResourceName(Res) + "'";
     dxilutil::EmitErrorOnContext(M.getContext(), GetValidationRuleText(rule) + QuotedRes);
     dxilutil::EmitErrorOnContext(M.getContext(), GetValidationRuleText(rule) + QuotedRes);
     Failed = true;
     Failed = true;
   }
   }
@@ -696,7 +738,7 @@ struct ValidationContext {
   void EmitResourceFormatError(const hlsl::DxilResourceBase *Res,
   void EmitResourceFormatError(const hlsl::DxilResourceBase *Res,
                                ValidationRule rule,
                                ValidationRule rule,
                                ArrayRef<StringRef> args) {
                                ArrayRef<StringRef> args) {
-    std::string QuotedRes = " '" + Res->GetGlobalName() + "'";
+    std::string QuotedRes = " '" + GetResourceName(Res) + "'";
     std::string ruleText = GetValidationRuleText(rule);
     std::string ruleText = GetValidationRuleText(rule);
     FormatRuleText(ruleText, args);
     FormatRuleText(ruleText, args);
     dxilutil::EmitErrorOnContext(M.getContext(), ruleText + QuotedRes);
     dxilutil::EmitErrorOnContext(M.getContext(), ruleText + QuotedRes);
@@ -2525,6 +2567,17 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
       ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction,
       ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction,
                                   {"64-bit atomic operations", "Shader Model 6.6+"});
                                   {"64-bit atomic operations", "Shader Model 6.6+"});
   } break;
   } break;
+  case DXIL::OpCode::CreateHandle:
+    if (ValCtx.isLibProfile) {
+      ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction,
+                                  {"CreateHandle", "non-library targets"});
+    }
+    // CreateHandle should not be used in SM 6.6 and above:
+    if (DXIL::CompareVersions(ValCtx.m_DxilMajor, ValCtx.m_DxilMinor, 1, 5) > 0) {
+      ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction,
+                                  {"CreateHandle", "Shader model 6.5 and below"});
+    }
+    break;
   default:
   default:
     // TODO: make sure every opcode is checked.
     // TODO: make sure every opcode is checked.
     // Skip opcodes don't need special check.
     // Skip opcodes don't need special check.
@@ -3858,6 +3911,12 @@ static void ValidateValidatorVersion(ValidationContext &ValCtx) {
         // depending on the degree of compat across versions.
         // depending on the degree of compat across versions.
         if (majorVer == curMajor && minorVer <= curMinor) {
         if (majorVer == curMajor && minorVer <= curMinor) {
           return;
           return;
+        } else {
+          ValCtx.EmitFormatError(
+              ValidationRule::MetaVersionSupported,
+              {"Validator", std::to_string(majorVer), std::to_string(minorVer),
+               std::to_string(curMajor), std::to_string(curMinor)});
+          return;
         }
         }
       }
       }
     }
     }
@@ -3879,9 +3938,15 @@ static void ValidateDxilVersion(ValidationContext &ValCtx) {
           GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) {
           GetNodeOperandAsInt(ValCtx, pVerValues, 1, &minorVer)) {
         // This will need to be updated as dxil major/minor versions evolve,
         // This will need to be updated as dxil major/minor versions evolve,
         // depending on the degree of compat across versions.
         // depending on the degree of compat across versions.
-        if ((majorVer == 1 && minorVer <= DXIL::kDxilMinor) &&
+        if ((majorVer == DXIL::kDxilMajor && minorVer <= DXIL::kDxilMinor) &&
             (majorVer == ValCtx.m_DxilMajor && minorVer == ValCtx.m_DxilMinor)) {
             (majorVer == ValCtx.m_DxilMajor && minorVer == ValCtx.m_DxilMinor)) {
           return;
           return;
+        } else {
+          ValCtx.EmitFormatError(
+              ValidationRule::MetaVersionSupported,
+              {"Dxil", std::to_string(majorVer), std::to_string(minorVer),
+               std::to_string(DXIL::kDxilMajor), std::to_string(DXIL::kDxilMinor)});
+          return;
         }
         }
       }
       }
     }
     }
@@ -3923,6 +3988,9 @@ static void ValidateBitcode(ValidationContext &ValCtx) {
 }
 }
 
 
 static void ValidateMetadata(ValidationContext &ValCtx) {
 static void ValidateMetadata(ValidationContext &ValCtx) {
+  ValidateValidatorVersion(ValCtx);
+  ValidateDxilVersion(ValCtx);
+
   Module *pModule = &ValCtx.M;
   Module *pModule = &ValCtx.M;
   const std::string &target = pModule->getTargetTriple();
   const std::string &target = pModule->getTargetTriple();
   if (target != "dxil-ms-dx") {
   if (target != "dxil-ms-dx") {
@@ -3970,8 +4038,6 @@ static void ValidateMetadata(ValidationContext &ValCtx) {
     }
     }
   }
   }
 
 
-  ValidateDxilVersion(ValCtx);
-  ValidateValidatorVersion(ValCtx);
   ValidateTypeAnnotation(ValCtx);
   ValidateTypeAnnotation(ValCtx);
 }
 }
 
 
@@ -3996,7 +4062,7 @@ static void ValidateResourceOverlap(
   if (conflictRes) {
   if (conflictRes) {
     ValCtx.EmitFormatError(
     ValCtx.EmitFormatError(
         ValidationRule::SmResourceRangeOverlap,
         ValidationRule::SmResourceRangeOverlap,
-        {res.GetGlobalName(), std::to_string(base),
+        {ValCtx.GetResourceName(&res), std::to_string(base),
          std::to_string(size),
          std::to_string(size),
          std::to_string(conflictRes->GetLowerBound()),
          std::to_string(conflictRes->GetLowerBound()),
          std::to_string(conflictRes->GetRangeSize()),
          std::to_string(conflictRes->GetRangeSize()),
@@ -4208,7 +4274,7 @@ static void ValidateCBuffer(DxilCBuffer &cb, ValidationContext &ValCtx) {
       DXIL::kMaxCBufferSize << 4);
       DXIL::kMaxCBufferSize << 4);
   CollectCBufferRanges(annotation, constAllocator,
   CollectCBufferRanges(annotation, constAllocator,
                        0, typeSys,
                        0, typeSys,
-                       cb.GetGlobalName(), ValCtx);
+                       ValCtx.GetResourceName(&cb), ValCtx);
 }
 }
 
 
 static void ValidateResources(ValidationContext &ValCtx) {
 static void ValidateResources(ValidationContext &ValCtx) {
@@ -4240,7 +4306,7 @@ static void ValidateResources(ValidationContext &ValCtx) {
     if (uav->HasCounter() && uav->IsGloballyCoherent())
     if (uav->HasCounter() && uav->IsGloballyCoherent())
       ValCtx.EmitResourceFormatError(uav.get(),
       ValCtx.EmitResourceFormatError(uav.get(),
                                      ValidationRule::MetaGlcNotOnAppendConsume,
                                      ValidationRule::MetaGlcNotOnAppendConsume,
-                                     {uav.get()->GetGlobalName()});
+                                     {ValCtx.GetResourceName(uav.get())});
 
 
     ValidateResource(*uav, ValCtx);
     ValidateResource(*uav, ValCtx);
     ValidateResourceOverlap(*uav, uavAllocator, ValCtx);
     ValidateResourceOverlap(*uav, uavAllocator, ValCtx);
@@ -5693,7 +5759,7 @@ void GetValidationVersion(_Out_ unsigned *pMajor, _Out_ unsigned *pMinor) {
   // - Mesh and Amplification shaders
   // - Mesh and Amplification shaders
   // - DXR 1.1 & RayQuery support
   // - DXR 1.1 & RayQuery support
   *pMajor = 1;
   *pMajor = 1;
-  *pMinor = 6;
+  *pMinor = 7;
   // VALRULE-TEXT:END
   // VALRULE-TEXT:END
 }
 }
 
 
@@ -6246,6 +6312,8 @@ _Use_decl_annotations_ HRESULT ValidateLoadModuleFromContainerLazy(
 _Use_decl_annotations_
 _Use_decl_annotations_
 HRESULT ValidateDxilContainer(const void *pContainer,
 HRESULT ValidateDxilContainer(const void *pContainer,
                               uint32_t ContainerSize,
                               uint32_t ContainerSize,
+                              const void *pOptDebugBitcode,
+                              uint32_t OptDebugBitcodeSize,
                               llvm::raw_ostream &DiagStream) {
                               llvm::raw_ostream &DiagStream) {
   LLVMContext Ctx, DbgCtx;
   LLVMContext Ctx, DbgCtx;
   std::unique_ptr<llvm::Module> pModule, pDebugModule;
   std::unique_ptr<llvm::Module> pModule, pDebugModule;
@@ -6260,6 +6328,12 @@ HRESULT ValidateDxilContainer(const void *pContainer,
   IFR(ValidateLoadModuleFromContainer(pContainer, ContainerSize, pModule, pDebugModule,
   IFR(ValidateLoadModuleFromContainer(pContainer, ContainerSize, pModule, pDebugModule,
       Ctx, DbgCtx, DiagStream));
       Ctx, DbgCtx, DiagStream));
 
 
+  if (!pDebugModule && pOptDebugBitcode) {
+    // TODO: lazy load for perf
+    IFR(ValidateLoadModule((const char *)pOptDebugBitcode, OptDebugBitcodeSize,
+                           pDebugModule, DbgCtx, DiagStream, /*bLazyLoad*/false));
+  }
+
   // Validate DXIL Module
   // Validate DXIL Module
   IFR(ValidateDxilModule(pModule.get(), pDebugModule.get()));
   IFR(ValidateDxilModule(pModule.get(), pDebugModule.get()));
 
 
@@ -6271,4 +6345,10 @@ HRESULT ValidateDxilContainer(const void *pContainer,
     IsDxilContainerLike(pContainer, ContainerSize), ContainerSize);
     IsDxilContainerLike(pContainer, ContainerSize), ContainerSize);
 }
 }
 
 
+// Convenience overload for callers that have no separate debug bitcode:
+// forwards to the five-argument overload with a null buffer of size zero.
+_Use_decl_annotations_
+HRESULT ValidateDxilContainer(const void *pContainer,
+                              uint32_t ContainerSize,
+                              llvm::raw_ostream &DiagStream) {
+  const void *const pNoDebugBitcode = nullptr;
+  const uint32_t NoDebugBitcodeSize = 0;
+  return ValidateDxilContainer(pContainer, ContainerSize, pNoDebugBitcode,
+                               NoDebugBitcodeSize, DiagStream);
+}
 } // namespace hlsl
 } // namespace hlsl

+ 3 - 3
lib/HLSL/HLMatrixLowerPass.cpp

@@ -197,7 +197,7 @@ bool HLMatrixLowerPass::runOnModule(Module &M) {
   m_pHLModule = &m_pModule->GetOrCreateHLModule();
   m_pHLModule = &m_pModule->GetOrCreateHLModule();
   // Load up debug information, to cross-reference values and the instructions
   // Load up debug information, to cross-reference values and the instructions
   // used to load them.
   // used to load them.
-  m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+  m_HasDbgInfo = hasDebugInfo(M);
   m_matToVecStubs = &matToVecStubs;
   m_matToVecStubs = &matToVecStubs;
   m_vecToMatStubs = &vecToMatStubs;
   m_vecToMatStubs = &vecToMatStubs;
 
 
@@ -531,8 +531,8 @@ void HLMatrixLowerPass::replaceAllVariableUses(
     }
     }
 
 
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Use.getUser())) {
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Use.getUser())) {
-      DXASSERT(CE->getOpcode() == Instruction::AddrSpaceCast,
-               "Unexpected constant user");
+      DXASSERT(CE->getOpcode() == Instruction::AddrSpaceCast ||
+        CE->use_empty(), "Unexpected constant user");
       replaceAllVariableUses(GEPIdxStack, CE, LoweredPtr);
       replaceAllVariableUses(GEPIdxStack, CE, LoweredPtr);
       DXASSERT_NOMSG(CE->use_empty());
       DXASSERT_NOMSG(CE->use_empty());
       CE->destroyConstant();
       CE->destroyConstant();

+ 1 - 3
lib/HLSL/HLModule.cpp

@@ -322,9 +322,7 @@ std::vector<uint8_t> &HLModule::GetSerializedRootSignature() {
 }
 }
 
 
 void HLModule::SetSerializedRootSignature(const uint8_t *pData, unsigned size) {
 void HLModule::SetSerializedRootSignature(const uint8_t *pData, unsigned size) {
-  m_SerializedRootSignature.clear();
-  m_SerializedRootSignature.resize(size);
-  memcpy(m_SerializedRootSignature.data(), pData, size);
+  m_SerializedRootSignature.assign(pData, pData+size);
 }
 }
 
 
 DxilTypeSystem &HLModule::GetTypeSystem() {
 DxilTypeSystem &HLModule::GetTypeSystem() {

+ 3 - 4
lib/HLSL/HLOperationLower.cpp

@@ -25,7 +25,6 @@
 #include "dxc/HLSL/HLOperationLowerExtension.h"
 #include "dxc/HLSL/HLOperationLowerExtension.h"
 #include "dxc/HLSL/HLOperations.h"
 #include "dxc/HLSL/HLOperations.h"
 #include "dxc/HlslIntrinsicOp.h"
 #include "dxc/HlslIntrinsicOp.h"
-#include "dxc/HLSL/DxilConvergent.h"
 #include "dxc/DXIL/DxilResourceProperties.h"
 #include "dxc/DXIL/DxilResourceProperties.h"
 
 
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -844,8 +843,8 @@ Value *FindScalarSource(Value *src, unsigned vecIdx = 0) {
         vecIdx = (unsigned)cast<ConstantInt>(EE->getIndexOperand())
         vecIdx = (unsigned)cast<ConstantInt>(EE->getIndexOperand())
           ->getUniqueInteger().getLimitedValue();
           ->getUniqueInteger().getLimitedValue();
         src = EE->getVectorOperand();
         src = EE->getVectorOperand();
-      } else if (hlsl::IsConvergentMarker(src)) {
-        src = hlsl::GetConvergentSource(src);
+      } else if (hlsl::dxilutil::IsConvergentMarker(src)) {
+        src = hlsl::dxilutil::GetConvergentSource(src);
       } else {
       } else {
         break;  // Found it.
         break;  // Found it.
       }
       }
@@ -6657,7 +6656,7 @@ void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
 
 
         // Load the whole register.
         // Load the whole register.
         Value *newLd = GenerateCBLoadLegacy(handle, legacyIndex,
         Value *newLd = GenerateCBLoadLegacy(handle, legacyIndex,
-                                     /*channelOffset*/ 0, EltTy,
+                                     /*channelOffset*/ channel, EltTy,
                                      /*vecSize*/ vecSize, hlslOP, Builder);
                                      /*vecSize*/ vecSize, hlslOP, Builder);
         // Copy to array.
         // Copy to array.
         IRBuilder<> AllocaBuilder(GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
         IRBuilder<> AllocaBuilder(GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());

+ 60 - 6
lib/HLSL/WaveSensitivityAnalysis.cpp

@@ -63,6 +63,7 @@ private:
   map<Instruction *, WaveSensitivity> InstState;
   map<Instruction *, WaveSensitivity> InstState;
   map<BasicBlock *, WaveSensitivity> BBState;
   map<BasicBlock *, WaveSensitivity> BBState;
   std::vector<Instruction *> InstWorkList;
   std::vector<Instruction *> InstWorkList;
+  std::vector<PHINode *> UnknownPhis; // currently unknown phis. Indicate cycles after Analyze
   std::vector<BasicBlock *> BBWorkList;
   std::vector<BasicBlock *> BBWorkList;
   bool CheckBBState(BasicBlock *BB, WaveSensitivity WS);
   bool CheckBBState(BasicBlock *BB, WaveSensitivity WS);
   WaveSensitivity GetInstState(Instruction *I);
   WaveSensitivity GetInstState(Instruction *I);
@@ -72,6 +73,7 @@ private:
 public:
 public:
   WaveSensitivityAnalyzer(PostDominatorTree &PDT) : pPDT(&PDT) {}
   WaveSensitivityAnalyzer(PostDominatorTree &PDT) : pPDT(&PDT) {}
   void Analyze(Function *F);
   void Analyze(Function *F);
+  void Analyze();
   bool IsWaveSensitive(Instruction *op);
   bool IsWaveSensitive(Instruction *op);
 };
 };
 
 
@@ -79,8 +81,60 @@ WaveSensitivityAnalysis* WaveSensitivityAnalysis::create(PostDominatorTree &PDT)
   return new WaveSensitivityAnalyzer(PDT);
   return new WaveSensitivityAnalyzer(PDT);
 }
 }
 
 
+// Analyze the given function's instructions as wave-sensitive or not
 void WaveSensitivityAnalyzer::Analyze(Function *F) {
 void WaveSensitivityAnalyzer::Analyze(Function *F) {
-  UpdateBlock(&F->getEntryBlock(), KnownNotSensitive);
+  // Add all blocks but the entry in reverse order so they come out in order
+  auto it = F->getBasicBlockList().end();
+  for ( it-- ; it != F->getBasicBlockList().begin(); it--)
+    BBWorkList.emplace_back(&*it);
+  // Add entry block as non-sensitive
+  UpdateBlock(&*it, KnownNotSensitive);
+
+  // First analysis
+  Analyze();
+
+  // If any phis with explored preds remain unknown
+  // it has to be in a loop that don't include wave sensitivity
+  // Update each as such and redo Analyze to mark the descendents
+  while (!UnknownPhis.empty() || !InstWorkList.empty() || !BBWorkList.empty()) {
+    while (!UnknownPhis.empty()) {
+      PHINode *Phi = UnknownPhis.back();
+      UnknownPhis.pop_back();
+      // UnknownPhis might have actually known phis that were changed. skip them
+      if (Unknown == GetInstState(Phi)) {
+        // If any of the preds have not been visited, we can't assume a cycle yet
+        bool allPredsVisited = true;
+        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
+          if (!BBState.count(Phi->getIncomingBlock(i))) {
+            allPredsVisited = false;
+            break;
+          }
+        }
+#ifndef NDEBUG
+        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
+          if (Instruction *IArg = dyn_cast<Instruction>(Phi->getIncomingValue(i))) {
+            DXASSERT_LOCALVAR(IArg, GetInstState(IArg) != KnownSensitive,
+                   "Unknown wave-status Phi argument should not be able to be known sensitive");
+          }
+        }
+#endif
+        if (allPredsVisited)
+          UpdateInst(Phi, KnownNotSensitive);
+      }
+    }
+    Analyze();
+  }
+#ifndef NDEBUG
+  for (BasicBlock &BB : *F) {
+    for (Instruction &I : BB) {
+      DXASSERT_LOCALVAR(I, Unknown != GetInstState(&I), "Wave sensitivity analysis exited without finding results for all instructions");
+    }
+  }
+#endif
+}
+
+// Analyze the member instruction and BBlock worklists
+void WaveSensitivityAnalyzer::Analyze() {
   while (!InstWorkList.empty() || !BBWorkList.empty()) {
   while (!InstWorkList.empty() || !BBWorkList.empty()) {
     // Process the instruction work list.
     // Process the instruction work list.
     while (!InstWorkList.empty()) {
     while (!InstWorkList.empty()) {
@@ -94,8 +148,8 @@ void WaveSensitivityAnalyzer::Analyze(Function *F) {
       }
       }
     }
     }
 
 
-    // Process the basic block work list.
-    while (!BBWorkList.empty()) {
+    // Process one entry of the basic block work list.
+    if (!BBWorkList.empty()) {
       BasicBlock *BB = BBWorkList.back();
       BasicBlock *BB = BBWorkList.back();
       BBWorkList.pop_back();
       BBWorkList.pop_back();
 
 
@@ -184,6 +238,8 @@ void WaveSensitivityAnalyzer::VisitInst(Instruction *I) {
       if (WS == KnownSensitive) {
       if (WS == KnownSensitive) {
         UpdateInst(I, KnownSensitive);
         UpdateInst(I, KnownSensitive);
         return;
         return;
+      } else if (Unknown == GetInstState(I)) {
+        UnknownPhis.emplace_back(Phi);
       }
       }
     }
     }
   }
   }
@@ -196,10 +252,8 @@ void WaveSensitivityAnalyzer::VisitInst(Instruction *I) {
       if (WS == KnownSensitive) {
       if (WS == KnownSensitive) {
         UpdateInst(I, KnownSensitive);
         UpdateInst(I, KnownSensitive);
         return;
         return;
-      }
-      if (WS == Unknown) {
+      } else if (WS == Unknown) {
         allKnownNotSensitive = false;
         allKnownNotSensitive = false;
-        return;
       }
       }
     }
     }
   }
   }

+ 12 - 0
lib/IR/BasicBlock.cpp

@@ -168,6 +168,18 @@ CallInst *BasicBlock::getTerminatingMustTailCall() {
   return nullptr;
   return nullptr;
 }
 }
 
 
+// HLSL Change - begin
+// Counts the instructions in this block, skipping debug-info intrinsics.
+size_t BasicBlock::compute_size_no_dbg() const {
+  size_t NumNonDbg = 0;
+  for (const Instruction &I : InstList) {
+    if (!isa<DbgInfoIntrinsic>(&I))
+      ++NumNonDbg;
+  }
+  return NumNonDbg;
+}
+// HLSL Change - end
+
 Instruction* BasicBlock::getFirstNonPHI() {
 Instruction* BasicBlock::getFirstNonPHI() {
   for (Instruction &I : *this)
   for (Instruction &I : *this)
     if (!isa<PHINode>(I))
     if (!isa<PHINode>(I))

+ 15 - 0
lib/IR/DebugInfo.cpp

@@ -379,6 +379,21 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
   return 0;
   return 0;
 }
 }
 
 
+// HLSL Change - begin
+// Returns true when the module has at least one named metadata node whose
+// name starts with "llvm.dbg.".
+bool llvm::hasDebugInfo(const Module &M) {
+  // Checking only for "llvm.dbg.cu" might be enough, but matching the whole
+  // "llvm.dbg." prefix is more robust.
+  const StringRef DbgPrefix("llvm.dbg.");
+  auto Node = M.named_metadata_begin();
+  const auto NodeEnd = M.named_metadata_end();
+  while (Node != NodeEnd) {
+    if (Node->getName().startswith(DbgPrefix))
+      return true;
+    ++Node;
+  }
+  return false;
+}
+// HLSL Change - end
+
 DenseMap<const llvm::Function *, DISubprogram *>
 DenseMap<const llvm::Function *, DISubprogram *>
 llvm::makeSubprogramMap(const Module &M) {
 llvm::makeSubprogramMap(const Module &M) {
   DenseMap<const Function *, DISubprogram *> R;
   DenseMap<const Function *, DISubprogram *> R;

+ 2 - 5
lib/IR/DiagnosticInfo.cpp

@@ -246,15 +246,12 @@ void DiagnosticInfoDxil::print(DiagnosticPrinter &DP) const {
     DP << "Function: " << Func->getName() << ": ";
     DP << "Function: " << Func->getName() << ": ";
   }
   }
 
 
-  bool ZiPrompt = true;
   switch (getSeverity()) {
   switch (getSeverity()) {
-  case DiagnosticSeverity::DS_Note:    DP << "note: "; ZiPrompt = false; break;
-  case DiagnosticSeverity::DS_Remark:  DP << "remark: "; ZiPrompt = false; break;
+  case DiagnosticSeverity::DS_Note:    DP << "note: "; break;
+  case DiagnosticSeverity::DS_Remark:  DP << "remark: "; break;
   case DiagnosticSeverity::DS_Warning: DP << "warning: "; break;
   case DiagnosticSeverity::DS_Warning: DP << "warning: "; break;
   case DiagnosticSeverity::DS_Error:   DP << "error: "; break;
   case DiagnosticSeverity::DS_Error:   DP << "error: "; break;
   }
   }
   DP << getMsgStr();
   DP << getMsgStr();
-  if (!DLoc && ZiPrompt)
-    DP << " Use /Zi for source location.";
 }
 }
 // HLSL Change end - Dxil Diagnostic Info reporter
 // HLSL Change end - Dxil Diagnostic Info reporter

+ 0 - 2
lib/MC/MCObjectStreamer.cpp

@@ -280,10 +280,8 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst,
   IF->getContents().append(Code.begin(), Code.end());
   IF->getContents().append(Code.begin(), Code.end());
 }
 }
 
 
-#ifndef NDEBUG
 static const char *const BundlingNotImplementedMsg =
 static const char *const BundlingNotImplementedMsg =
   "Aligned bundling is not implemented for this object format";
   "Aligned bundling is not implemented for this object format";
-#endif
 
 
 void MCObjectStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
 void MCObjectStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
   llvm_unreachable(BundlingNotImplementedMsg);
   llvm_unreachable(BundlingNotImplementedMsg);

+ 29 - 0
lib/Option/OptTable.cpp

@@ -188,6 +188,35 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str,
   return 0;
   return 0;
 }
 }
 
 
+// HLSL Change - begin
+// Looks up an option by its normalized name.
+//
+// Walks the searchable options in table order and returns the first one whose
+// name is a prefix of normalizedName, for which hasFlag(FlagsToInclude) holds
+// (only checked when FlagsToInclude is non-zero) and for which
+// hasFlag(FlagsToExclude) does not hold.  Returns Option(nullptr, this) when
+// no option qualifies.
+Option OptTable::findOption(const char *normalizedName, unsigned FlagsToInclude, unsigned FlagsToExclude) const {
+  const StringRef Wanted(normalizedName);
+  const Info *const Last = OptionInfos + getNumOptions();
+
+  for (const Info *Candidate = OptionInfos + FirstSearchableIndex;
+       Candidate != Last; ++Candidate) {
+    // The option's name must be a prefix of the requested name.
+    if (!Wanted.startswith(Candidate->Name))
+      continue;
+
+    Option Opt(Candidate, this);
+
+    const bool MissingRequiredFlag =
+        FlagsToInclude && !Opt.hasFlag(FlagsToInclude);
+    if (MissingRequiredFlag || Opt.hasFlag(FlagsToExclude))
+      continue;
+
+    return Opt;
+  }
+
+  return Option(nullptr, this);
+}
+// HLSL Change - end
+
 Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
 Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
                            unsigned FlagsToInclude,
                            unsigned FlagsToInclude,
                            unsigned FlagsToExclude) const {
                            unsigned FlagsToExclude) const {

+ 16 - 6
lib/Support/ErrorHandling.cpp

@@ -32,6 +32,7 @@
 #ifdef _WIN32
 #ifdef _WIN32
 #include "windows.h"  // HLSL Change
 #include "windows.h"  // HLSL Change
 #endif
 #endif
+#include "dxc/Support/exception.h"  // HLSL Change
 
 
 #if defined(HAVE_UNISTD_H)
 #if defined(HAVE_UNISTD_H)
 # include <unistd.h>
 # include <unistd.h>
@@ -112,7 +113,8 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
   if (handler) {
   if (handler) {
     handler(handlerData, Reason.str(), GenCrashDiag);
     handler(handlerData, Reason.str(), GenCrashDiag);
   }
   }
-  RaiseException(STATUS_LLVM_FATAL, 0, 0, 0);
+
+  throw hlsl::Exception(DXC_E_LLVM_FATAL_ERROR, std::string("LLVM ERROR: ") + Reason.str() + "\n");
 #endif
 #endif
 }
 }
 
 
@@ -121,19 +123,27 @@ void llvm::llvm_unreachable_internal(const char *msg, const char *file,
   // This code intentionally doesn't call the ErrorHandler callback, because
   // This code intentionally doesn't call the ErrorHandler callback, because
   // llvm_unreachable is intended to be used to indicate "impossible"
   // llvm_unreachable is intended to be used to indicate "impossible"
   // situations, and not legitimate runtime errors.
   // situations, and not legitimate runtime errors.
+  // HLSL Change - collect full message in string
+  SmallVector<char, 64> Buffer;
+  raw_svector_ostream OS(Buffer);
   if (msg)
   if (msg)
-    dbgs() << msg << "\n";
-  dbgs() << "UNREACHABLE executed";
+    OS << msg << "\n";
+  OS << "UNREACHABLE executed";
   if (file)
   if (file)
-    dbgs() << " at " << file << ":" << line;
-  dbgs() << "!\n";
+    OS << " at " << file << ":" << line;
+  OS << "!\n";
 #ifndef LLVM_ON_WIN32 // HLSL Change - unwind if necessary, but don't terminate the process
 #ifndef LLVM_ON_WIN32 // HLSL Change - unwind if necessary, but don't terminate the process
+  dbgs() << OS.str();
   abort();
   abort();
 #else
 #else
-  RaiseException(STATUS_LLVM_UNREACHABLE, 0, 0, 0);
+  throw hlsl::Exception(DXC_E_LLVM_UNREACHABLE, OS.str());
 #endif
 #endif
 }
 }
 
 
+// Reports a failed cast check by throwing hlsl::Exception with
+// DXC_E_LLVM_CAST_ERROR; `func` is the name placed in front of "<X>()" in
+// the message.
+void llvm::llvm_cast_assert_internal(const char *func) {
+  std::string Message(func);
+  Message += "<X>() argument of incompatible type!\n";
+  throw hlsl::Exception(DXC_E_LLVM_CAST_ERROR, Message);
+}
+
 static void bindingsErrorHandler(void *user_data, const std::string& reason,
 static void bindingsErrorHandler(void *user_data, const std::string& reason,
                                  bool gen_crash_diag) {
                                  bool gen_crash_diag) {
   LLVMFatalErrorHandler handler =
   LLVMFatalErrorHandler handler =

+ 1 - 1
lib/Transforms/Scalar/LowerTypePasses.cpp

@@ -130,7 +130,7 @@ bool LowerTypePass::runOnModule(Module &M) {
   initialize(M);
   initialize(M);
   // Load up debug information, to cross-reference values and the instructions
   // Load up debug information, to cross-reference values and the instructions
   // used to load them.
   // used to load them.
-  bool HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+  bool HasDbgInfo = llvm::hasDebugInfo(M);
   llvm::DebugInfoFinder Finder;
   llvm::DebugInfoFinder Finder;
   if (HasDbgInfo) {
   if (HasDbgInfo) {
     Finder.processModule(M);
     Finder.processModule(M);

+ 4 - 2
lib/Transforms/Scalar/MergedLoadStoreMotion.cpp

@@ -359,7 +359,8 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
   BasicBlock *Succ0 = BI->getSuccessor(0);
   BasicBlock *Succ0 = BI->getSuccessor(0);
   BasicBlock *Succ1 = BI->getSuccessor(1);
   BasicBlock *Succ1 = BI->getSuccessor(1);
   // #Instructions in Succ1 for Compile Time Control
   // #Instructions in Succ1 for Compile Time Control
-  int Size1 = Succ1->size();
+  // int Size1 = Succ1->size(); // HLSL Change
+  int Size1 = Succ1->compute_size_no_dbg(); // HLSL Change
   int NLoads = 0;
   int NLoads = 0;
   for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
   for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
        BBI != BBE;) {
        BBI != BBE;) {
@@ -529,7 +530,8 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
     return false; // No. More than 2 predecessors.
     return false; // No. More than 2 predecessors.
 
 
   // #Instructions in Succ1 for Compile Time Control
   // #Instructions in Succ1 for Compile Time Control
-  int Size1 = Pred1->size();
+  // int Size1 = Pred1->size(); // HLSL Change
+  int Size1 = Pred1->compute_size_no_dbg(); // HLSL Change
   int NStores = 0;
   int NStores = 0;
 
 
   for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
   for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();

+ 12 - 7
lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp

@@ -3770,8 +3770,11 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
             if (group == HLOpcodeGroup::HLSubscript) {
             if (group == HLOpcodeGroup::HLSubscript) {
               if (isReadOnlyPtr(PtrCI)) {
               if (isReadOnlyPtr(PtrCI)) {
                 // Ptr from CBuffer/SRV is safe.
                 // Ptr from CBuffer/SRV is safe.
-                if (ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL, DT))
-                  return true;
+                if (ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL, DT)) {
+                  if (V->user_empty())
+                    return true;
+                  return LowerMemcpy(V, annotation, typeSys, DL, DT, bAllowReplace);
+                }
               }
               }
             }
             }
           }
           }
@@ -3782,8 +3785,11 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
           hlutil::PointerStatus SrcPS(Src, size, /*bLdStOnly*/ false);
           hlutil::PointerStatus SrcPS(Src, size, /*bLdStOnly*/ false);
           SrcPS.analyze(typeSys, bStructElt);
           SrcPS.analyze(typeSys, bStructElt);
           if (SrcPS.storedType != hlutil::PointerStatus::StoredType::Stored) {
           if (SrcPS.storedType != hlutil::PointerStatus::StoredType::Stored) {
-            if (ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL, DT))
-              return true;
+            if (ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL, DT)) {
+              if (V->user_empty())
+                return true;
+              return LowerMemcpy(V, annotation, typeSys, DL, DT, bAllowReplace);
+            }
           }
           }
         }
         }
       }
       }
@@ -3899,7 +3905,7 @@ public:
     const DataLayout &DL = M.getDataLayout();
     const DataLayout &DL = M.getDataLayout();
     // Load up debug information, to cross-reference values and the instructions
     // Load up debug information, to cross-reference values and the instructions
     // used to load them.
     // used to load them.
-    m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+    m_HasDbgInfo = nullptr != M.getNamedMetadata("llvm.dbg.cu");
 
 
     InjectReturnAfterNoReturnPreserveOutput(*m_pHLModule);
     InjectReturnAfterNoReturnPreserveOutput(*m_pHLModule);
 
 
@@ -5769,8 +5775,7 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
   if (m_pHLModule->HasDxilFunctionProps(F)) {
   if (m_pHLModule->HasDxilFunctionProps(F)) {
     DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(F);
     DxilFunctionProps &funcProps = m_pHLModule->GetDxilFunctionProps(F);
     std::unique_ptr<DxilFunctionProps> flatFuncProps = llvm::make_unique<DxilFunctionProps>();
     std::unique_ptr<DxilFunctionProps> flatFuncProps = llvm::make_unique<DxilFunctionProps>();
-    flatFuncProps->shaderKind = funcProps.shaderKind;
-    flatFuncProps->ShaderProps = funcProps.ShaderProps;
+    *flatFuncProps = funcProps;
     m_pHLModule->AddDxilFunctionProps(flatF, flatFuncProps);
     m_pHLModule->AddDxilFunctionProps(flatF, flatFuncProps);
     if (funcProps.shaderKind == ShaderModel::Kind::Vertex) {
     if (funcProps.shaderKind == ShaderModel::Kind::Vertex) {
       auto &VS = funcProps.ShaderProps.VS;
       auto &VS = funcProps.ShaderProps.VS;

+ 2 - 2
lib/Transforms/Scalar/Scalarizer.cpp

@@ -316,7 +316,7 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
     auto InsertPoint = BB->begin();
     auto InsertPoint = BB->begin();
     while (InsertPoint != BB->end() && isa<DbgInfoIntrinsic>(InsertPoint))
     while (InsertPoint != BB->end() && isa<DbgInfoIntrinsic>(InsertPoint))
       InsertPoint++;
       InsertPoint++;
-    Scatterer(BB, InsertPoint, V, AllowFolding, &Scattered[V]);
+    return Scatterer(BB, InsertPoint, V, AllowFolding, &Scattered[V]);
     // HLSL Change - End
     // HLSL Change - End
   }
   }
   if (Instruction *VOp = dyn_cast<Instruction>(V)) {
   if (Instruction *VOp = dyn_cast<Instruction>(V)) {
@@ -729,7 +729,7 @@ bool Scalarizer::finish() {
   Module &M = *Gathered.front().first->getModule();
   Module &M = *Gathered.front().first->getModule();
   LLVMContext &Ctx = M.getContext();
   LLVMContext &Ctx = M.getContext();
   const DataLayout &DL = M.getDataLayout();
   const DataLayout &DL = M.getDataLayout();
-  bool HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
+  bool HasDbgInfo = hasDebugInfo(M);
   // Map from an extract element inst to a Value which replaced it.
   // Map from an extract element inst to a Value which replaced it.
   DenseMap<Instruction *, Value*> EltMap;
   DenseMap<Instruction *, Value*> EltMap;
   // HLSL Change Ends.
   // HLSL Change Ends.

+ 5 - 0
lib/Transforms/Utils/Local.cpp

@@ -46,6 +46,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/raw_ostream.h"
 
 
 #include "dxc/DXIL/DxilMetadataHelper.h" // HLSL Change - combine dxil metadata.
 #include "dxc/DXIL/DxilMetadataHelper.h" // HLSL Change - combine dxil metadata.
+#include "dxc/DXIL/DxilUtil.h" // HLSL Change - special handling of convergent marker
 using namespace llvm;
 using namespace llvm;
 
 
 #define DEBUG_TYPE "local"
 #define DEBUG_TYPE "local"
@@ -331,6 +332,10 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
     if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
     if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
       return C->isNullValue() || isa<UndefValue>(C);
       return C->isNullValue() || isa<UndefValue>(C);
 
 
+  // HLSL change - don't force unused convergent markers to stay
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    if (hlsl::dxilutil::IsConvergentMarker(CI)) return true;
+
   return false;
   return false;
 }
 }
 
 

+ 2 - 2
projects/dxilconv/include/Support/DXIncludes.h

@@ -33,7 +33,7 @@
 #include <wincrypt.h>
 #include <wincrypt.h>
 
 
 #ifndef DECODE_D3D10_SB_TOKENIZED_PROGRAM_TYPE
 #ifndef DECODE_D3D10_SB_TOKENIZED_PROGRAM_TYPE
-#include <d3d12TokenizedProgramFormat.hpp>
+#include "dxc\Support\d3d12TokenizedProgramFormat.hpp"
 #endif
 #endif
 
 
-#include <ShaderBinary/ShaderBinary.h>
+#include "ShaderBinary/ShaderBinary.h"

+ 1 - 1
projects/dxilconv/lib/ShaderBinary/ShaderBinaryIncludes.h

@@ -18,7 +18,7 @@
 #include <d3d12.h>
 #include <d3d12.h>
 #define D3DX12_NO_STATE_OBJECT_HELPERS
 #define D3DX12_NO_STATE_OBJECT_HELPERS
 #include "dxc/Support/d3dx12.h"
 #include "dxc/Support/d3dx12.h"
-#include "D3D12TokenizedProgramFormat.hpp"
+#include "dxc/Support/D3D12TokenizedProgramFormat.hpp"
 #include "ShaderBinary/ShaderBinary.h"
 #include "ShaderBinary/ShaderBinary.h"
 
 
 #define ASSUME( _exp ) { assert( _exp ); __analysis_assume( _exp ); __assume( _exp ); }
 #define ASSUME( _exp ) { assert( _exp ); __analysis_assume( _exp ); __assume( _exp ); }

+ 17 - 1
tools/clang/include/clang/AST/HlslTypes.h

@@ -199,7 +199,8 @@ public:
   enum UnusualAnnotationKind {
   enum UnusualAnnotationKind {
     UA_RegisterAssignment,
     UA_RegisterAssignment,
     UA_ConstantPacking,
     UA_ConstantPacking,
-    UA_SemanticDecl
+    UA_SemanticDecl,
+    UA_PayloadAccessQualifier
   };
   };
 private:
 private:
   const UnusualAnnotationKind Kind;
   const UnusualAnnotationKind Kind;
@@ -243,6 +244,21 @@ struct RegisterAssignment : public UnusualAnnotation
   }
   }
 };
 };
 
 
+// <summary>Use this structure to capture a ': in/out' definition.</summary>
+struct PayloadAccessAnnotation: public UnusualAnnotation {
+  /// <summary>Initializes a new PayloadAccessAnnotation in invalid state.</summary>
+  PayloadAccessAnnotation() : UnusualAnnotation(UA_PayloadAccessQualifier){};
+
+  DXIL::PayloadAccessQualifier qualifier = DXIL::PayloadAccessQualifier::NoAccess;
+  
+  llvm::SmallVector<DXIL::PayloadAccessShaderStage, 4> ShaderStages;
+
+  static bool classof(const UnusualAnnotation *UA) {
+    return UA->getKind() == UA_PayloadAccessQualifier;
+  }
+};
+
+
 /// <summary>Use this structure to capture a ': packoffset' definition.</summary>
 /// <summary>Use this structure to capture a ': packoffset' definition.</summary>
 struct ConstantPacking : public UnusualAnnotation
 struct ConstantPacking : public UnusualAnnotation
 {
 {

+ 5 - 0
tools/clang/include/clang/Basic/Attr.td

@@ -895,6 +895,11 @@ def HLSLPayload : InheritableAttr {
   let Documentation = [Undocumented];
   let Documentation = [Undocumented];
 }
 }
 
 
+def HLSLRayPayload : InheritableAttr {
+  let Spellings = [CXX11<"", "raypayload", 2015>];
+  let Documentation = [Undocumented];
+}
+
 def HLSLWaveSensitive : InheritableAttr {
 def HLSLWaveSensitive : InheritableAttr {
   let Spellings = [CXX11<"", "wavesensitive", 2015>];
   let Spellings = [CXX11<"", "wavesensitive", 2015>];
   let Subjects = SubjectList<[ParmVar]>;
   let Subjects = SubjectList<[ParmVar]>;

+ 33 - 0
tools/clang/include/clang/Basic/Diagnostic.h

@@ -23,6 +23,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/SmallVector.h"
 #include <list>
 #include <list>
 #include <vector>
 #include <vector>
 
 
@@ -674,6 +675,17 @@ public:
 
 
   void Report(const StoredDiagnostic &storedDiag);
   void Report(const StoredDiagnostic &storedDiag);
 
 
+  /// \brief Issue the message to the client but only once.
+  ///
+  /// This actually returns an instance of DiagnosticBuilder which emits the
+  /// diagnostics (through @c ProcessDiag) when it is destroyed.
+  ///
+  /// \param DiagID A member of the @c diag::kind enum.
+  /// \param Loc Represents the source location associated with the diagnostic,
+  /// which can be an invalid location if no position information is available.
+  inline DiagnosticBuilder ReportOnce(unsigned DiagID);
+  inline DiagnosticBuilder ReportOnce(SourceLocation Loc, unsigned DiagID);
+
   /// \brief Determine whether there is already a diagnostic in flight.
   /// \brief Determine whether there is already a diagnostic in flight.
   bool isDiagnosticInFlight() const { return CurDiagID != ~0U; }
   bool isDiagnosticInFlight() const { return CurDiagID != ~0U; }
 
 
@@ -726,6 +738,9 @@ private:
   /// \brief The location of the current diagnostic that is in flight.
   /// \brief The location of the current diagnostic that is in flight.
   SourceLocation CurDiagLoc;
   SourceLocation CurDiagLoc;
 
 
+  /// \brief Stores diagnostics that should only be emitted once.
+  llvm::SmallVector<unsigned, 2> DiagOnceDiagnostics;
+
   /// \brief The ID of the current diagnostic that is in flight.
   /// \brief The ID of the current diagnostic that is in flight.
   ///
   ///
   /// This is set to ~0U when there is no diagnostic in flight.
   /// This is set to ~0U when there is no diagnostic in flight.
@@ -1126,10 +1141,28 @@ inline DiagnosticBuilder DiagnosticsEngine::Report(SourceLocation Loc,
   return DiagnosticBuilder(this);
   return DiagnosticBuilder(this);
 }
 }
 
 
+
 inline DiagnosticBuilder DiagnosticsEngine::Report(unsigned DiagID) {
 inline DiagnosticBuilder DiagnosticsEngine::Report(unsigned DiagID) {
   return Report(SourceLocation(), DiagID);
   return Report(SourceLocation(), DiagID);
 }
 }
 
 
+
+inline DiagnosticBuilder DiagnosticsEngine::ReportOnce(unsigned DiagID) {
+  return ReportOnce(SourceLocation(), DiagID);
+}
+
+inline DiagnosticBuilder DiagnosticsEngine::ReportOnce(SourceLocation Loc,
+                                                       unsigned DiagID) {
+  if (std::find(DiagOnceDiagnostics.begin(), DiagOnceDiagnostics.end(),
+                DiagID) != DiagOnceDiagnostics.end()) {
+    auto DisabledDiag =  DiagnosticBuilder(this);
+    DisabledDiag.IsActive = false;
+    return DisabledDiag;
+  }
+
+  DiagOnceDiagnostics.push_back(DiagID);
+  return Report(Loc, DiagID);
+}
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 // Diagnostic
 // Diagnostic
 //                                                                           //
 //                                                                           //

+ 10 - 0
tools/clang/include/clang/Basic/DiagnosticGroups.td

@@ -787,4 +787,14 @@ def HLSLSpecifierOverride : DiagGroup<"specifier-override">;
 def HLSLPackOffsetOverride : DiagGroup<"packoffset-override">;
 def HLSLPackOffsetOverride : DiagGroup<"packoffset-override">;
 def HLSLCommaInInit : DiagGroup<"comma-in-init">;
 def HLSLCommaInInit : DiagGroup<"comma-in-init">;
 def HLSLAmbigLitShift : DiagGroup<"ambig-lit-shift">;
 def HLSLAmbigLitShift : DiagGroup<"ambig-lit-shift">;
+def HLSLPayloadAccessQualiferTrace: DiagGroup<"payload-access-trace">;
+def HLSLPayloadAccessQualiferShader: DiagGroup<"payload-access-shader">;
+def HLSLPayloadAccessQualiferPerf: DiagGroup<"payload-access-perf">;
+def HLSLPayloadAccessQualiferCall: DiagGroup<"payload-access-call">;
+def HLSLPayloadAccessQualifer: DiagGroup<"payload-access-qualifier", [
+     HLSLPayloadAccessQualiferTrace,
+     HLSLPayloadAccessQualiferShader,
+     HLSLPayloadAccessQualiferPerf,
+     HLSLPayloadAccessQualiferCall
+  ]>;
 // HLSL Change Ends
 // HLSL Change Ends

+ 36 - 0
tools/clang/include/clang/Basic/DiagnosticSemaKinds.td

@@ -7540,6 +7540,42 @@ def err_hlsl_unsupported_buffer_packoffset : Error<
   "packoffset is only allowed within a constant buffer, not on the constant buffer declaration">;
   "packoffset is only allowed within a constant buffer, not on the constant buffer declaration">;
 def err_hlsl_unsupported_buffer_slot_target_specific : Error<
 def err_hlsl_unsupported_buffer_slot_target_specific : Error<
   "user defined constant buffer slots cannot be target specific">;
   "user defined constant buffer slots cannot be target specific">;
+def err_hlsl_unsupported_payload_access_qualifier : Error<
+  "payload access qualifiers are only allowed for member variables of a payload structure">;
+def err_hlsl_unsupported_payload_access_qualifier_struct : Error<
+  "payload access qualifiers are not supported on struct types.">;
+def err_hlsl_payload_access_qualifier_unsupported_shader : Error<
+  "payload access qualifiers are only defined for raytracing shader stages closesthit, miss, anyhit and for special keyword: caller. '%0' is not supported">;
+def err_hlsl_payload_access_qualifier_invalid_combination : Error<
+  "field %0 is qualified '%1' for shader stage '%2' but has no valid %3">;
+def err_hlsl_payload_access_qualifier_multiple_defined : Error<
+  "payload access qualifier '%0' has already been defined">;
+def warn_hlsl_payload_access_data_loss : Warning<
+  "potential loss of data for payload field '%0'. Field is qualified 'write' in earlier stages and 'write' only for stage '%1' but never unconditionally written.">, InGroup<HLSLPayloadAccessQualiferShader>;
+def warn_hlsl_payload_access_undef_read : Warning<
+  "reading undefined value ('%0' is not qualified 'read' for shader stage '%1')">, InGroup<HLSLPayloadAccessQualiferShader>;
+def warn_hlsl_payload_access_write_loss : Warning<
+  "write will be dropped ('%0' is not qualified 'write' for shader stage '%1')">, InGroup<HLSLPayloadAccessQualiferShader>;
+def warn_hlsl_payload_access_no_write_for_trace_payload : Warning<
+  "field '%0' is 'write' for 'caller' stage but field is never written for TraceRay call">, InGroup<HLSLPayloadAccessQualiferTrace>;
+def warn_hlsl_payload_access_write_but_no_write_for_trace_payload : Warning<
+  "value will be undefined inside TraceRay ('%0' is not qualified 'write' for 'caller')">, InGroup<HLSLPayloadAccessQualiferTrace>;
+def warn_hlsl_payload_access_read_of_undef_after_trace : Warning<
+  "reading undefined value ('%0' is returned from TraceRay but not qualified 'read' for 'caller')">, InGroup<HLSLPayloadAccessQualiferTrace>;
+def warn_hlsl_payload_access_read_but_no_read_after_trace : Warning<
+  "'%0' is qualified 'read' for 'caller' but the field is never read after TraceCall (possible performance issue)">, InGroup<HLSLPayloadAccessQualiferPerf>;
+def warn_qualified_payload_passed_to_extern_function : Warning<
+  "passing a qualified payload to an extern function can cause undefined behavior if payload qualifiers mismatch">, InGroup<HLSLPayloadAccessQualiferCall>;
+def err_not_all_payload_fields_qualified : Error<
+  "payload type '%0' requires that all fields carry payload access qualifiers.">;
+def err_payload_requires_attribute : Error<
+  "type '%0' used as payload requires that it is annotated with the [raypayload] attribute">;
+def err_payload_fields_not_qualified : Error<
+  "payload field '%0' has no payload access qualifiers.">;
+def err_payload_fields_is_payload_and_overqualified : Error<
+  "payload field '%0' is a payload struct. Payload access qualifiers are not allowed on payload types.">;
+def warn_hlsl_payload_qualifer_dropped : Warning<
+  "payload access qualifieres are only supported for target lib_6_6 and beyond. You can opt-in for lib_6_6 with the -enable-payload-qualifiers flag. Qualifiers will be dropped.">, InGroup<HLSLPayloadAccessQualifer>;
 def err_hlsl_unsupported_builtin_op: Error<
 def err_hlsl_unsupported_builtin_op: Error<
   "operator cannot be used with built-in type %0">;
   "operator cannot be used with built-in type %0">;
 def err_hlsl_unsupported_char_literal : Error<
 def err_hlsl_unsupported_char_literal : Error<

+ 1 - 0
tools/clang/include/clang/Basic/LangOptions.h

@@ -159,6 +159,7 @@ public:
   bool EnableDX9CompatMode;
   bool EnableDX9CompatMode;
   bool EnableFXCCompatMode;
   bool EnableFXCCompatMode;
   bool EnableTemplates;
   bool EnableTemplates;
+  bool EnablePayloadAccessQualifiers;
   // HLSL Change Ends
   // HLSL Change Ends
 
 
   bool SPIRV = false;  // SPIRV Change
   bool SPIRV = false;  // SPIRV Change

+ 2 - 0
tools/clang/include/clang/Frontend/CodeGenOptions.h

@@ -234,6 +234,8 @@ public:
   bool HLSLEnableLifetimeMarkers = false;
   bool HLSLEnableLifetimeMarkers = false;
   /// Put shader sources and options in the module
   /// Put shader sources and options in the module
   bool HLSLEmbedSourcesInModule = false;
   bool HLSLEmbedSourcesInModule = false;
+  /// Enable generation of payload access qualifier metadata. 
+  bool HLSLEnablePayloadAccessQualifiers = false;
   // HLSL Change Ends
   // HLSL Change Ends
 
 
   // SPIRV Change Starts
   // SPIRV Change Starts

+ 7 - 0
tools/clang/include/clang/SPIRV/AstTypeProbe.h

@@ -86,6 +86,13 @@ bool isMx1Matrix(QualType type, QualType *elemType = nullptr,
 bool isMxNMatrix(QualType type, QualType *elemType = nullptr,
 bool isMxNMatrix(QualType type, QualType *elemType = nullptr,
                  uint32_t *rowCount = nullptr, uint32_t *colCount = nullptr);
                  uint32_t *rowCount = nullptr, uint32_t *colCount = nullptr);
 
 
+/// Returns true if the given type will be translated into a SPIR-V array type.
+///
+/// Writes the element type and count into *elemType and *elemCount
+/// respectively if they are not nullptr.
+bool isArrayType(QualType type, QualType *elemType = nullptr,
+                 uint32_t *elemCount = nullptr);
+
 /// \brief Returns true if the given type is a ConstantBuffer or an array of
 /// \brief Returns true if the given type is a ConstantBuffer or an array of
 /// ConstantBuffers.
 /// ConstantBuffers.
 bool isConstantBuffer(QualType);
 bool isConstantBuffer(QualType);

+ 1 - 0
tools/clang/include/clang/SPIRV/FeatureManager.h

@@ -31,6 +31,7 @@ enum class Extension {
   KHR = 0,
   KHR = 0,
   KHR_16bit_storage,
   KHR_16bit_storage,
   KHR_device_group,
   KHR_device_group,
+  KHR_fragment_shading_rate,
   KHR_non_semantic_info,
   KHR_non_semantic_info,
   KHR_multiview,
   KHR_multiview,
   KHR_shader_draw_parameters,
   KHR_shader_draw_parameters,

+ 10 - 0
tools/clang/include/clang/Sema/SemaHLSL.h

@@ -86,6 +86,16 @@ void DiagnoseUnusualAnnotationsForHLSL(
   clang::Sema& S,
   clang::Sema& S,
   std::vector<hlsl::UnusualAnnotation *>& annotations);
   std::vector<hlsl::UnusualAnnotation *>& annotations);
 
 
+void DiagnosePayloadAccessQualifierAnnotations(
+  clang::Sema &S,
+  clang::Declarator& D,
+  const clang::QualType& T,
+  const std::vector<hlsl::UnusualAnnotation *> &annotations);
+
+void DiagnoseRaytracingPayloadAccess(
+  clang::Sema &S,
+  clang::TranslationUnitDecl* TU);
+
 /// <summary>Finds the best viable function on this overload set, if it exists.</summary>
 /// <summary>Finds the best viable function on this overload set, if it exists.</summary>
 clang::OverloadingResult GetBestViableFunction(
 clang::OverloadingResult GetBestViableFunction(
   clang::Sema &S,
   clang::Sema &S,

+ 3 - 0
tools/clang/lib/AST/ASTContextHLSL.cpp

@@ -1169,6 +1169,9 @@ UnusualAnnotation* hlsl::UnusualAnnotation::CopyToASTContext(ASTContext& Context
     break;
     break;
   case UA_ConstantPacking:
   case UA_ConstantPacking:
     instanceSize = sizeof(hlsl::ConstantPacking);
     instanceSize = sizeof(hlsl::ConstantPacking);
+    break;  
+  case UA_PayloadAccessQualifier:
+    instanceSize = sizeof(hlsl::PayloadAccessAnnotation);
     break;
     break;
   default:
   default:
     DXASSERT(Kind == UA_SemanticDecl, "Kind == UA_SemanticDecl -- otherwise switch is incomplete");
     DXASSERT(Kind == UA_SemanticDecl, "Kind == UA_SemanticDecl -- otherwise switch is incomplete");

+ 20 - 0
tools/clang/lib/AST/ASTDumper.cpp

@@ -994,6 +994,8 @@ void ASTDumper::dumpHLSLUnusualAnnotations(const ArrayRef<hlsl::UnusualAnnotatio
             OS << "RegisterAssignment"; break;
             OS << "RegisterAssignment"; break;
           case hlsl::UnusualAnnotation::UA_SemanticDecl:
           case hlsl::UnusualAnnotation::UA_SemanticDecl:
             OS << "SemanticDecl"; break;
             OS << "SemanticDecl"; break;
+          case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier:
+            OS << "PayloadAccessQualifier"; break;
         }
         }
       }
       }
       dumpPointer(It);
       dumpPointer(It);
@@ -1043,7 +1045,25 @@ void ASTDumper::dumpHLSLUnusualAnnotations(const ArrayRef<hlsl::UnusualAnnotatio
           const hlsl::SemanticDecl* semanticDecl = cast<hlsl::SemanticDecl>(*It);
           const hlsl::SemanticDecl* semanticDecl = cast<hlsl::SemanticDecl>(*It);
           OS << " \"" << semanticDecl->SemanticName << "\"";
           OS << " \"" << semanticDecl->SemanticName << "\"";
           break;
           break;
+        }      
+      case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier: {
+        const hlsl::PayloadAccessAnnotation *annotation =
+            cast<hlsl::PayloadAccessAnnotation>(*It);
+        OS << " "
+           << (annotation->qualifier == hlsl::DXIL::PayloadAccessQualifier::Read
+                   ? "read"
+                   : "write")
+           << "(";
+        StringRef shaderStageNames[] = {"caller", "closesthit", "miss", "anyhit"};
+        for (unsigned i = 0; i < annotation->ShaderStages.size(); ++i) {
+          OS << shaderStageNames[static_cast<unsigned>(
+              annotation->ShaderStages[i])];
+          if (i < annotation->ShaderStages.size() - 1)
+            OS << ", ";
         }
         }
+        OS << ")";
+        break;
+      }
       }
       }
     });
     });
   }
   }

+ 17 - 0
tools/clang/lib/AST/DeclPrinter.cpp

@@ -1495,6 +1495,23 @@ void DeclPrinter::VisitHLSLUnusualAnnotation(const hlsl::UnusualAnnotation *UA)
     Out << ")";
     Out << ")";
     break;
     break;
   }
   }
+  case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier: {
+    const hlsl::PayloadAccessAnnotation *annotation =
+        cast<hlsl::PayloadAccessAnnotation>(UA);
+    Out << " : "
+        << (annotation->qualifier == hlsl::DXIL::PayloadAccessQualifier::Read
+                ? "read"
+                : "write")
+        << "(";
+    StringRef shaderStageNames[] = { "caller", "closesthit", "miss", "anyhit"};
+    for (unsigned i = 0; i < annotation->ShaderStages.size(); ++i) {
+      Out << shaderStageNames[static_cast<unsigned>(annotation->ShaderStages[i])];
+      if (i < annotation->ShaderStages.size() - 1)
+        Out << ", ";
+    }
+    Out << ")";
+    break;
+  }
   }
   }
 }
 }
 
 

+ 155 - 34
tools/clang/lib/CodeGen/CGExprConstant.cpp

@@ -636,6 +636,72 @@ public:
     return Visit(E->getInitializer());
     return Visit(E->getInitializer());
   }
   }
 
 
+  // HLSL changes begin
+  static void ExtractConstantValueElems(llvm::Constant *constVec, llvm::SmallVector<llvm::Constant*, 4> &Elems, unsigned vecSize) {
+    if (llvm::ConstantDataVector *CDV = dyn_cast<llvm::ConstantDataVector>(constVec)) {
+      for (unsigned c = 0; c < vecSize; c++) {
+        Elems[c] = CDV->getElementAsConstant(c);
+      }
+    }
+    else if (llvm::ConstantVector *CV = dyn_cast<llvm::ConstantVector>(constVec)) {
+      for (unsigned c = 0; c < vecSize; c++) {
+        Elems[c] = CV->getOperand(c);
+      }
+    }
+    else {
+      llvm::ConstantAggregateZero *CAZ = cast<llvm::ConstantAggregateZero>(constVec);
+      for (unsigned c = 0; c < vecSize; c++) {
+        Elems[c] = CAZ->getElementValue(c);
+      }
+    }
+  }
+
+  static llvm::Constant* ConvertToMatchDestType (const clang::Type *srcTy, const clang::Type *destTy,
+    llvm::Type *srcLLVMTy, llvm::Type *destLLVMTy, llvm::Constant *C, CodeGenModule &CGM) {
+
+    assert(srcTy->isFloatingType() || srcTy->isIntegerType());
+    assert(destTy->isFloatingType() || destTy->isIntegerType());
+
+    // Special handling for cast to boolean type
+    if (destLLVMTy->isIntegerTy() && destLLVMTy->getScalarSizeInBits() == 1) {
+      return C->isZeroValue() ? llvm::ConstantInt::get(destLLVMTy, 0)
+        : llvm::ConstantInt::get(destLLVMTy, 1);
+    }
+
+    llvm::Instruction::CastOps castOp = llvm::Instruction::CastOpsEnd;
+
+    if (srcLLVMTy->isFloatingPointTy() && destLLVMTy->isFloatingPointTy()) {
+      if (srcLLVMTy->getScalarSizeInBits() > destLLVMTy->getScalarSizeInBits()) {
+        castOp = llvm::Instruction::FPTrunc;
+      }
+      else {
+        castOp = llvm::Instruction::FPExt;
+      }
+    }
+    else if (srcLLVMTy->isFloatingPointTy() && destLLVMTy->isIntegerTy()) {
+      castOp = destTy->isSignedIntegerType() ? llvm::Instruction::FPToSI : llvm::Instruction::FPToUI;
+    }
+    else if (srcLLVMTy->isIntegerTy() && destLLVMTy->isFloatingPointTy()) {
+      castOp = srcTy->isSignedIntegerType() ? llvm::Instruction::SIToFP : llvm::Instruction::UIToFP;
+    }
+    else {
+      // Both src and dest should be of integer type here.
+      assert(srcLLVMTy->isIntegerTy() && destLLVMTy->isIntegerTy());
+
+      if (srcLLVMTy->getScalarSizeInBits() > destLLVMTy->getScalarSizeInBits()) {
+        castOp = llvm::Instruction::Trunc;
+      }
+      else {
+        castOp = srcTy->isSignedIntegerType() ? llvm::Instruction::SExt : llvm::Instruction::ZExt;
+      }
+    }
+
+    assert(castOp != llvm::Instruction::CastOpsEnd);
+    return llvm::ConstantExpr::getCast(castOp, C, destLLVMTy);
+  }
+
+  // HLSL changes end
+
   llvm::Constant *VisitCastExpr(CastExpr* E) {
   llvm::Constant *VisitCastExpr(CastExpr* E) {
     Expr *subExpr = E->getSubExpr();
     Expr *subExpr = E->getSubExpr();
     llvm::Constant *C = CGM.EmitConstantExpr(subExpr, subExpr->getType(), CGF);
     llvm::Constant *C = CGM.EmitConstantExpr(subExpr, subExpr->getType(), CGF);
@@ -748,10 +814,68 @@ public:
     case CK_HLSLCC_IntegralToBoolean:
     case CK_HLSLCC_IntegralToBoolean:
     case CK_HLSLCC_IntegralToFloating:
     case CK_HLSLCC_IntegralToFloating:
     case CK_HLSLCC_FloatingToIntegral:
     case CK_HLSLCC_FloatingToIntegral:
-    case CK_HLSLCC_FloatingToBoolean:
-      // Since these cast kinds have already been handled in ExprConstant.cpp,
-      // we can reuse the logic there.
-      return CGM.EmitConstantExpr(E, E->getType(), CGF);
+    case CK_HLSLCC_FloatingToBoolean: {
+      bool isMatrixCast = hlsl::IsHLSLMatType(E->getType()) && hlsl::IsHLSLMatType(E->getSubExpr()->getType());
+      if (!isMatrixCast) {
+        // Since these cast kinds have already been handled in ExprConstant.cpp,
+        // we can reuse the logic there.
+        return CGM.EmitConstantExpr(E, E->getType(), CGF);
+      }
+      else {
+        // For cast involving matrix type, if the subexpression has already
+        // been successfully evaluated to a constant, then just cast it to
+        // match the destination type.
+        llvm::Constant *SubExprResult = C;
+
+        const clang::Type * srcEltType = hlsl::GetHLSLMatElementType(E->getSubExpr()->getType()).getCanonicalType().getTypePtr();
+        const clang::Type * destEltType = hlsl::GetHLSLMatElementType(E->getType()).getCanonicalType().getTypePtr();
+
+        // If the dest type is same as the src type, then trivially
+        // return the result of the subexpression evaluation.
+        llvm::Type *srcEltLLVMTy = CGM.getTypes().ConvertType(srcEltType->getCanonicalTypeInternal());
+        llvm::Type *destEltLLVMTy = CGM.getTypes().ConvertType(destEltType->getCanonicalTypeInternal());
+        // Use desugared llvm type for comparison as half and float could both mean float type
+        // when -enable-16bit-types flag is not used.
+        if (srcEltLLVMTy == destEltLLVMTy) {
+          return SubExprResult;
+        }
+
+        unsigned destRow, destCol;
+        hlsl::GetHLSLMatRowColCount(E->getType(), destRow, destCol);
+
+        unsigned srcRow, srcCol;
+        hlsl::GetHLSLMatRowColCount(E->getSubExpr()->getType(), srcRow, srcCol);
+
+        // Src and Dest matrices must have same order
+        assert(destRow == srcRow && destCol == srcCol);
+
+        if (llvm::ConstantStruct *srcVal = dyn_cast<llvm::ConstantStruct>(SubExprResult)) {
+          llvm::ConstantArray *srcMat = cast<llvm::ConstantArray>(srcVal->getOperand(0));
+          llvm::SmallVector<llvm::Constant*, 4> destRowElts;
+
+          for (unsigned r = 0; r < srcRow; r++) {
+            llvm::SmallVector<llvm::Constant*, 4> destColElts(srcCol);
+            llvm::Constant *srcColVal = srcMat->getOperand(r);
+            ExtractConstantValueElems(srcColVal, destColElts, srcCol);
+            for (unsigned i = 0; i < srcCol; i++) {
+              destColElts[i] = ConvertToMatchDestType(srcEltType, destEltType, srcEltLLVMTy, destEltLLVMTy, destColElts[i], CGM);
+            }
+            llvm::Constant *destCols = llvm::ConstantVector::get(destColElts);
+            destRowElts.emplace_back(destCols);
+          }
+
+          llvm::StructType *destValType = cast<llvm::StructType>(destType);
+          llvm::Constant *destMat = llvm::ConstantArray::get(
+            cast<llvm::ArrayType>(destValType->getElementType(0)), destRowElts);
+          llvm::Constant* destVal = llvm::ConstantStruct::get(destValType, destMat);
+          return destVal;
+        }
+        else if (llvm::ConstantAggregateZero *CAZ = dyn_cast<llvm::ConstantAggregateZero>(SubExprResult)) {
+          return llvm::Constant::getNullValue(destType);
+        }
+      }
+    }
+
     case CK_FlatConversion:
     case CK_FlatConversion:
       return nullptr;
       return nullptr;
     case CK_HLSLVectorSplat: {
     case CK_HLSLVectorSplat: {
@@ -773,54 +897,51 @@ public:
     case CK_HLSLVectorTruncationCast: {
     case CK_HLSLVectorTruncationCast: {
       unsigned vecSize = hlsl::GetHLSLVecSize(E->getType());
       unsigned vecSize = hlsl::GetHLSLVecSize(E->getType());
       SmallVector<llvm::Constant*, 4> Elts(vecSize);
       SmallVector<llvm::Constant*, 4> Elts(vecSize);
-      if (llvm::ConstantDataVector *CDV = dyn_cast<llvm::ConstantDataVector>(C)) {
-        for (unsigned i = 0; i < vecSize; i++)
-          Elts[i] = CDV->getElementAsConstant(i);
-      } else if (llvm::ConstantVector* CV = dyn_cast<llvm::ConstantVector>(C)) {
-        for (unsigned i = 0; i < vecSize; i++)
-          Elts[i] = CV->getOperand(i);
-      } else {
-        llvm::ConstantAggregateZero* CAZ = cast<llvm::ConstantAggregateZero>(C);
-        for (unsigned i = 0; i < vecSize; i++)
-          Elts[i] = CAZ->getElementValue(i);
-      }
+      ExtractConstantValueElems(C, Elts, vecSize);
       return llvm::ConstantVector::get(Elts);
       return llvm::ConstantVector::get(Elts);
     }
     }
     case CK_HLSLVectorToScalarCast: {
     case CK_HLSLVectorToScalarCast: {
-      if (llvm::ConstantDataVector* CDV = dyn_cast<llvm::ConstantDataVector>(C)) {
-        return CDV->getElementAsConstant(0);
+      SmallVector<llvm::Constant*, 4> Elts(1);
+      ExtractConstantValueElems(C, Elts, 1);
+      return Elts[0];
+    }
+    case CK_HLSLMatrixToScalarCast: {
+      unsigned rowCt, colCt;
+      hlsl::GetHLSLMatRowColCount(E->getType(), rowCt, colCt);
+      if (llvm::ConstantStruct *CS = dyn_cast<llvm::ConstantStruct>(C)) {
+        llvm::ConstantArray *CA = dyn_cast<llvm::ConstantArray>(CS->getOperand(0));
+        SmallVector<llvm::Constant*, 4> Elts(colCt);
+        ExtractConstantValueElems(CA->getOperand(0), Elts, colCt);
+        return Elts[0];
       }
       }
-      else if (llvm::ConstantVector* CV = dyn_cast<llvm::ConstantVector>(C)) {
-        return CV->getOperand(0);
-      } else {
-        llvm::ConstantAggregateZero* CAZ = cast<llvm::ConstantAggregateZero>(C);
-        return CAZ->getElementValue((unsigned)0);
+      else if (llvm::ConstantAggregateZero *CAZ = dyn_cast<llvm::ConstantAggregateZero>(C)) {
+        llvm::Constant *destVal = llvm::Constant::getNullValue(destType);
+        return destVal;
       }
       }
     }
     }
     case CK_HLSLMatrixTruncationCast: {
     case CK_HLSLMatrixTruncationCast: {
-      llvm::StructType *ST =
-          cast<llvm::StructType>(CGM.getTypes().ConvertType(E->getType()));
-      unsigned rowCt,colCt;
-      hlsl::GetHLSLMatRowColCount(E->getType(), rowCt, colCt);
       if (llvm::ConstantStruct *CS = dyn_cast<llvm::ConstantStruct>(C)) {
       if (llvm::ConstantStruct *CS = dyn_cast<llvm::ConstantStruct>(C)) {
+        unsigned rowCt, colCt;
+        hlsl::GetHLSLMatRowColCount(E->getType(), rowCt, colCt);
         llvm::ConstantArray *CA = dyn_cast<llvm::ConstantArray>(CS->getOperand(0));
         llvm::ConstantArray *CA = dyn_cast<llvm::ConstantArray>(CS->getOperand(0));
         SmallVector<llvm::Constant *, 4> Rows(rowCt);
         SmallVector<llvm::Constant *, 4> Rows(rowCt);
         for (unsigned i = 0; i < rowCt; i++) {
         for (unsigned i = 0; i < rowCt; i++) {
           SmallVector<llvm::Constant*, 4> Elts(colCt);
           SmallVector<llvm::Constant*, 4> Elts(colCt);
-          if (llvm::ConstantDataVector *CDV = dyn_cast<llvm::ConstantDataVector>(CA->getOperand(i))) {
-            for (unsigned j = 0; j < colCt; j++)
-              Elts[j] = CDV->getElementAsConstant(j);
-          } else {
-            llvm::ConstantVector *CV = cast<llvm::ConstantVector>(CA->getOperand(i));
-            for (unsigned j = 0; j < colCt; j++)
-              Elts[j] = CV->getOperand(j);
-          }
+          ExtractConstantValueElems(CA->getOperand(i), Elts, colCt);
           Rows[i] = llvm::ConstantVector::get(Elts);
           Rows[i] = llvm::ConstantVector::get(Elts);
         }
         }
+
+        // Create truncated matrix
+        llvm::StructType *ST =
+          cast<llvm::StructType>(CGM.getTypes().ConvertType(E->getType()));
         llvm::Constant *Mat = llvm::ConstantArray::get(
         llvm::Constant *Mat = llvm::ConstantArray::get(
             cast<llvm::ArrayType>(ST->getElementType(0)), Rows);
             cast<llvm::ArrayType>(ST->getElementType(0)), Rows);
         return llvm::ConstantStruct::get(ST, Mat);
         return llvm::ConstantStruct::get(ST, Mat);
       }
       }
+      else if (llvm::ConstantAggregateZero *CAZ = dyn_cast<llvm::ConstantAggregateZero>(C)) {
+        llvm::Constant *destVal = llvm::Constant::getNullValue(destType);
+        return destVal;
+      }
     }
     }
     // HLSL Change Ends.
     // HLSL Change Ends.
     }
     }

+ 151 - 21
tools/clang/lib/CodeGen/CGHLSLMS.cpp

@@ -179,8 +179,8 @@ private:
                               QualType Type, QualType SrcType,
                               QualType Type, QualType SrcType,
                               llvm::Type *Ty);
                               llvm::Type *Ty);
 
 
-  void EmitHLSLRootSignature(CodeGenFunction &CGF, HLSLRootSignatureAttr *RSA,
-                             llvm::Function *Fn) override;
+  void EmitHLSLRootSignature(HLSLRootSignatureAttr *RSA,
+                             Function *Fn, DxilFunctionProps &props);
 
 
   void CheckParameterAnnotation(SourceLocation SLoc,
   void CheckParameterAnnotation(SourceLocation SLoc,
                                 const DxilParameterAnnotation &paramInfo,
                                 const DxilParameterAnnotation &paramInfo,
@@ -204,6 +204,7 @@ private:
 
 
   // Type annotation related.
   // Type annotation related.
   unsigned ConstructStructAnnotation(DxilStructAnnotation *annotation,
   unsigned ConstructStructAnnotation(DxilStructAnnotation *annotation,
+                                     DxilPayloadAnnotation* payloadAnnotation,
                                      const RecordDecl *RD,
                                      const RecordDecl *RD,
                                      DxilTypeSystem &dxilTypeSys);
                                      DxilTypeSystem &dxilTypeSys);
   unsigned AddTypeAnnotation(QualType Ty, DxilTypeSystem &dxilTypeSys,
   unsigned AddTypeAnnotation(QualType Ty, DxilTypeSystem &dxilTypeSys,
@@ -920,6 +921,7 @@ static unsigned AlignBaseOffset(QualType Ty, unsigned baseOffset,
 }
 }
 
 
 unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annotation,
 unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annotation,
+                                      DxilPayloadAnnotation* payloadAnnotation,
                                       const RecordDecl *RD,
                                       const RecordDecl *RD,
                                       DxilTypeSystem &dxilTypeSys) {
                                       DxilTypeSystem &dxilTypeSys) {
   unsigned fieldIdx = 0;
   unsigned fieldIdx = 0;
@@ -992,6 +994,9 @@ unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annota
 
 
     unsigned CBufferOffset = offset;
     unsigned CBufferOffset = offset;
 
 
+    DxilFieldAnnotation &fieldAnnotation = annotation->GetFieldAnnotation(fieldIdx++);
+    ConstructFieldAttributedAnnotation(fieldAnnotation, fieldTy, bDefaultRowMajor);
+
     // Try to get info from fieldDecl.
     // Try to get info from fieldDecl.
     for (const hlsl::UnusualAnnotation *it :
     for (const hlsl::UnusualAnnotation *it :
          fieldDecl->getUnusualAnnotations()) {
          fieldDecl->getUnusualAnnotations()) {
@@ -1016,6 +1021,21 @@ unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annota
         Diags.Report(it->Loc, DiagID);
         Diags.Report(it->Loc, DiagID);
         return 0;
         return 0;
       } break;
       } break;
+      case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier: {
+        // Forward payload access qualifiers to fieldAnnotation. 
+        if (payloadAnnotation) {
+          const hlsl::PayloadAccessAnnotation *annotation =
+              cast<hlsl::PayloadAccessAnnotation>(it);
+          DxilPayloadFieldAnnotation &payloadFieldAnnotation =
+              payloadAnnotation->GetFieldAnnotation(fieldIdx - 1);
+          payloadFieldAnnotation.SetCompType(
+              fieldAnnotation.GetCompType().GetKind());
+          for (auto stage : annotation->ShaderStages) {
+            payloadFieldAnnotation.AddPayloadFieldQualifier(
+                stage, annotation->qualifier);
+          }
+        }
+      } break;
       default:
       default:
         llvm_unreachable("only semantic for input/output");
         llvm_unreachable("only semantic for input/output");
         break;
         break;
@@ -1029,9 +1049,6 @@ unsigned CGMSHLSLRuntime::ConstructStructAnnotation(DxilStructAnnotation *annota
     // Update offset.
     // Update offset.
     offset += size;
     offset += size;
     
     
-    DxilFieldAnnotation &fieldAnnotation = annotation->GetFieldAnnotation(fieldIdx++);
-
-    ConstructFieldAttributedAnnotation(fieldAnnotation, fieldTy, bDefaultRowMajor);
     ConstructFieldInterpolation(fieldAnnotation, fieldDecl);
     ConstructFieldInterpolation(fieldAnnotation, fieldDecl);
     if (fieldDecl->hasAttr<HLSLPreciseAttr>())
     if (fieldDecl->hasAttr<HLSLPreciseAttr>())
       fieldAnnotation.SetPrecise();
       fieldAnnotation.SetPrecise();
@@ -1068,6 +1085,65 @@ static unsigned GetNumTemplateArgsForRecordDecl(const RecordDecl *RD) {
   return 0;
   return 0;
 }
 }
 
 
+// Decides whether a record declaration should receive a payload annotation.
+//
+// Returns false when Decl has no HLSLRayPayloadAttr, when
+// Options.HLSLEnablePayloadAccessQualifiers is off (a warning is reported
+// once at the declaration), or when at least one field ends up counted as
+// unqualified (errors are reported per field and on the declaration).
+// Returns true otherwise. Model is not read anywhere in this function.
+static bool ValidatePayloadDecl(const RecordDecl *Decl,
+                                const ShaderModel &Model,
+                                DiagnosticsEngine &Diag,
+                                const CodeGenOptions &Options) {
+  // Already checked in Sema; without the attribute this is not a payload.
+  if (!Decl->hasAttr<HLSLRayPayloadAttr>())
+    return false;
+
+  // If we have a payload, warn about its qualifiers being dropped.
+  if (!Options.HLSLEnablePayloadAccessQualifiers) {
+    Diag.ReportOnce(Decl->getLocation(), diag::warn_hlsl_payload_qualifer_dropped);
+    return false;
+  }
+
+  // Check if all fields have a payload qualifier.
+  bool allFieldsQualifed = true;
+  for (FieldDecl *field : Decl->fields()) {
+    bool fieldHasPayloadQualifier = false;
+    bool isPayloadStruct = false;
+    // A field counts as explicitly qualified if any of its unusual
+    // annotations is a PayloadAccessAnnotation.
+    for (UnusualAnnotation *annotation : field->getUnusualAnnotations()) {
+      fieldHasPayloadQualifier |= isa<hlsl::PayloadAccessAnnotation>(annotation);
+    }
+    // Check if this is a struct type.
+    // If it is, check for the [payload] attribute: [payload] structs must
+    // carry PayloadAccessQualifiers and these are taken from the struct
+    // directly. If it is not a payload struct, check if it has qualifiers
+    // attached.
+    if (RecordDecl *recordTy = field->getType()->getAsCXXRecordDecl()) {
+      if (recordTy->hasAttr<HLSLRayPayloadAttr>())
+        isPayloadStruct = true;
+    }
+
+    // A [payload] struct field must not be qualified a second time. Note that
+    // the `continue` skips the allFieldsQualifed update below, so this error
+    // alone does not make the function return false.
+    if (fieldHasPayloadQualifier && isPayloadStruct) {
+      Diag.Report(field->getLocation(),
+                  diag::err_payload_fields_is_payload_and_overqualified)
+          << field->getName();
+      continue;
+    }
+    else 
+    {
+        // A [payload] struct field counts as qualified.
+        if (isPayloadStruct)
+            fieldHasPayloadQualifier = true;
+    }
+
+    if (!fieldHasPayloadQualifier) {
+      Diag.Report(field->getLocation(),
+                  diag::err_payload_fields_not_qualified)
+          << field->getName();
+    }
+    allFieldsQualifed &= fieldHasPayloadQualifier;
+  }
+  // Summarize on the declaration itself after the per-field errors.
+  if (!allFieldsQualifed) {
+    Diag.Report(Decl->getLocation(), diag::err_not_all_payload_fields_qualified)
+        << Decl->getName();
+    return false;
+  }
+ 
+  return true;
+}
+
 // Return the size for constant buffer of each decl.
 // Return the size for constant buffer of each decl.
 unsigned CGMSHLSLRuntime::AddTypeAnnotation(QualType Ty,
 unsigned CGMSHLSLRuntime::AddTypeAnnotation(QualType Ty,
                                             DxilTypeSystem &dxilTypeSys,
                                             DxilTypeSystem &dxilTypeSys,
@@ -1108,8 +1184,10 @@ unsigned CGMSHLSLRuntime::AddTypeAnnotation(QualType Ty,
     }
     }
     DxilStructAnnotation *annotation = dxilTypeSys.AddStructAnnotation(ST,
     DxilStructAnnotation *annotation = dxilTypeSys.AddStructAnnotation(ST,
       GetNumTemplateArgsForRecordDecl(RT->getDecl()));
       GetNumTemplateArgsForRecordDecl(RT->getDecl()));
-
-    return ConstructStructAnnotation(annotation, RD, dxilTypeSys);
+    DxilPayloadAnnotation *payloadAnnotation = nullptr;
+    if (ValidatePayloadDecl(RT->getDecl(), *m_pHLModule->GetShaderModel(), CGM.getDiags(), CGM.getCodeGenOpts()))
+      payloadAnnotation = dxilTypeSys.AddPayloadAnnotation(ST);
+    return ConstructStructAnnotation(annotation, payloadAnnotation, RD, dxilTypeSys);
   } else if (const RecordType *RT = dyn_cast<RecordType>(paramTy)) {
   } else if (const RecordType *RT = dyn_cast<RecordType>(paramTy)) {
     // For this pointer.
     // For this pointer.
     RecordDecl *RD = RT->getDecl();
     RecordDecl *RD = RT->getDecl();
@@ -1121,8 +1199,10 @@ unsigned CGMSHLSLRuntime::AddTypeAnnotation(QualType Ty,
     }
     }
     DxilStructAnnotation *annotation = dxilTypeSys.AddStructAnnotation(ST,
     DxilStructAnnotation *annotation = dxilTypeSys.AddStructAnnotation(ST,
       GetNumTemplateArgsForRecordDecl(RT->getDecl()));
       GetNumTemplateArgsForRecordDecl(RT->getDecl()));
-
-    return ConstructStructAnnotation(annotation, RD, dxilTypeSys);
+    DxilPayloadAnnotation* payloadAnnotation = nullptr;
+    if (ValidatePayloadDecl(RT->getDecl(), *m_pHLModule->GetShaderModel(), CGM.getDiags(), CGM.getCodeGenOpts()))
+         payloadAnnotation = dxilTypeSys.AddPayloadAnnotation(ST);
+    return ConstructStructAnnotation(annotation, payloadAnnotation, RD, dxilTypeSys);
   } else if (IsHLSLResourceType(Ty)) {
   } else if (IsHLSLResourceType(Ty)) {
     // Save result type info.
     // Save result type info.
     AddTypeAnnotation(GetHLSLResourceResultType(Ty), dxilTypeSys, arrayEltSize);
     AddTypeAnnotation(GetHLSLResourceResultType(Ty), dxilTypeSys, arrayEltSize);
@@ -1731,6 +1811,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
   bool hasOutVertices = false;
   bool hasOutVertices = false;
   bool hasOutPrimitives = false;
   bool hasOutPrimitives = false;
   bool hasInPayload = false;
   bool hasInPayload = false;
+  bool rayShaderHaveErrors = false;
   for (; ArgNo < F->arg_size(); ++ArgNo, ++ParmIdx, ++ArgIt) {
   for (; ArgNo < F->arg_size(); ++ArgNo, ++ParmIdx, ++ArgIt) {
     DxilParameterAnnotation &paramAnnotation =
     DxilParameterAnnotation &paramAnnotation =
         FuncAnnotation->GetParameterAnnotation(ArgNo);
         FuncAnnotation->GetParameterAnnotation(ArgNo);
@@ -2071,27 +2152,31 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           DiagnosticsEngine::Error, "parameters are not allowed for %0 shader"))
           DiagnosticsEngine::Error, "parameters are not allowed for %0 shader"))
             << (funcProps->shaderKind == DXIL::ShaderKind::RayGeneration ?
             << (funcProps->shaderKind == DXIL::ShaderKind::RayGeneration ?
                 "raygeneration" : "intersection");
                 "raygeneration" : "intersection");
-        break;
+        rayShaderHaveErrors = true;
       case DXIL::ShaderKind::AnyHit:
       case DXIL::ShaderKind::AnyHit:
       case DXIL::ShaderKind::ClosestHit:
       case DXIL::ShaderKind::ClosestHit:
         if (0 == ArgNo && dxilInputQ != DxilParamInputQual::Inout) {
         if (0 == ArgNo && dxilInputQ != DxilParamInputQual::Inout) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             DiagnosticsEngine::Error,
             "ray payload parameter must be inout"));
             "ray payload parameter must be inout"));
+          rayShaderHaveErrors = true;
         } else if (1 == ArgNo && dxilInputQ != DxilParamInputQual::In) {
         } else if (1 == ArgNo && dxilInputQ != DxilParamInputQual::In) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             DiagnosticsEngine::Error,
             "intersection attributes parameter must be in"));
             "intersection attributes parameter must be in"));
+          rayShaderHaveErrors = true;
         } else if (ArgNo > 1) {
         } else if (ArgNo > 1) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             DiagnosticsEngine::Error,
             "too many parameters, expected payload and attributes parameters only."));
             "too many parameters, expected payload and attributes parameters only."));
+          rayShaderHaveErrors = true;
         }
         }
         if (ArgNo < 2) {
         if (ArgNo < 2) {
           if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
           if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
             Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
               DiagnosticsEngine::Error,
               DiagnosticsEngine::Error,
               "payload and attribute structures must be user defined types with only numeric contents."));
               "payload and attribute structures must be user defined types with only numeric contents."));
+            rayShaderHaveErrors = true;
           } else {
           } else {
             DataLayout DL(&this->TheModule);
             DataLayout DL(&this->TheModule);
             unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
             unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
@@ -2107,16 +2192,19 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             DiagnosticsEngine::Error,
             "only one parameter (ray payload) allowed for miss shader"));
             "only one parameter (ray payload) allowed for miss shader"));
+          rayShaderHaveErrors = true;
         } else if (dxilInputQ != DxilParamInputQual::Inout) {
         } else if (dxilInputQ != DxilParamInputQual::Inout) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             DiagnosticsEngine::Error,
             "ray payload parameter must be declared inout"));
             "ray payload parameter must be declared inout"));
+          rayShaderHaveErrors = true;
         }
         }
         if (ArgNo < 1) {
         if (ArgNo < 1) {
           if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
           if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
             Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
               DiagnosticsEngine::Error,
               DiagnosticsEngine::Error,
               "ray payload parameter must be a user defined type with only numeric contents."));
               "ray payload parameter must be a user defined type with only numeric contents."));
+            rayShaderHaveErrors = true;
           } else {
           } else {
             DataLayout DL(&this->TheModule);
             DataLayout DL(&this->TheModule);
             unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
             unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
@@ -2129,16 +2217,19 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             DiagnosticsEngine::Error,
             "only one parameter allowed for callable shader"));
             "only one parameter allowed for callable shader"));
+          rayShaderHaveErrors = true;
         } else if (dxilInputQ != DxilParamInputQual::Inout) {
         } else if (dxilInputQ != DxilParamInputQual::Inout) {
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
           Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             DiagnosticsEngine::Error,
             "callable parameter must be declared inout"));
             "callable parameter must be declared inout"));
+          rayShaderHaveErrors = true;
         }
         }
         if (ArgNo < 1) {
         if (ArgNo < 1) {
           if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
           if (!IsHLSLNumericUserDefinedType(parmDecl->getType())) {
             Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
             Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(
               DiagnosticsEngine::Error,
               DiagnosticsEngine::Error,
               "callable parameter must be a user defined type with only numeric contents."));
               "callable parameter must be a user defined type with only numeric contents."));
+            rayShaderHaveErrors = true;
           } else {
           } else {
             DataLayout DL(&this->TheModule);
             DataLayout DL(&this->TheModule);
             unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
             unsigned size = DL.getTypeAllocSize(F->getFunctionType()->getFunctionParamType(ArgNo)->getPointerElementType());
@@ -2188,6 +2279,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
           Diags.getCustomDiagID(DiagnosticsEngine::Error,
           Diags.getCustomDiagID(DiagnosticsEngine::Error,
             "shader must include inout parameter structure.");
             "shader must include inout parameter structure.");
         Diags.Report(FD->getLocation(), DiagID);
         Diags.Report(FD->getLocation(), DiagID);
+        rayShaderHaveErrors = true;
       }
       }
     }
     }
     if (bNeedsAttributes &&
     if (bNeedsAttributes &&
@@ -2195,9 +2287,17 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
       Diags.Report(FD->getLocation(), Diags.getCustomDiagID(
       Diags.Report(FD->getLocation(), Diags.getCustomDiagID(
         DiagnosticsEngine::Error,
         DiagnosticsEngine::Error,
         "shader must include attributes structure parameter."));
         "shader must include attributes structure parameter."));
+      rayShaderHaveErrors = true;
     }
     }
   }
   }
 
 
+  // If we encountered an error during verification of RayTracing
+  // shader signatures, stop here. Otherwise we risk triggering
+  // unhandled behaviour, i.e., DXC crashes when the payload is
+  // declared as matrix<float...> type.
+  if(rayShaderHaveErrors)
+      return;
+
   // Type annotation for parameters and return type.
   // Type annotation for parameters and return type.
   DxilTypeSystem &dxilTypeSys = m_pHLModule->GetTypeSystem();
   DxilTypeSystem &dxilTypeSys = m_pHLModule->GetTypeSystem();
   unsigned arrayEltSize = 0;
   unsigned arrayEltSize = 0;
@@ -2225,6 +2325,12 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
     }
     }
   }
   }
 
 
+  // Only parse root signature for entry function.
+  if (HLSLRootSignatureAttr *RSA = FD->getAttr<HLSLRootSignatureAttr>()) {
+    if (isExportedEntry || isEntry)
+      EmitHLSLRootSignature(RSA, F, *funcProps);
+  }
+
   // Only add functionProps when exist.
   // Only add functionProps when exist.
   if (isExportedEntry || isEntry)
   if (isExportedEntry || isEntry)
     m_pHLModule->AddDxilFunctionProps(F, funcProps);
     m_pHLModule->AddDxilFunctionProps(F, funcProps);
@@ -2715,6 +2821,10 @@ static void InitFromUnusualAnnotations(DxilResourceBase &Resource, NamedDecl &De
     case hlsl::UnusualAnnotation::UA_ConstantPacking:
     case hlsl::UnusualAnnotation::UA_ConstantPacking:
       // Should be handled by front-end
       // Should be handled by front-end
       llvm_unreachable("packoffset on resource");
       llvm_unreachable("packoffset on resource");
+      break;    
+    case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier:
+      // Should be handled by front-end
+      llvm_unreachable("payload qualifier on resource");
       break;
       break;
     default:
     default:
       llvm_unreachable("unknown UnusualAnnotation on resource");
       llvm_unreachable("unknown UnusualAnnotation on resource");
@@ -3090,6 +3200,12 @@ bool CGMSHLSLRuntime::SetUAVSRV(SourceLocation loc,
 
 
     uint32_t strideInBytes = dataLayout.getTypeAllocSize(retTy);
     uint32_t strideInBytes = dataLayout.getTypeAllocSize(retTy);
     hlslRes->SetElementStride(strideInBytes);
     hlslRes->SetElementStride(strideInBytes);
+    if (kind == hlsl::DxilResource::Kind::StructuredBuffer) {
+      if (StructType* ST = dyn_cast<StructType>(retTy)) {
+        const StructLayout* SL = dataLayout.getStructLayout(ST);
+        hlslRes->SetBaseAlignLog2(Log2_32(SL->getAlignment()));
+      }
+    }
   }
   }
   if (HasHLSLGloballyCoherent(QualTy)) {
   if (HasHLSLGloballyCoherent(QualTy)) {
     hlslRes->SetGloballyCoherent(true);
     hlslRes->SetGloballyCoherent(true);
@@ -3273,6 +3389,9 @@ void CGMSHLSLRuntime::AddConstant(VarDecl *constDecl, HLCBuffer &CB) {
     }
     }
     case hlsl::UnusualAnnotation::UA_SemanticDecl:
     case hlsl::UnusualAnnotation::UA_SemanticDecl:
       // skip semantic on constant
       // skip semantic on constant
+      break;    
+    case hlsl::UnusualAnnotation::UA_PayloadAccessQualifier:
+      // skip payload qualifiers on constant
       break;
       break;
     }
     }
   }
   }
@@ -5631,22 +5750,33 @@ void CGMSHLSLRuntime::EmitHLSLFlatConversion(CodeGenFunction &CGF,
   }
   }
 }
 }
 
 
-void CGMSHLSLRuntime::EmitHLSLRootSignature(CodeGenFunction &CGF,
-                                            HLSLRootSignatureAttr *RSA,
-                                            Function *Fn) {
-  // Only parse root signature for entry function.
-  if (Fn != Entry.Func)
-    return;
-
+void CGMSHLSLRuntime::EmitHLSLRootSignature(HLSLRootSignatureAttr *RSA,
+                                            Function *Fn,
+                                            DxilFunctionProps &props) {
   StringRef StrRef = RSA->getSignatureName();
   StringRef StrRef = RSA->getSignatureName();
-  DiagnosticsEngine &Diags = CGF.getContext().getDiagnostics();
+  DiagnosticsEngine &Diags = CGM.getDiags();
   SourceLocation SLoc = RSA->getLocation();
   SourceLocation SLoc = RSA->getLocation();
   RootSignatureHandle RootSigHandle;
   RootSignatureHandle RootSigHandle;
-  clang::CompileRootSignature(StrRef, Diags, SLoc, rootSigVer, DxilRootSignatureCompilationFlags::GlobalRootSignature, &RootSigHandle);
+  clang::CompileRootSignature(
+      StrRef, Diags, SLoc, rootSigVer,
+      DxilRootSignatureCompilationFlags::GlobalRootSignature, &RootSigHandle);
   if (!RootSigHandle.IsEmpty()) {
   if (!RootSigHandle.IsEmpty()) {
     RootSigHandle.EnsureSerializedAvailable();
     RootSigHandle.EnsureSerializedAvailable();
-    m_pHLModule->SetSerializedRootSignature(RootSigHandle.GetSerializedBytes(),
-                                            RootSigHandle.GetSerializedSize());
+    if (!m_bIsLib) {
+      m_pHLModule->SetSerializedRootSignature(
+          RootSigHandle.GetSerializedBytes(),
+          RootSigHandle.GetSerializedSize());
+    } else {
+      if (!props.IsRay()) {
+        props.SetSerializedRootSignature(RootSigHandle.GetSerializedBytes(),
+                                         RootSigHandle.GetSerializedSize());
+      } else {
+        unsigned DiagID = Diags.getCustomDiagID(
+            DiagnosticsEngine::Error, "root signature attribute not supported "
+                                      "for raytracing entry functions");
+        Diags.Report(RSA->getLocation(), DiagID);
+      }
+    }
   }
   }
 }
 }
 
 

+ 92 - 0
tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp

@@ -1460,6 +1460,10 @@ typedef APInt(__cdecl *IntBinaryEvalFuncType)(const APInt &, const APInt &);
 typedef float(__cdecl *FloatBinaryEvalFuncType)(float, float);
 typedef float(__cdecl *FloatBinaryEvalFuncType)(float, float);
 typedef double(__cdecl *DoubleBinaryEvalFuncType)(double, double);
 typedef double(__cdecl *DoubleBinaryEvalFuncType)(double, double);
 
 
+typedef APInt(__cdecl *IntTernaryEvalFuncType)(const APInt &, const APInt &, const APInt &);
+typedef float(__cdecl *FloatTernaryEvalFuncType)(float, float, float);
+typedef double(__cdecl *DoubleTernaryEvalFuncType)(double, double, double);
+
 Value *EvalUnaryIntrinsic(ConstantFP *fpV, FloatUnaryEvalFuncType floatEvalFunc,
 Value *EvalUnaryIntrinsic(ConstantFP *fpV, FloatUnaryEvalFuncType floatEvalFunc,
                           DoubleUnaryEvalFuncType doubleEvalFunc) {
                           DoubleUnaryEvalFuncType doubleEvalFunc) {
   llvm::Type *Ty = fpV->getType();
   llvm::Type *Ty = fpV->getType();
@@ -1510,6 +1514,45 @@ Value *EvalBinaryIntrinsic(Constant *cV0, Constant *cV1,
   return Result;
   return Result;
 }
 }
 
 
+// Evaluates a three-operand intrinsic on scalar constants.
+//
+// The type of cV0 selects the callback: doubleEvalFunc for double,
+// floatEvalFunc for float, and intEvalFunc for everything else, which is
+// asserted to be an integer type. The result is a new constant of cV0's type.
+Value *EvalTernaryIntrinsic(Constant *cV0, Constant *cV1, Constant *cV2,
+                             FloatTernaryEvalFuncType floatEvalFunc,
+                             DoubleTernaryEvalFuncType doubleEvalFunc,
+                             IntTernaryEvalFuncType intEvalFunc) {
+  llvm::Type *EltTy = cV0->getType();
+
+  if (EltTy->isDoubleTy()) {
+    ConstantFP *First = cast<ConstantFP>(cV0);
+    ConstantFP *Second = cast<ConstantFP>(cV1);
+    ConstantFP *Third = cast<ConstantFP>(cV2);
+    return ConstantFP::get(
+        EltTy, doubleEvalFunc(First->getValueAPF().convertToDouble(),
+                              Second->getValueAPF().convertToDouble(),
+                              Third->getValueAPF().convertToDouble()));
+  }
+
+  if (EltTy->isFloatTy()) {
+    ConstantFP *First = cast<ConstantFP>(cV0);
+    ConstantFP *Second = cast<ConstantFP>(cV1);
+    ConstantFP *Third = cast<ConstantFP>(cV2);
+    return ConstantFP::get(
+        EltTy, floatEvalFunc(First->getValueAPF().convertToFloat(),
+                             Second->getValueAPF().convertToFloat(),
+                             Third->getValueAPF().convertToFloat()));
+  }
+
+  // Remaining case: integers. The integer callback is optional for callers
+  // of the CallInst overload, so it is asserted here before use.
+  DXASSERT_NOMSG(EltTy->isIntegerTy());
+  DXASSERT_NOMSG(intEvalFunc);
+  ConstantInt *First = cast<ConstantInt>(cV0);
+  ConstantInt *Second = cast<ConstantInt>(cV1);
+  ConstantInt *Third = cast<ConstantInt>(cV2);
+  return ConstantInt::get(
+      EltTy,
+      intEvalFunc(First->getValue(), Second->getValue(), Third->getValue()));
+}
+
 Value *EvalUnaryIntrinsic(CallInst *CI, FloatUnaryEvalFuncType floatEvalFunc,
 Value *EvalUnaryIntrinsic(CallInst *CI, FloatUnaryEvalFuncType floatEvalFunc,
                           DoubleUnaryEvalFuncType doubleEvalFunc) {
                           DoubleUnaryEvalFuncType doubleEvalFunc) {
   Value *V = CI->getArgOperand(0);
   Value *V = CI->getArgOperand(0);
@@ -1566,6 +1609,43 @@ Value *EvalBinaryIntrinsic(CallInst *CI, FloatBinaryEvalFuncType floatEvalFunc,
   return Result;
   return Result;
 }
 }
 
 
+// Evaluates a three-operand intrinsic call whose operands are constants and
+// replaces the call with the result.
+//
+// For a vector-typed call the scalar overload is applied per element and the
+// results are inserted into a vector that starts out undef; otherwise the
+// scalar overload is applied once. All uses of CI are then redirected to the
+// result and CI is erased, so CI must not be used after this returns.
+//
+// Operands are converted with cast<Constant>, i.e. the caller is expected to
+// have checked that every operand is a constant.
+// NOTE(review): getAggregateElement() results are also passed through cast<>
+// unchecked; presumably callers only reach this with constant vectors whose
+// elements can be extracted - confirm.
+//
+// intEvalFunc may be left as nullptr when the intrinsic is never evaluated on
+// integer operands.
+Value *EvalTernaryIntrinsic(CallInst *CI, FloatTernaryEvalFuncType floatEvalFunc,
+                             DoubleTernaryEvalFuncType doubleEvalFunc,
+                             IntTernaryEvalFuncType intEvalFunc = nullptr) {
+  Value *V0 = CI->getArgOperand(0);
+  Value *V1 = CI->getArgOperand(1);
+  Value *V2 = CI->getArgOperand(2);
+  llvm::Type *Ty = CI->getType();
+  Value *Result = nullptr;
+  if (llvm::VectorType *VT = dyn_cast<llvm::VectorType>(Ty)) {
+    Result = UndefValue::get(Ty);
+    Constant *CV0 = cast<Constant>(V0);
+    Constant *CV1 = cast<Constant>(V1);
+    Constant *CV2 = cast<Constant>(V2);
+    IRBuilder<> Builder(CI);
+    for (unsigned i = 0; i < VT->getNumElements(); i++) {
+      Constant *cV0 = cast<Constant>(CV0->getAggregateElement(i));
+      Constant *cV1 = cast<Constant>(CV1->getAggregateElement(i));
+      Constant *cV2 = cast<Constant>(CV2->getAggregateElement(i));
+      Value *EltResult = EvalTernaryIntrinsic(cV0, cV1, cV2, floatEvalFunc,
+                                             doubleEvalFunc, intEvalFunc);
+      Result = Builder.CreateInsertElement(Result, EltResult, i);
+    }
+  } else {
+    Constant *cV0 = cast<Constant>(V0);
+    Constant *cV1 = cast<Constant>(V1);
+    Constant *cV2 = cast<Constant>(V2);
+    Result = EvalTernaryIntrinsic(cV0, cV1, cV2, floatEvalFunc, doubleEvalFunc,
+                                 intEvalFunc);
+  }
+  // Redirect users first, then drop the now-dead call exactly once.
+  CI->replaceAllUsesWith(Result);
+  CI->eraseFromParent();
+  return Result;
+}
+
 void SimpleTransformForHLDXIRInst(Instruction *I, SmallInstSet &deadInsts) {
 void SimpleTransformForHLDXIRInst(Instruction *I, SmallInstSet &deadInsts) {
 
 
   unsigned opcode = I->getOpcode();
   unsigned opcode = I->getOpcode();
@@ -1789,6 +1869,18 @@ Value *TryEvalIntrinsic(CallInst *CI, IntrinsicOp intriOp,
     CI->eraseFromParent();
     CI->eraseFromParent();
     return cNan;
     return cNan;
   } break;
   } break;
+  case IntrinsicOp::IOP_clamp: {
+    auto clampF = [](float a, float b, float c) {
+      return a < b ? b : a > c ? c : a;
+    };
+    auto clampD = [](double a, double b, double c) {
+      return a < b ? b : a > c ? c : a;
+    };
+    auto clampI = [](const APInt &a, const APInt &b, const APInt &c) -> APInt {
+      return a.slt(b) ? b : a.sgt(c) ? c : a;
+    };
+    return EvalTernaryIntrinsic(CI, clampF, clampD, clampI);
+  } break;
   default:
   default:
     return nullptr;
     return nullptr;
   }
   }

+ 0 - 3
tools/clang/lib/CodeGen/CGHLSLRuntime.h

@@ -120,9 +120,6 @@ public:
                                    clang::QualType SrcTy,
                                    clang::QualType SrcTy,
                                    llvm::Value *DestPtr,
                                    llvm::Value *DestPtr,
                                    clang::QualType DestTy) = 0;
                                    clang::QualType DestTy) = 0;
-  virtual void EmitHLSLRootSignature(CodeGenFunction &CGF,
-                                     clang::HLSLRootSignatureAttr *RSA,
-                                     llvm::Function *Fn) = 0;
   virtual llvm::Value *EmitHLSLLiteralCast(CodeGenFunction &CGF, llvm::Value *Src, clang::QualType SrcType,
   virtual llvm::Value *EmitHLSLLiteralCast(CodeGenFunction &CGF, llvm::Value *Src, clang::QualType SrcType,
                                                clang::QualType DstType) = 0;
                                                clang::QualType DstType) = 0;
 
 

+ 1 - 3
tools/clang/lib/CodeGen/CodeGenAction.cpp

@@ -556,10 +556,8 @@ BackendConsumer::DxilDiagHandler(const llvm::DiagnosticInfoDxil &D) {
   }
   }
   FullSourceLoc Loc(DILoc, SourceMgr);
   FullSourceLoc Loc(DILoc, SourceMgr);
 
 
-  // If no location information is available, prompt for debug flag
-  // and add function name to give some information
+  // If no location information is available, add function name
   if (Loc.isInvalid()) {
   if (Loc.isInvalid()) {
-    Message += " Use /Zi for source location.";
     auto *DiagClient = dynamic_cast<TextDiagnosticPrinter*>(Diags.getClient());
     auto *DiagClient = dynamic_cast<TextDiagnosticPrinter*>(Diags.getClient());
     auto *func = D.getFunction();
     auto *func = D.getFunction();
     if (DiagClient && func)
     if (DiagClient && func)

+ 0 - 6
tools/clang/lib/CodeGen/CodeGenFunction.cpp

@@ -866,12 +866,6 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
   FunctionArgList Args;
   FunctionArgList Args;
   QualType ResTy = FD->getReturnType();
   QualType ResTy = FD->getReturnType();
 
 
-  // HLSL Change Start - emit root signature associated with function
-  if (HLSLRootSignatureAttr *RSA = FD->getAttr<HLSLRootSignatureAttr>()) {
-    CGM.getHLSLRuntime().EmitHLSLRootSignature(*this, RSA, Fn);
-  }
-  // HLSL Change Ends - emit root signature associated with function
-
   CurGD = GD;
   CurGD = GD;
   const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
   const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
   if (MD && MD->isInstance()) {
   if (MD && MD->isInstance()) {

+ 73 - 3
tools/clang/lib/Parse/ParseDecl.cpp

@@ -27,8 +27,10 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "dxc/Support/Global.h"    // HLSL Change
-#include "clang/Sema/SemaHLSL.h"   // HLSL Change
+#include "dxc/Support/Global.h"       // HLSL Change
+#include "clang/Sema/SemaHLSL.h"      // HLSL Change
+#include "dxc/DXIL/DxilShaderModel.h" // HLSL Change
+#include "dxc/DXIL/DxilConstants.h"   // HLSL Change
 
 
 using namespace clang;
 using namespace clang;
 
 
@@ -359,7 +361,70 @@ bool Parser::MaybeParseHLSLAttributes(std::vector<hlsl::UnusualAnnotation *> &ta
       return false;
       return false;
     }
     }
 
 
-    if (NextToken().is(tok::kw_register)) {
+    bool identifierIsPayloadAnnotation = false;
+    if (NextToken().is(tok::identifier)) {
+        StringRef identifier = NextToken().getIdentifierInfo()->getName();
+        identifierIsPayloadAnnotation = identifier == "read" || identifier == "write";
+    }
+
+    if (identifierIsPayloadAnnotation) {
+      hlsl::PayloadAccessAnnotation mod;
+
+      if (NextToken().getIdentifierInfo()->getName() == "read")
+          mod.qualifier = hlsl::DXIL::PayloadAccessQualifier::Read;
+      else
+          mod.qualifier = hlsl::DXIL::PayloadAccessQualifier::Write;
+
+      // : read/write ( shader stage *[,shader stage])
+      ConsumeToken(); // consume the colon.
+
+      mod.Loc = Tok.getLocation();
+      ConsumeToken(); // consume the read/write identifier
+      if (ExpectAndConsume(tok::l_paren, diag::err_expected_lparen_after,
+                           "payload access qualifier")) {
+        return true;
+      }
+
+      while(Tok.is(tok::identifier)) {
+        hlsl::DXIL::PayloadAccessShaderStage stage = hlsl::DXIL::PayloadAccessShaderStage::Invalid;
+        const char *stagePtr = Tok.getIdentifierInfo()->getName().data();
+        StringRef shaderStage(stagePtr);
+        if (shaderStage != "caller" && shaderStage != "anyhit" &&
+            shaderStage != "closesthit" && shaderStage != "miss") {
+          Diag(Tok.getLocation(),
+               diag::err_hlsl_payload_access_qualifier_unsupported_shader)
+              << shaderStage;
+          return true;
+        }
+
+        if (shaderStage == "caller") {
+          stage = hlsl::DXIL::PayloadAccessShaderStage::Caller;
+        } else if (shaderStage == "closesthit") {
+          stage = hlsl::DXIL::PayloadAccessShaderStage::Closesthit;
+        } else if (shaderStage == "miss") {
+          stage = hlsl::DXIL::PayloadAccessShaderStage::Miss;
+        } else if (shaderStage == "anyhit") {
+          stage = hlsl::DXIL::PayloadAccessShaderStage::Anyhit;
+        } 
+
+        mod.ShaderStages.push_back(stage);
+        ConsumeToken(); // consume shader type
+
+        if (Tok.is(tok::comma)) // check if we have a list of shader types
+          ConsumeToken();
+
+      } while (Tok.is(tok::identifier));
+
+      if (ExpectAndConsume(tok::r_paren, diag::err_expected_rparen_after,
+                           "payload access qualifier")) {
+        return true;
+      }
+
+      if (mod.ShaderStages.empty())
+          mod.qualifier = hlsl::DXIL::PayloadAccessQualifier::NoAccess;
+
+      target.push_back(new (context) hlsl::PayloadAccessAnnotation(mod));
+    }else if (NextToken().is(tok::kw_register)) {
       hlsl::RegisterAssignment r;
       hlsl::RegisterAssignment r;
 
 
       // : register ([shader_profile], Type#[subcomponent] [,spaceX])
       // : register ([shader_profile], Type#[subcomponent] [,spaceX])
@@ -551,6 +616,10 @@ bool Parser::MaybeParseHLSLAttributes(std::vector<hlsl::UnusualAnnotation *> &ta
       ConsumeToken(); // consume colon.
       ConsumeToken(); // consume colon.
 
 
       StringRef semanticName = Tok.getIdentifierInfo()->getName();
       StringRef semanticName = Tok.getIdentifierInfo()->getName();
+      if (semanticName.equals("VFACE")) {
+        Diag(Tok.getLocation(), diag::warn_unsupported_target_attribute)
+            << semanticName;
+      }
       hlsl::SemanticDecl *pUA = new (context) hlsl::SemanticDecl(semanticName);
       hlsl::SemanticDecl *pUA = new (context) hlsl::SemanticDecl(semanticName);
       pUA->Loc = Tok.getLocation();
       pUA->Loc = Tok.getLocation();
       ConsumeToken(); // consume semantic
       ConsumeToken(); // consume semantic
@@ -562,6 +631,7 @@ bool Parser::MaybeParseHLSLAttributes(std::vector<hlsl::UnusualAnnotation *> &ta
       return false;
       return false;
     }
     }
   }
   }
+  return true;
 }
 }
 // HLSL Change Ends
 // HLSL Change Ends
 
 

+ 16 - 3
tools/clang/lib/SPIRV/AstTypeProbe.cpp

@@ -265,6 +265,17 @@ bool isSubpassInputMS(QualType type) {
   return false;
   return false;
 }
 }
 
 
+bool isArrayType(QualType type, QualType *elemType, uint32_t *elemCount) {
+  if (const auto *arrayType = type->getAsArrayTypeUnsafe()) {
+    if (elemType)
+      *elemType = arrayType->getElementType();
+    if (elemCount)
+      *elemCount = hlsl::GetArraySize(type);
+    return true;
+  }
+  return false;
+}
+
 bool isConstantBuffer(clang::QualType type) {
 bool isConstantBuffer(clang::QualType type) {
   // Strip outer arrayness first
   // Strip outer arrayness first
   while (type->isArrayType())
   while (type->isArrayType())
@@ -1051,12 +1062,14 @@ bool isRelaxedPrecisionType(QualType type, const SpirvCodeGenOptions &opts) {
         }
         }
   }
   }
 
 
-  // Vector & Matrix types could use relaxed precision based on their element
-  // type.
+  // Vector, Matrix and Array types could use relaxed precision based on their
+  // element type.
   {
   {
     QualType elemType = {};
     QualType elemType = {};
-    if (isVectorType(type, &elemType) || isMxNMatrix(type, &elemType))
+    if (isVectorType(type, &elemType) || isMxNMatrix(type, &elemType) ||
+        isArrayType(type, &elemType)) {
       return isRelaxedPrecisionType(elemType, opts);
       return isRelaxedPrecisionType(elemType, opts);
+    }
   }
   }
 
 
   // Images with RelaxedPrecision sampled type.
   // Images with RelaxedPrecision sampled type.

+ 4 - 4
tools/clang/lib/SPIRV/CapabilityVisitor.cpp

@@ -350,10 +350,10 @@ bool CapabilityVisitor::visit(SpirvDecoration *decor) {
                    "SV_Barycentrics", loc);
                    "SV_Barycentrics", loc);
       break;
       break;
     }
     }
-    case spv::BuiltIn::FragSizeEXT: {
-      addExtension(Extension::EXT_fragment_invocation_density, "SV_ShadingRate",
-                   loc);
-      addCapability(spv::Capability::FragmentDensityEXT);
+    case spv::BuiltIn::ShadingRateKHR:
+    case spv::BuiltIn::PrimitiveShadingRateKHR: {
+      addExtension(Extension::KHR_fragment_shading_rate, "SV_ShadingRate", loc);
+      addCapability(spv::Capability::FragmentShadingRateKHR);
       break;
       break;
     }
     }
     default:
     default:

+ 18 - 28
tools/clang/lib/SPIRV/DeclResultIdMapper.cpp

@@ -2221,8 +2221,6 @@ bool DeclResultIdMapper::createStageVars(
     // * SV_DispatchThreadID, SV_GroupThreadID, and SV_GroupID are allowed to be
     // * SV_DispatchThreadID, SV_GroupThreadID, and SV_GroupID are allowed to be
     //   uint, uint2, or uint3, but the corresponding builtins
     //   uint, uint2, or uint3, but the corresponding builtins
     //   (GlobalInvocationId, LocalInvocationId, WorkgroupId) must be a uint3.
     //   (GlobalInvocationId, LocalInvocationId, WorkgroupId) must be a uint3.
-    // * SV_ShadingRate is a uint value, but the builtin it corresponds to is a
-    //   int2.
 
 
     if (glPerVertex.tryToAccess(sigPointKind, semanticKind,
     if (glPerVertex.tryToAccess(sigPointKind, semanticKind,
                                 semanticToUse->index, invocationId, value,
                                 semanticToUse->index, invocationId, value,
@@ -2264,9 +2262,6 @@ bool DeclResultIdMapper::createStageVars(
           hlsl::IsHLSLVecType(type) ? hlsl::GetHLSLVecElementType(type) : type,
           hlsl::IsHLSLVecType(type) ? hlsl::GetHLSLVecElementType(type) : type,
           3);
           3);
       break;
       break;
-    case hlsl::Semantic::Kind::ShadingRate:
-      evalType = astContext.getExtVectorType(astContext.IntTy, 2);
-      break;
     default:
     default:
       // Only the semantic kinds mentioned above are handled.
       // Only the semantic kinds mentioned above are handled.
       break;
       break;
@@ -2505,25 +2500,6 @@ bool DeclResultIdMapper::createStageVars(
               astContext.getExtVectorType(srcVecElemType, 2), *value, *value,
               astContext.getExtVectorType(srcVecElemType, 2), *value, *value,
               {0, 1}, thisSemantic.loc);
               {0, 1}, thisSemantic.loc);
       }
       }
-      // Special handling of SV_ShadingRate, which is a bitpacked enum value,
-      // but SPIR-V's FragSizeEXT uses an int2. We build the enum value from
-      // the separate axis values.
-      else if (semanticKind == hlsl::Semantic::Kind::ShadingRate) {
-        // From the D3D12 functional spec for Variable-Rate Shading.
-        // #define D3D12_MAKE_COARSE_SHADING_RATE(x,y) ((x) << 2 | (y))
-        const auto x = spvBuilder.createCompositeExtract(
-            astContext.IntTy, *value, {0}, thisSemantic.loc);
-        const auto y = spvBuilder.createCompositeExtract(
-            astContext.IntTy, *value, {1}, thisSemantic.loc);
-        const auto constTwo =
-            spvBuilder.getConstantInt(astContext.IntTy, llvm::APInt(32, 2));
-        *value = spvBuilder.createBinaryOp(
-            spv::Op::OpBitwiseOr, astContext.UnsignedIntTy,
-            spvBuilder.createBinaryOp(spv::Op::OpShiftLeftLogical,
-                                      astContext.IntTy, x, constTwo,
-                                      thisSemantic.loc),
-            y, thisSemantic.loc);
-      }
 
 
       // Reciprocate SV_Position.w if requested
       // Reciprocate SV_Position.w if requested
       if (semanticKind == hlsl::Semantic::Kind::Position)
       if (semanticKind == hlsl::Semantic::Kind::Position)
@@ -3472,16 +3448,30 @@ SpirvVariable *DeclResultIdMapper::createSpirvStageVar(
   }
   }
   // According to DXIL spec, the ShadingRate SV can only be used by GSOut,
   // According to DXIL spec, the ShadingRate SV can only be used by GSOut,
   // VSOut, or PSIn. According to Vulkan spec, the FragSizeEXT BuiltIn can only
   // VSOut, or PSIn. According to Vulkan spec, the FragSizeEXT BuiltIn can only
-  // be used as PSIn.
+  // be used as VSOut, GSOut, MSOut or PSIn.
   case hlsl::Semantic::Kind::ShadingRate: {
   case hlsl::Semantic::Kind::ShadingRate: {
+    QualType checkType = type->getAs<ReferenceType>()
+                             ? type->getAs<ReferenceType>()->getPointeeType()
+                             : type;
+    QualType scalarTy;
+    if (!isScalarType(checkType, &scalarTy) || !scalarTy->isIntegerType()) {
+      emitError("semantic ShadingRate must be interger scalar type", srcLoc);
+    }
+
     switch (sigPointKind) {
     switch (sigPointKind) {
     case hlsl::SigPoint::Kind::PSIn:
     case hlsl::SigPoint::Kind::PSIn:
       stageVar->setIsSpirvBuiltin();
       stageVar->setIsSpirvBuiltin();
-      return spvBuilder.addStageBuiltinVar(type, sc, BuiltIn::FragSizeEXT,
+      return spvBuilder.addStageBuiltinVar(type, sc, BuiltIn::ShadingRateKHR,
                                            isPrecise, srcLoc);
                                            isPrecise, srcLoc);
+    case hlsl::SigPoint::Kind::VSOut:
+    case hlsl::SigPoint::Kind::GSOut:
+    case hlsl::SigPoint::Kind::MSOut:
+      stageVar->setIsSpirvBuiltin();
+      return spvBuilder.addStageBuiltinVar(
+          type, sc, BuiltIn::PrimitiveShadingRateKHR, isPrecise, srcLoc);
     default:
     default:
-      emitError("semantic ShadingRate currently unsupported in non-PS shader"
-                " stages",
+      emitError("semantic ShadingRate must be used only for PSIn, VSOut, "
+                "GSOut, MSOut",
                 srcLoc);
                 srcLoc);
       break;
       break;
     }
     }

+ 4 - 0
tools/clang/lib/SPIRV/FeatureManager.cpp

@@ -140,6 +140,8 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) {
       .Case("SPV_NV_ray_tracing", Extension::NV_ray_tracing)
       .Case("SPV_NV_ray_tracing", Extension::NV_ray_tracing)
       .Case("SPV_NV_mesh_shader", Extension::NV_mesh_shader)
       .Case("SPV_NV_mesh_shader", Extension::NV_mesh_shader)
       .Case("SPV_KHR_ray_query", Extension::KHR_ray_query)
       .Case("SPV_KHR_ray_query", Extension::KHR_ray_query)
+      .Case("SPV_KHR_fragment_shading_rate",
+            Extension::KHR_fragment_shading_rate)
       .Default(Extension::Unknown);
       .Default(Extension::Unknown);
 }
 }
 
 
@@ -189,6 +191,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) {
     return "SPV_NV_mesh_shader";
     return "SPV_NV_mesh_shader";
   case Extension::KHR_ray_query:
   case Extension::KHR_ray_query:
     return "SPV_KHR_ray_query";
     return "SPV_KHR_ray_query";
+  case Extension::KHR_fragment_shading_rate:
+    return "SPV_KHR_fragment_shading_rate";
   default:
   default:
     break;
     break;
   }
   }

Algúns arquivos non se mostraron porque demasiados arquivos cambiaron neste cambio